# Source code for sampling.name

# -*- coding: utf-8 -*-

"""
Community sample names.
"""

import base64
import hashlib
import logging
import sys
from collections import defaultdict
from itertools import count
from pathlib import Path
from typing import Iterator

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self


[docs] class Name: """ A community sample name has attributes *id*, a symbolic ecosystem name or a hash of the community; *sub*, a subset, typically "original", "minus", or "added"; *size*, the number of models in the community; *rep*, a repetition number for multiple samples. The class attribute ``serial`` of Name keeps a counter for each ecosystem, to generate unique serial numbers. """ serial: defaultdict[str, Iterator] = defaultdict(count) def __init__( # pylint: disable=R0913 self, eco: str | None = None, sub: str | None = "original", rep: int | None = None, size: int | None = None, com: list[str | Path] | None = None, name: Self | None = None, ) -> None: self.eco: str | None = eco or getattr(name, "eco", None) self.sub: str | None = sub or getattr(name, "sub", None) self.rep: int | None = rep or getattr(name, "rep", 0) self.ident: str = ( self.mk_id(com) if not self.eco else getattr(name, "ident", str(next(Name.serial[self.eco]))) ) self.size: int | None = size or getattr(name, "size", 0) if com and not size: logging.debug( "Resetting size for %s from %d to %d", str(self), self.size, len(com) ) self.size = len(com) if com and size and not name: assert self.size == len(com)
[docs] @staticmethod def mk_id(community: list[str | Path] | None) -> str: """Make an id for a community.""" ident = hashlib.sha1() for c in community or []: ident.update(str(c).encode()) b32 = base64.b32encode(ident.digest()).decode() logging.debug( "mk_id %s from %s", b32, " ".join(str(c) for c in community or []) ) return b32
[docs] @classmethod def from_string(cls, string: str) -> Self: """ Make ecosystem name from its string representation. """ eco, ident, sub, size, rep = (string.split("_") + [None] * 5)[0:5] size_i: int | None = int(size) if size else None rep_i: int | None = int(rep) if rep else None new_name: Self = cls(eco=eco, sub=sub, size=size_i, rep=rep_i) if ident: new_name.ident = ident return new_name
[docs] def __str__(self) -> str: """ Serialize a community name. """ return "_".join( str(i) for i in [self.eco or "X", self.ident, self.sub, self.size, self.rep] )
[docs] def __eq__(self, other) -> bool: """Compare internal representations.""" return ( self.eco == other.eco and self.ident == other.ident and self.sub == other.sub and (self.rep == other.rep or not self.rep or not other.rep) and self.size == other.size )
[docs] def __hash__(self) -> int: """Default hash function.""" return hash(self.__str__())