[docs]
class Name:
"""
A community sample name has attributes
*id*, a symbolic ecosystem name or a hash of the community;
*sub*, a subset, typically "original", "minus", or "added";
*size*, the number of models in the community;
*rep*, a repetition number for multiple samples.
The class attribute ``serial`` of Name keeps a counter for each
ecosystem, to generate unique serial numbers.
"""
serial: defaultdict[str, Iterator] = defaultdict(count)
def __init__(
# pylint: disable=R0913
self,
eco: str | None = None,
sub: str | None = "original",
rep: int | None = None,
size: int | None = None,
com: list[str | Path] | None = None,
name: Self | None = None,
) -> None:
self.eco: str | None = eco or getattr(name, "eco", None)
self.sub: str | None = sub or getattr(name, "sub", None)
self.rep: int | None = rep or getattr(name, "rep", 0)
self.ident: str = (
self.mk_id(com)
if not self.eco
else getattr(name, "ident", str(next(Name.serial[self.eco])))
)
self.size: int | None = size or getattr(name, "size", 0)
if com and not size:
logging.debug(
"Resetting size for %s from %d to %d", str(self), self.size, len(com)
)
self.size = len(com)
if com and size and not name:
assert self.size == len(com)
[docs]
@staticmethod
def mk_id(community: list[str | Path] | None) -> str:
"""Make an id for a community."""
ident = hashlib.sha1()
for c in community or []:
ident.update(str(c).encode())
b32 = base64.b32encode(ident.digest()).decode()
logging.debug(
"mk_id %s from %s", b32, " ".join(str(c) for c in community or [])
)
return b32
[docs]
@classmethod
def from_string(cls, string: str) -> Self:
"""
Make ecosystem name from its string representation.
"""
eco, ident, sub, size, rep = (string.split("_") + [None] * 5)[0:5]
size_i: int | None = int(size) if size else None
rep_i: int | None = int(rep) if rep else None
new_name: Self = cls(eco=eco, sub=sub, size=size_i, rep=rep_i)
if ident:
new_name.ident = ident
return new_name
[docs]
def __str__(self) -> str:
"""
Serialize a community name.
"""
return "_".join(
str(i) for i in [self.eco or "X", self.ident, self.sub, self.size, self.rep]
)
[docs]
def __eq__(self, other) -> bool:
"""Compare internal representations."""
return (
self.eco == other.eco
and self.ident == other.ident
and self.sub == other.sub
and (self.rep == other.rep or not self.rep or not other.rep)
and self.size == other.size
)
[docs]
def __hash__(self) -> int:
"""Default hash function."""
return hash(self.__str__())