Source code for cocomico.base

# -*- coding: utf-8 -*-

"""
CoCoMiCo base semantic domains.
"""

import logging
import re
import sys
import xml.etree.ElementTree as etree
from itertools import product
from pathlib import Path
from typing import FrozenSet, Iterable, NamedTuple, NewType, Set, cast

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self


# Types for base domains.
# Use NewType to force typing and avoid, for example, swapping biomolecule and provenance!
# NewType only exists for the type checker: at runtime both types below are plain str.

Taxon = NewType("Taxon", str)  # Taxon must be usable as dict key.

# Identifier of a generic biomolecule, independent of the taxon it is found in.
Biomolecule = NewType("Biomolecule", str)


class Reaction(NamedTuple):
    """
    A reaction identified by its name and the taxon it belongs to.

    Reactions with the same name but in different taxa are distinct.
    """

    name: str
    taxon: Taxon

    def __str__(self) -> str:
        """Return the reaction formatted as ``NAME.TAXON``."""
        return "{}.{}".format(self.name, self.taxon)
# Metabolite needs to be a class because it has methods.
[docs] class Metabolite: """ Metabolites are metabolite biomolecules tagged with organism biomolecules. :param provenance: provenance organism :type provenance: Taxon :param biomolecule: generic metabolite biomolecule :type biomolecule: Biomolecule """ def __init__(self, provenance: Taxon, biomolecule: Biomolecule) -> None: """ :param provenance: provenance organism :param biomolecule: generic metabolite biomolecule """ self._provenance = provenance self._biomolecule = biomolecule @property def provenance(self) -> Taxon: """ Metabolite provenance organism biomolecule. :rtype: Taxon """ return self._provenance @property def biomolecule(self) -> Biomolecule: """ Metabolite generic biomolecule. :rtype: Biomolecule """ return self._biomolecule
[docs] @classmethod def from_string(cls, identifier: str) -> Self: """ Factory method to create a metabolite from a string. :param identifier: string-coded metabolite "NAME.TAXON" :rtype: Metabolite """ biomolecule, _, provenance = identifier.partition(".") return cls(provenance=Taxon(provenance), biomolecule=Biomolecule(biomolecule))
def __str__(self) -> str: return f"{self._biomolecule}.{self._provenance}" def __repr__(self) -> str: return f'Metabolite(provenance="{self._provenance}", biomolecule="{self._biomolecule}")' def __eq__(self, other) -> bool: # compare internal representations, not prettified output. return ( self._biomolecule == other._biomolecule and self._provenance == other._provenance ) def __hash__(self): return hash(self.__str__())
# MetaboliteSet is a class because it has methods.
class MetaboliteSet(Set[Metabolite]):
    """
    MetaboliteSets are sets of metabolites with additional operations for taxa.
    """

    @property
    def taxa(self) -> set[Taxon]:
        """
        The provenance taxa of the metabolites in this set.
        """
        return set(metabolite.provenance for metabolite in self)

    @property
    def biomolecules(self) -> set[Biomolecule]:
        """
        The generic biomolecules of the metabolites in this set.
        """
        return set(metabolite.biomolecule for metabolite in self)

    def select(self, taxa: set[Taxon]) -> Self:
        """
        Subset of this metabolite set whose provenance is one of the given taxa.

        The result is built with the class of ``self``.
        """
        kept = (metabolite for metabolite in self if metabolite.provenance in taxa)
        return self.__class__(kept)

    def __str__(self) -> str:
        """Return the sorted, comma-separated members enclosed in braces."""
        members = sorted(map(str, self))
        return "{" + ",".join(members) + "}"
# Seeds needs to be a class because it has methods.
class Seeds(FrozenSet[Biomolecule]):
    """
    Seeds are sets of metabolite biomolecules that define inputs to a community.

    :param seeds: set of metabolite biomolecules
    :type seeds: Iterable[Biomolecule]
    :param name: symbolic name for the set of seeds
    :type name: str, optional
    """

    name: str  # declare attribute for mypy

    def __new__(cls, *args, **kwargs) -> Self:
        """
        Initialize new seed object

        :param seeds: seed molecular species
        :param name: symbolic name for the set of seeds
        """
        # Optional name; stays None when the caller gives none.
        name = kwargs.pop("name", None)
        # Seeds can be provided as an argument or as a keyword arg; if provided,
        # a keyword argument overrides any ordinary arguments.
        seeds: Iterable[Biomolecule] = kwargs.pop("seeds", args[0] if args else [])
        # Result is a frozenset of those ids
        result: Self = cast(Self, super().__new__(cls, seeds))
        result.name = name
        logging.debug("Init %s {%s}", str(result), " ".join(sorted(result)))
        return result

    @classmethod
    def from_file(cls, file: Path) -> Self:
        """
        Read new seed object from an SBML file

        Every ``<species>`` element with an ``id`` attribute contributes one
        seed; species without an ``id`` are skipped with a warning. The name of
        the seeds is the ``name`` attribute of the ``<model>`` element, else its
        ``id`` attribute, else the file stem without its ``seeds_`` prefix.

        :param file: path to SBML file containing seed molecular species
        :type file: Path
        :raises ValueError: if the file is not well-formed XML or contains no
            ``<model>`` element
        """
        # Parse SBML file
        logging.debug("Read SBML seeds from %s", file.name)
        try:
            sbml = etree.parse(file).getroot()
        except etree.ParseError as etree_exc:
            logging.exception("Invalid SBML seeds in %s", file)
            raise ValueError("Invalid SBML seeds") from etree_exc

        # Get XML namespace from <sbml>, or empty if not present.
        # The "|$" alternative makes the pattern match even without a namespace.
        ns_match = re.search(r"\{.*\}|$", sbml.tag)
        ns = ns_match.group() if ns_match else ""

        # Get attributes of the <model> element; find() returns None when the
        # element is absent, which is reported like any other invalid SBML.
        model = sbml.find(f".//{ns}model")
        if model is None:
            logging.error("Invalid SBML seeds in %s: no <model> element", file)
            raise ValueError("Invalid SBML seeds")
        attrib = model.attrib

        # Retrieve ids of all <species> elements, at any depth.
        seeds = []
        for elt in sbml.findall(f".//{ns}species"):
            species_id = elt.attrib.get("id")
            if species_id is None:
                # Without this check str(None) would create a seed called "None".
                logging.warning("Ignoring SBML species without id in %s", file.name)
                continue
            seeds.append(Biomolecule(species_id))

        # Retrieve name for these seeds, falling back on the file name.
        name = attrib.get("name") or attrib.get("id")
        if not name:
            name = file.stem.removeprefix("seeds_")
            logging.info("No SBML id in %s, using name %s", file.name, name)

        # Result is a frozenset of those ids
        return cls(seeds=seeds, name=name)

    def __str__(self) -> str:
        """String representation."""
        return f'Seeds<{hex(id(self))}> "{self.name}"'
# Simple Exchanges are 1-1
class Exchange(NamedTuple):
    """
    A simple 1-1 exchange from one producer taxon to one consumer taxon.
    """

    producer: Taxon
    consumer: Taxon

    def __str__(self) -> str:
        """Return the exchange formatted as ``PRODUCER--CONSUMER``."""
        return "{}--{}".format(self.producer, self.consumer)
# ExchangeNM needs to be a class because it has methods.
class ExchangeNM:
    """
    An ExchangeNM defines an n to m relation between producers and consumers.
    Designed to represent metabolite exchanges.

    Exchange objects act like a set of pairs of producer-consumer relations that
    can be queried for membership. Sets of producers and consumers can also be
    retrieved separately.

    Only the set of producers and the set of consumers are stored: the pairs of
    the exchange are always the cartesian product of these two sets, including
    when the exchange was built from explicit (producer,consumer) pairs.

    :param relations: set of (producer,consumer) pairs defining this exchange
    :param producers: set of producer taxa for this exchange
    :param consumers: set of consumer taxa for this exchange
    """

    def __init__(
        self,
        relations: set[tuple[Taxon, Taxon]] | set[Exchange] | None = None,
        producers: Iterable[Taxon] | None = None,
        consumers: Iterable[Taxon] | None = None,
    ) -> None:
        """
        Create an exchange between taxa defined either by pairs of (producer,consumer)
        relations, or by the cartesian product of sets of producer and consumer taxa.

        :param relations: set of (producer,consumer) pairs defining this exchange
        :param producers: set of producer taxa defining this exchange
        :param consumers: set of consumer taxa defining this exchange
        """
        self.producers: Set[Taxon] = set(producers or ())
        self.consumers: Set[Taxon] = set(consumers or ())
        if relations is not None:
            # Single pass over the pairs, so that a one-shot iterable is not
            # exhausted after the producers have been collected.
            for producer, consumer in relations:
                self.producers.add(producer)
                self.consumers.add(consumer)

    def __iter__(self):
        """
        An exchange is iterable, and iterates over (producer,consumer) tuples.
        """
        return product(self.producers, self.consumers)

    def __contains__(self, item: tuple[Taxon, Taxon]) -> bool:
        """Test for membership without iterating."""
        return item[0] in self.producers and item[1] in self.consumers

    def __eq__(self, other: object) -> bool:
        """
        Exchanges are equal if their producer and consumer sets are equal.

        Returns ``NotImplemented`` for operands that are not exchanges, so that
        comparing with an unrelated object evaluates to ``False`` instead of
        raising ``AttributeError``.
        """
        if not isinstance(other, ExchangeNM):
            return NotImplemented
        return self.producers == other.producers and self.consumers == other.consumers