# -*- coding: utf-8 -*-
"""
CoCoMiCo base semantic domains.
"""
import logging
import re
import sys
import xml.etree.ElementTree as etree
from itertools import product
from pathlib import Path
from typing import FrozenSet, Iterable, Iterator, NamedTuple, NewType, Set, cast

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self
# Types for base domains.
# Use NewType to force typing and avoid, for example, swapping biomolecule and provenance!
# NewType only affects static type checking: at runtime both are plain ``str`` values.
Taxon = NewType("Taxon", str)  # Taxon must be usable as dict key.
Biomolecule = NewType("Biomolecule", str)  # Molecular species id (e.g. an SBML <species> id).
class Reaction(NamedTuple):
    """
    A reaction identified by its name and the taxon it belongs to.

    Reactions with the same name but in different taxa are distinct.

    :param name: name of the reaction
    :param taxon: taxon in which the reaction occurs
    """

    name: str
    taxon: Taxon

    def __str__(self) -> str:
        """Return the reaction formatted as ``name.taxon``."""
        return f"{self.name}.{self.taxon}"
# Metabolite needs to be a class because it has methods.
# MetaboliteSet is a class because it has methods.
# Seeds needs to be a class because it has methods.
class Seeds(FrozenSet[Biomolecule]):
    """
    Seeds are sets of metabolite biomolecules that define inputs to a community.

    :param seeds: set of metabolite biomolecules
    :type seeds: Iterable[Biomolecule]
    :param name: symbolic name for the set of seeds
    :type name: str, optional
    """

    # Declared for mypy; the name is optional and is None when not provided.
    name: str | None

    def __new__(cls, *args, **kwargs) -> Self:
        """
        Initialize new seed object

        :param seeds: seed molecular species, given either as the first
            positional argument or as the ``seeds`` keyword argument
        :param name: symbolic name for the set of seeds (``None`` if omitted)
        :return: the new, immutable set of seeds
        """
        # Optional name.
        name: str | None = kwargs.pop("name", None)
        # Seeds can be provided as an argument or as a keyword arg; if provided
        # a keyword argument overrides any ordinary arguments.
        seeds: Iterable[Biomolecule] = kwargs.pop("seeds", args[0] if args else ())
        # Result is a frozenset of those ids; a frozenset is built in __new__,
        # so the extra attribute has to be attached here, not in __init__.
        result: Self = cast(Self, super().__new__(cls, seeds))
        result.name = name
        logging.debug("Init %s {%s}", str(result), " ".join(sorted(result)))
        return result

    @classmethod
    def from_file(cls, file: Path) -> Self:
        """
        Read new seed object from an SBML file

        The seeds are the ids of all ``<species>`` elements. The name is taken
        from the ``name`` attribute of ``<model>``, else from its ``id``
        attribute, else from the file stem without a leading ``seeds_``.

        :param file: path to SBML file containing seed molecular species
        :type file: Path
        :return: the seeds read from the file
        :raises ValueError: if the file is not well-formed XML or contains
            no ``<model>`` element
        """
        # Parse SBML file; only the parse itself is expected to raise ParseError.
        logging.debug("Read SBML seeds from %s", file.name)
        try:
            sbml = etree.parse(file).getroot()
        except etree.ParseError as etree_exc:
            logging.exception("Invalid SBML seeds in %s", file)
            raise ValueError("Invalid SBML seeds") from etree_exc

        # Get XML namespace from <sbml>, or empty if not present. The "|$"
        # alternative guarantees a match, so .group() is always available.
        ns = re.search(r"\{.*\}|$", sbml.tag).group()  # type: ignore[union-attr]

        # Top-level <model> element; report its absence as invalid input
        # instead of failing with an AttributeError on None.
        model = sbml.find(f".//{ns}model")
        if model is None:
            logging.error("No <model> element in SBML seeds %s", file)
            raise ValueError("Invalid SBML seeds")

        # Retrieve ids of all <species> elements. A species without an id
        # cannot name a biomolecule, so it is skipped rather than being
        # turned into a bogus seed called "None".
        seeds = []
        for elt in sbml.findall(f".//{ns}species"):
            species_id = elt.get("id")
            if species_id is None:
                logging.warning("Ignoring <species> without id in %s", file.name)
                continue
            seeds.append(Biomolecule(species_id))

        # Retrieve name for these seeds: model name, then model id, then file stem.
        model_name = model.get("name")
        model_id = model.get("id")
        name = model_name or model_id or file.stem.removeprefix("seeds_")
        if not (model_name or model_id):
            logging.info("No SBML id in %s, using name %s", file.name, name)

        # Result is a frozenset of those ids
        return cls(seeds=seeds, name=name)

    def __str__(self) -> str:
        """String representation."""
        return f'Seeds<{hex(id(self))}> "{self.name}"'
# Simple Exchanges are 1-1
class Exchange(NamedTuple):
    """
    A simple 1-1 exchange between one producer taxon and one consumer taxon.

    :param producer: taxon on the producing side of the exchange
    :param consumer: taxon on the consuming side of the exchange
    """

    producer: Taxon
    consumer: Taxon

    def __str__(self) -> str:
        """Return the exchange formatted as ``producer--consumer``."""
        return f"{self.producer}--{self.consumer}"
# ExchangeNM needs to be a class because it has methods.
class ExchangeNM:
    """
    An ExchangeNM defines an n to m relation between producers and consumers.

    Designed to represent metabolite exchanges.

    Exchange objects act like a set of pairs of producer-consumer relations
    that can be queried for membership. Sets of producers and consumers can
    also be retrieved separately.

    Only the set of producers and the set of consumers are stored, so the
    exchange always behaves as the full cartesian product of the two sets.

    :param relations: set of (producer,consumer) pairs for this exchange
    :param producers: set of producer taxa for this exchange
    :param consumers: set of consumer taxa for this exchange
    """

    def __init__(
        self,
        relations: set[tuple[Taxon, Taxon]] | set[Exchange] | None = None,
        producers: Iterable[Taxon] | None = None,
        consumers: Iterable[Taxon] | None = None,
    ) -> None:
        """
        Create an exchange between taxa defined either by pairs of
        (producer,consumer) relations, or by the cartesian product
        of sets of producer and consumer taxa.

        When both forms are given, the taxa found in ``relations`` are added
        to ``producers`` and ``consumers``.

        :param relations: set of (producer,consumer) pairs defining this exchange
        :param producers: set of producer taxa defining this exchange
        :param consumers: set of consumer taxa defining this exchange
        """
        self.producers: Set[Taxon] = set(producers or ())
        self.consumers: Set[Taxon] = set(consumers or ())
        if relations is not None:
            # Single pass, so that a one-shot iterable of pairs is not
            # exhausted before the consumers have been collected.
            for producer, consumer in relations:
                self.producers.add(producer)
                self.consumers.add(consumer)

    def __iter__(self) -> Iterator[tuple[Taxon, Taxon]]:
        """
        An exchange is iterable, and iterates over (producer,consumer) tuples.
        """
        return product(self.producers, self.consumers)

    def __contains__(self, item: tuple[Taxon, Taxon]) -> bool:
        """Test for membership without iterating."""
        return item[0] in self.producers and item[1] in self.consumers

    def __eq__(self, other: object) -> bool:
        """Exchanges are equal if their producer and consumer sets are equal."""
        if not isinstance(other, ExchangeNM):
            # Let Python try the reflected comparison instead of failing
            # with an AttributeError on unrelated objects.
            return NotImplemented
        return self.producers == other.producers and self.consumers == other.consumers