Source code for cocomico.sbmlreader

# -*- coding: utf-8 -*-

"""
CoCoMiCo SBML reader.
"""

import gzip
import logging
import re
import xml.etree.ElementTree as etree
from pathlib import Path
from typing import Tuple
from xml.etree.ElementTree import Element  # for type hinting

from cocomico.base import Biomolecule, Metabolite, Reaction, Taxon
from cocomico.model import Model

# SbmlReader class


[docs] class SbmlReader: """ SBML reader builds Models from the bipartite metabolite-reaction relations extracted from SBML files. The extraction is memoized: if a file or taxon is requested that has already been analyzed, the existing model is shared rather than being recomputed. :param sbml_dir: optional directory where model files are found :type sbml_dir: Path :raises ValueError: if the model is empty or invalid """ def __init__(self, sbml_dir: Path | None = None) -> None: """ Initialize an SBML reader, optionally with an SBML directory. """ # self.sbml_dir is None or a Path specified by keyword self.sbml_dir: None | Path = None if sbml_dir: self.sbml_dir = sbml_dir assert self.sbml_dir.is_dir() # Memoization maps self.files: dict[Path, Taxon] = {} self.models: dict[Taxon, Model] = {}
[docs] def from_file(self, path: Path) -> Tuple[Taxon, Model]: """ Parse an SBML file and convert its relations to a Model. """ # If we have already seen this file, use previous result if path in self.files: taxon = self.files[path] return taxon, self.models[taxon] # Read XML from the file, retrieve the taxon id and the model. taxon, sbml_model = self.read_sbml_file(path) logging.debug("Read SBML model %s from %s", taxon, path.name) # If we have already seen this taxon, use previous result. if taxon in self.models: return taxon, self.models[taxon] # Decode the relations in the SBML model self.models[taxon] = self.decode_relations(taxon, sbml_model) self.files[path] = taxon # Return both the taxon and the model return taxon, self.models[taxon]
[docs] @staticmethod def read_sbml_file(candidate: Path): """ RetrieveExtract the taxon id and the model from the XML tree. """ # Automatically check for file suffixes, w/ w/o compression. # Try foo, foo.gz, foo.sbml, foo.sbml.gz, foo.xml, foo.xml.gz. path: Path | None = next( ( path_suf for ext in ["", ".sbml", ".xml"] for suffix in [ext, ext + ".gz"] if (path_suf := candidate.with_suffix(suffix)).is_file() ), None, ) if path is None: logging.exception("SbmlReader: Invalid SBML path %s", path) raise ValueError("Invalid SBML model") # Read SBML file: try to uncompress first, read directly otherwise. with gzip.open(path, "rt") as fh: try: logging.debug("SbmlReader: %s try to read as compressed", path) xml_tree = etree.parse(fh).getroot() except gzip.BadGzipFile: logging.debug("SbmlReader: %s is not compressed, read directly", path) try: xml_tree = etree.parse(str(path)).getroot() except SyntaxError as etree_exc: logging.exception("SbmlReader: Invalid SBML model in %s", path) raise ValueError("Invalid SBML model") from etree_exc sbml_model = None taxon: None | Taxon = None for e in xml_tree: taxon = Taxon(e.attrib.get("id") or path.stem) if not e.attrib.get("id"): logging.warning("No SBML id in %s, using %s", path.name, taxon) if e.tag[0] == "{": # ignore uri _, tag = e.tag[1:].split("}") else: tag = e.tag if tag == "model": sbml_model = e break if sbml_model is None: logging.exception("SbmlReader: Invalid SBML model in %s", path) raise ValueError("Invalid SBML model") return taxon, sbml_model
[docs] @staticmethod def decode_relations(taxon: Taxon, sbml_model: Element) -> Model: """ Search for reaction-metabolite relations, also including reversed relations for those reactions that are reversible. """ # Get namespace from this SBML model. ns = re.search(r"\{.*\}|$", sbml_model.tag).group() # type: ignore[union-attr] # XPath search terms for reactions, metabolites wrt reactions, species # using namespace from this SBML model. xpath_reaction = [ f".//{ ns }reaction{ filter }" for filter in ["", '[@reversible="true"]'] ] xpath_role = [ f"./{ ns }listOf{ role }/{ ns }speciesReference" for role in ["Products", "Reactants"] ] xpath_species = f".//{ ns }species" reac_suffix = ["", "rev"] # Decode species-taxon relations. biomolecule = { Metabolite( provenance=taxon, biomolecule=Biomolecule(str(elt.attrib.get("id"))) ) for elt in sbml_model.findall(xpath_species) } # Decode species-reaction-taxon relations. cases = { "product": [(0, 0), (1, 1)], "reactant": [(0, 1), (1, 0)], } relations = {} for role, role_directions in cases.items(): relations[role] = { ( Metabolite( biomolecule=Biomolecule(str(biomolecule.attrib.get("species"))), provenance=taxon, ), Reaction( name=str(reaction.attrib.get("id")) + reac_suffix[reac_direction], taxon=taxon, ), taxon, ) for reac_direction, role_invert in role_directions for reaction in sbml_model.findall(xpath_reaction[reac_direction]) for biomolecule in reaction.findall(xpath_role[role_invert]) } # Return the sbml_mModel return Model(biomolecule=biomolecule, relations=relations)
[docs] def normalize(self, path: Path, sbml_dir: Path | None = None): """ Try to relative this SBML file name to a sbml_dir, either the one specified as a parameter or the default one encapsulated in the reader. """ if root := sbml_dir or self.sbml_dir: try: return path.relative_to(root) except ValueError: return path return path
[docs] def __str__(self) -> str: """String representation of class SbmlReader.""" return f"Model<{hex(id(self))}>"