Source code for cocomico.cwl

# -*- coding: utf-8 -*-

"""
CoCoMiCo cwl endpoint.
"""

import logging
import sys
from importlib.resources import as_file, files
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple

import click

from cocomico import report, score
from cocomico.base import Biomolecule, Seeds
from cocomico.community import Community

# Reusable option decorators

option_seeds_file = click.option(
    "--seeds-file",
    multiple=True,
    help="SBML file defining seed biomolecules",
    type=click.Path(path_type=Path, exists=True),
)
option_seeds_def = click.option(
    "--seeds-def",
    multiple=True,
    help="Comma-separated list of seed biomolecules",
    type=click.STRING,
)
option_sbml_dir = click.option(
    "--sbml-dir",
    help="Directory containing SBML model files",
    type=click.Path(dir_okay=True, path_type=Path, exists=True, file_okay=False),
)
option_csv_output = click.option(
    "--csv-output",
    help="Path to write CSV output",
    type=click.Path(writable=True, path_type=Path),
)
option_header_csv = click.option(
    "--header-csv/--no-header-csv",
    help="Add header to CSV output",
    default=True,
    is_flag=True,
)
option_name = click.option(
    "--name",
    default="community",
    help="Community name (and default output filename stem)",
    type=click.STRING,
)
option_output = click.option(
    "--output",
    help="Directory to write output, created if necessary",
    type=click.Path(writable=True, dir_okay=True, path_type=Path),
)


# Main command.


@click.group()
@click.option(
    "--loglevel",
    default="INFO",
    help="Logging level INFO, DEBUG, WARN",
    type=click.STRING,
)
@click.option(
    "--symbolic-seeds/--no-symbolic-seeds",
    default=False,
    help="CSV column seeds uses symbolic name",
    is_flag=True,
)
@click.option(
    "--ignore-seed-reactions/--no-ignore-seed-reactions",
    default=False,
    help="Ignore reactions that would add seeds",
    is_flag=True,
)
@option_output
@option_seeds_file
@option_seeds_def
@option_sbml_dir
@option_csv_output
@option_header_csv
@option_name
@click.pass_context
def entrypoint(
    # pylint: disable=R0913
    ctx: click.Context,
    seeds_file: List[Path] | None = None,
    seeds_def: List[str] | None = None,
    sbml_dir: Path | None = None,
    output: Path | None = None,
    csv_output: Path | None = None,
    header_csv: bool = True,
    name: str = "community",
    loglevel: str = "INFO",
    symbolic_seeds: bool = False,
    ignore_seed_reactions: bool = False,
):
    """
    Perform CoCoMiCo analyses for communities, with respect to seeds.

    ::

        cocomico community
            --seeds-file seeds.sbml
            --json-output results.json
            --csv-output results.csv
            sbml/Com2Org*.sbml
    """
    # Logging
    loglevel_int = getattr(logging, loglevel.upper(), logging.INFO)
    logging.basicConfig(format="%(asctime)s %(message)s", level=loglevel_int)
    logging.debug("Setting loglevel to %s = %s", loglevel, loglevel_int)

    # Global options
    ctx.ensure_object(dict)
    ctx.obj["sbml_dir"] = sbml_dir
    ctx.obj["csv_output"] = csv_output
    ctx.obj["header_csv"] = header_csv
    ctx.obj["output"] = output
    ctx.obj["name"] = name
    ctx.obj["symbolic_seeds"] = symbolic_seeds
    ctx.obj["ignore_seed_reactions"] = ignore_seed_reactions

    # Seeds
    seeds_todo: list[Seeds] = retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def)
    ctx.obj["seeds_todo"] = seeds_todo


@entrypoint.command()
@option_output
@option_seeds_file
@option_seeds_def
@option_sbml_dir
@option_csv_output
@option_header_csv
@option_name
@click.option(
    "--json-output",
    help="Path to write a single JSON output or '-'",
    type=click.Path(writable=True, allow_dash=True, path_type=Path),
)
@click.argument("model", nargs=-1, type=click.Path(exists=True))
@click.pass_context
def community(
    # pylint: disable=R0913
    ctx: click.Context,
    output: Path | None = None,
    seeds_file: List[Path] | None = None,
    seeds_def: List[str] | None = None,
    sbml_dir: Path | None = None,
    csv_output: Path | None = None,
    header_csv: bool = True,
    name: str = "community",
    json_output: Path | None = None,
    model: Iterable[Path] | None = None,
):
    """
    CoCoMiCo for a community defined by the MODELs, or by all models in sbml_dir.

    ::

        cocomico community
            --seeds-def M_E_c,M_F_c,M_X1_c
            --seeds-file seeds.sbml
            --json-output results.json
            --csv-output results.csv
            sbml/Com2Org*.sbml
    """
    logging.info("Command community {%s}", " ".join(Path(m).stem for m in model or []))
    ignore_seed_reactions = ctx.obj["ignore_seed_reactions"]

    communities: dict[str, Community] = {}
    name = name or ctx.obj["name"]
    if output and not csv_output:
        csv_output = output / f"{ name }.csv"
    if output and not json_output:
        json_output = output / f"{ name }.json"

    # Seeds
    seeds_todo: list[Seeds] = (
        retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def)
        or ctx.obj["seeds_todo"]
    )
    if not seeds_todo:
        logging.warning("No seeds provided")

    # Construct community from model files, either specified with absolute paths
    # or relative to sbml_dir if given.
    model_paths = [Path(f) for f in model] if model else []
    try:
        communities[name] = Community(sbml_dir=sbml_dir, models=model_paths)
    except ValueError:
        logging.warning(
            "Ignoring invalid SBML in %s", " ".join(m.name for m in model_paths)
        )
        # pass; ignore files that don't have valid SBML.

    if not communities:
        logging.warning("No communities provided, skipping")
        return

    # Post-process communities to ignore seed reactions
    if ignore_seed_reactions:
        comm = communities[name]
        logging.info("Removing seed reactions from [%s] %s", name, comm)
        logging.debug("Before: [%s] %s %d reactions", name, comm, len(comm.reactions))
        communities[name].remove_seed_reactions()
        logging.debug("After: [%s] %s %d reactions", name, comm, len(comm.reactions))

    # Process communities.
    process_communities(
        seeds_todo=seeds_todo,
        communities=communities,
        output=output,
        json_output={name: json_output},
        csv_output=csv_output,
        header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"],
        symbolic_seeds=ctx.obj["symbolic_seeds"],
    )


[docs] def find_specs( paths: Iterable[Path], sbml_dir: Path ) -> Iterable[Tuple[str, Community]]: """ Find all communities in given spec files or directories of spec files. """ for path in paths: specs = list(path.iterdir()) if path.is_dir() else [path] logging.info("Find %s: [%s]", path, " ".join(str(i) for i in specs)) for spec in specs: try: logging.info("Find: Reading spec %s", spec.name) communities = Community.from_specification(spec=spec, sbml_dir=sbml_dir) for i, c in communities.items(): logging.debug("Find: Valid spec %s: %s %s", spec.name, i, str(c)) yield (i, c) except ValueError: logging.exception( "Find: Invalid SBML in community specification %s", spec.name ) sys.exit(1)
@entrypoint.command() @option_output @option_seeds_file @option_seeds_def @option_sbml_dir @option_csv_output @option_header_csv @click.option( "--spec", help="JSON file that specifies communities", type=click.Path(path_type=Path, exists=True), multiple=True, ) @click.argument("argspec", nargs=-1, type=click.Path(exists=True, path_type=Path)) @click.pass_context def specification( # pylint: disable=R0913 ctx: click.Context, spec: List[Path], argspec: List[Path], output: Path | None = None, seeds_file: List[Path] | None = None, seeds_def: List[str] | None = None, sbml_dir: Path | None = None, csv_output: Path | None = None, header_csv: bool = True, ): """ CoCoMiCo for all communities defined by a specification file. :: cocomico spec --spec communities.json --seeds-file seeds.sbml --output results """ logging.info("Command specification %s", " ".join(p.name for p in spec + argspec)) sbml_dir = sbml_dir or ctx.obj["sbml_dir"] ignore_seed_reactions = ctx.obj["ignore_seed_reactions"] seeds_todo: list[Seeds] = ( retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def) or ctx.obj["seeds_todo"] ) if not seeds_todo: logging.warning("Specification: No seeds provided, assume self-sufficient") communities: dict[str, Community] = dict( find_specs(paths=spec + argspec, sbml_dir=sbml_dir) ) if not communities: logging.warning("Specification: No communities provided, nothing to do") return # Post-process communities to ignore seed reactions if ignore_seed_reactions: for comm in communities.values(): comm.remove_seed_reactions() json_args: dict[str, Path | None] | None = ( {name: output / f"{name}.json" for name in communities.keys()} if output else None ) # Process communities. process_communities( seeds_todo=seeds_todo, communities=communities, output=output, json_output=json_args, csv_output=csv_output or ctx.obj["csv_output"], header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"], symbolic_seeds=ctx.obj["symbolic_seeds"], ) @entrypoint.command() @option_output @option_header_csv @click.pass_context def example( ctx: click.Context, output: Path, header_csv: bool = True, ): """ CoCoMiCo for a small example community. \b cocomico example --output results """ logging.info("Command example") example_dir = files("cocomico.example") with ( as_file(example_dir / "seeds.sbml") as seeds, as_file(example_dir / "communities.json") as spec, as_file(example_dir / "sbml") as sbml, ): seeds_todo: list[Seeds] = retrieve_seeds(seeds_file=[seeds]) communities = Community.from_specification(spec=spec, sbml_dir=sbml) wd = output if output else Path(".") # Process communities. process_communities( seeds_todo=seeds_todo, communities=communities, output=output, json_output={name: wd / f"{name}.json" for name in communities.keys()}, csv_output=wd / "example.csv", header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"], symbolic_seeds=ctx.obj["symbolic_seeds"], )
[docs] def process_communities( # pylint: disable=R0913 seeds_todo: Iterable[Seeds], communities: dict[str, Community], output: Path | None = None, json_output: dict[str, Path | None] | None = None, csv_output: Path | None = None, header_csv: bool = True, symbolic_seeds: bool = False, ): """ Analyze each community with respect to every set of seeds. """ logging.info("Processing communities %s", " ".join(communities.keys())) for name, comm in communities.items(): logging.info( "Community [%s] %s %d models, %d reactions, %d biomolecules", name, str(comm), len(comm), len(comm.reactions), len(comm.biomolecule), ) logging.info( " Members [%s] %s {%s}", name, str(comm), " ".join(sorted(comm.keys())) ) logging.info( "Output %s csv %s json %s", output, csv_output, " ".join(str(f) for f in json_output.values()) if json_output else None, ) # Assure output directory exists, because json_output and # csv_output may be relative paths. if output is not None: output.mkdir(parents=True, exist_ok=True) for name, comm in communities.items(): # Analyze community with respect to every set of seeds. for s in seeds_todo: logging.info("Process %s on seeds to do %s", str(comm), str(s)) _ = comm.scope(s) # Metrics are cached in score _ = score.cooperation(comm, s) _ = score.competition(comm, s) _ = score.delta(comm, s) _ = score.rho(comm, s) # JSON output if requested, to file or to stdout if json_output and json_output[name]: dash = str(json_output[name]) == "-" payload = report.from_community( comm, with_results=True, seeds_todo=seeds_todo ) json_output_path = json_output[name] or f"{name}.json" logging.info("Request JSON output for %s to %s", name, json_output_path) with ( open(json_output_path, mode="w+", encoding="UTF-8") if not dash else sys.stdout ) as json_file: json_file.write(payload) json_file.write("\n") # clear cache comm.clear_answers() # CSV output if requested, to file logging.info( "Request tabular output for communities %s", " ".join(communities.keys()) ) for name, comm in communities.items(): logging.info(" [%s] %s {%s}", name, str(comm), " ".join(sorted(comm.keys()))) logging.info(" [%s] results %s", name, "".join(str(i) for i in seeds_todo)) if csv_output: logging.info("Writing CSV output to %s", csv_output) report.write_tabular( communities=communities, output_file=Path(csv_output), header=header_csv, seeds_todo=seeds_todo, symbolic_seeds=symbolic_seeds, )
[docs] def retrieve_seeds( seeds_file: Sequence[Path] | None = None, seeds_def: Sequence[str] | None = None, ) -> list[Seeds]: """ Combine seeds specified in an SBML file and seeds specified in-line as comma-separated strings. May be empty: the group-level seeds option is often unspecified. """ if seeds_file is None: seeds_file = [] if seeds_def is None: seeds_def = [] # Seeds specified in-line as comma-separated strings seeds_todo: List[Seeds] = [ Seeds({Biomolecule(i) for i in spec.split(",")}) for spec in seeds_def ] for todo in seeds_todo: logging.info("Seeds to do, specified in-line: %s", repr(todo)) # Seeds specified in SBML files for f in list(seeds_file): if f.is_file(): try: todo = Seeds.from_file(f) logging.info("Seeds to do, from %s: %s", f.name, repr(todo)) seeds_todo.append(todo) except ValueError: logging.warning("Ignoring invalid seeds in %s", f.name) return seeds_todo