# -*- coding: utf-8 -*-
"""
CoCoMiCo cwl endpoint.
"""
import logging
import sys
from importlib.resources import as_file, files
from pathlib import Path
from typing import Iterable, List, Sequence, Tuple
import click
from cocomico import report, score
from cocomico.base import Biomolecule, Seeds
from cocomico.community import Community
# Reusable option decorators
option_seeds_file = click.option(
"--seeds-file",
multiple=True,
help="SBML file defining seed biomolecules",
type=click.Path(path_type=Path, exists=True),
)
option_seeds_def = click.option(
"--seeds-def",
multiple=True,
help="Comma-separated list of seed biomolecules",
type=click.STRING,
)
option_sbml_dir = click.option(
"--sbml-dir",
help="Directory containing SBML model files",
type=click.Path(dir_okay=True, path_type=Path, exists=True, file_okay=False),
)
option_csv_output = click.option(
"--csv-output",
help="Path to write CSV output",
type=click.Path(writable=True, path_type=Path),
)
option_header_csv = click.option(
"--header-csv/--no-header-csv",
help="Add header to CSV output",
default=True,
is_flag=True,
)
option_name = click.option(
"--name",
default="community",
help="Community name (and default output filename stem)",
type=click.STRING,
)
option_output = click.option(
"--output",
help="Directory to write output, created if necessary",
type=click.Path(writable=True, dir_okay=True, path_type=Path),
)
# Main command.
@click.group()
@click.option(
"--loglevel",
default="INFO",
help="Logging level INFO, DEBUG, WARN",
type=click.STRING,
)
@click.option(
"--symbolic-seeds/--no-symbolic-seeds",
default=False,
help="CSV column seeds uses symbolic name",
is_flag=True,
)
@click.option(
"--ignore-seed-reactions/--no-ignore-seed-reactions",
default=False,
help="Ignore reactions that would add seeds",
is_flag=True,
)
@option_output
@option_seeds_file
@option_seeds_def
@option_sbml_dir
@option_csv_output
@option_header_csv
@option_name
@click.pass_context
def entrypoint(
# pylint: disable=R0913
ctx: click.Context,
seeds_file: List[Path] | None = None,
seeds_def: List[str] | None = None,
sbml_dir: Path | None = None,
output: Path | None = None,
csv_output: Path | None = None,
header_csv: bool = True,
name: str = "community",
loglevel: str = "INFO",
symbolic_seeds: bool = False,
ignore_seed_reactions: bool = False,
):
"""
Perform CoCoMiCo analyses for communities, with respect to seeds.
::
cocomico community
--seeds-file seeds.sbml
--json-output results.json
--csv-output results.csv
sbml/Com2Org*.sbml
"""
# Logging
loglevel_int = getattr(logging, loglevel.upper(), logging.INFO)
logging.basicConfig(format="%(asctime)s %(message)s", level=loglevel_int)
logging.debug("Setting loglevel to %s = %s", loglevel, loglevel_int)
# Global options
ctx.ensure_object(dict)
ctx.obj["sbml_dir"] = sbml_dir
ctx.obj["csv_output"] = csv_output
ctx.obj["header_csv"] = header_csv
ctx.obj["output"] = output
ctx.obj["name"] = name
ctx.obj["symbolic_seeds"] = symbolic_seeds
ctx.obj["ignore_seed_reactions"] = ignore_seed_reactions
# Seeds
seeds_todo: list[Seeds] = retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def)
ctx.obj["seeds_todo"] = seeds_todo
@entrypoint.command()
@option_output
@option_seeds_file
@option_seeds_def
@option_sbml_dir
@option_csv_output
@option_header_csv
@option_name
@click.option(
"--json-output",
help="Path to write a single JSON output or '-'",
type=click.Path(writable=True, allow_dash=True, path_type=Path),
)
@click.argument("model", nargs=-1, type=click.Path(exists=True))
@click.pass_context
def community(
# pylint: disable=R0913
ctx: click.Context,
output: Path | None = None,
seeds_file: List[Path] | None = None,
seeds_def: List[str] | None = None,
sbml_dir: Path | None = None,
csv_output: Path | None = None,
header_csv: bool = True,
name: str = "community",
json_output: Path | None = None,
model: Iterable[Path] | None = None,
):
"""
CoCoMiCo for a community defined by the MODELs, or by all models in sbml_dir.
::
cocomico community
--seeds-def M_E_c,M_F_c,M_X1_c
--seeds-file seeds.sbml
--json-output results.json
--csv-output results.csv
sbml/Com2Org*.sbml
"""
logging.info("Command community {%s}", " ".join(Path(m).stem for m in model or []))
ignore_seed_reactions = ctx.obj["ignore_seed_reactions"]
communities: dict[str, Community] = {}
name = name or ctx.obj["name"]
if output and not csv_output:
csv_output = output / f"{ name }.csv"
if output and not json_output:
json_output = output / f"{ name }.json"
# Seeds
seeds_todo: list[Seeds] = (
retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def)
or ctx.obj["seeds_todo"]
)
if not seeds_todo:
logging.warning("No seeds provided")
# Construct community from model files, either specified with absolute paths
# or relative to sbml_dir if given.
model_paths = [Path(f) for f in model] if model else []
try:
communities[name] = Community(sbml_dir=sbml_dir, models=model_paths)
except ValueError:
logging.warning(
"Ignoring invalid SBML in %s", " ".join(m.name for m in model_paths)
)
# pass; ignore files that don't have valid SBML.
if not communities:
logging.warning("No communities provided, skipping")
return
# Post-process communities to ignore seed reactions
if ignore_seed_reactions:
comm = communities[name]
logging.info("Removing seed reactions from [%s] %s", name, comm)
logging.debug("Before: [%s] %s %d reactions", name, comm, len(comm.reactions))
communities[name].remove_seed_reactions()
logging.debug("After: [%s] %s %d reactions", name, comm, len(comm.reactions))
# Process communities.
process_communities(
seeds_todo=seeds_todo,
communities=communities,
output=output,
json_output={name: json_output},
csv_output=csv_output,
header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"],
symbolic_seeds=ctx.obj["symbolic_seeds"],
)
[docs]
def find_specs(
paths: Iterable[Path], sbml_dir: Path
) -> Iterable[Tuple[str, Community]]:
"""
Find all communities in given spec files or directories of
spec files.
"""
for path in paths:
specs = list(path.iterdir()) if path.is_dir() else [path]
logging.info("Find %s: [%s]", path, " ".join(str(i) for i in specs))
for spec in specs:
try:
logging.info("Find: Reading spec %s", spec.name)
communities = Community.from_specification(spec=spec, sbml_dir=sbml_dir)
for i, c in communities.items():
logging.debug("Find: Valid spec %s: %s %s", spec.name, i, str(c))
yield (i, c)
except ValueError:
logging.exception(
"Find: Invalid SBML in community specification %s", spec.name
)
sys.exit(1)
@entrypoint.command()
@option_output
@option_seeds_file
@option_seeds_def
@option_sbml_dir
@option_csv_output
@option_header_csv
@click.option(
"--spec",
help="JSON file that specifies communities",
type=click.Path(path_type=Path, exists=True),
multiple=True,
)
@click.argument("argspec", nargs=-1, type=click.Path(exists=True, path_type=Path))
@click.pass_context
def specification(
# pylint: disable=R0913
ctx: click.Context,
spec: List[Path],
argspec: List[Path],
output: Path | None = None,
seeds_file: List[Path] | None = None,
seeds_def: List[str] | None = None,
sbml_dir: Path | None = None,
csv_output: Path | None = None,
header_csv: bool = True,
):
"""
CoCoMiCo for all communities defined by a specification file.
::
cocomico spec
--spec communities.json
--seeds-file seeds.sbml
--output results
"""
logging.info("Command specification %s", " ".join(p.name for p in spec + argspec))
sbml_dir = sbml_dir or ctx.obj["sbml_dir"]
ignore_seed_reactions = ctx.obj["ignore_seed_reactions"]
seeds_todo: list[Seeds] = (
retrieve_seeds(seeds_file=seeds_file, seeds_def=seeds_def)
or ctx.obj["seeds_todo"]
)
if not seeds_todo:
logging.warning("Specification: No seeds provided, assume self-sufficient")
communities: dict[str, Community] = dict(
find_specs(paths=spec + argspec, sbml_dir=sbml_dir)
)
if not communities:
logging.warning("Specification: No communities provided, nothing to do")
return
# Post-process communities to ignore seed reactions
if ignore_seed_reactions:
for comm in communities.values():
comm.remove_seed_reactions()
json_args: dict[str, Path | None] | None = (
{name: output / f"{name}.json" for name in communities.keys()}
if output
else None
)
# Process communities.
process_communities(
seeds_todo=seeds_todo,
communities=communities,
output=output,
json_output=json_args,
csv_output=csv_output or ctx.obj["csv_output"],
header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"],
symbolic_seeds=ctx.obj["symbolic_seeds"],
)
@entrypoint.command()
@option_output
@option_header_csv
@click.pass_context
def example(
ctx: click.Context,
output: Path,
header_csv: bool = True,
):
"""
CoCoMiCo for a small example community.
\b
cocomico example --output results
"""
logging.info("Command example")
example_dir = files("cocomico.example")
with (
as_file(example_dir / "seeds.sbml") as seeds,
as_file(example_dir / "communities.json") as spec,
as_file(example_dir / "sbml") as sbml,
):
seeds_todo: list[Seeds] = retrieve_seeds(seeds_file=[seeds])
communities = Community.from_specification(spec=spec, sbml_dir=sbml)
wd = output if output else Path(".")
# Process communities.
process_communities(
seeds_todo=seeds_todo,
communities=communities,
output=output,
json_output={name: wd / f"{name}.json" for name in communities.keys()},
csv_output=wd / "example.csv",
header_csv=header_csv if header_csv is not None else ctx.obj["header_csv"],
symbolic_seeds=ctx.obj["symbolic_seeds"],
)
[docs]
def process_communities(
# pylint: disable=R0913
seeds_todo: Iterable[Seeds],
communities: dict[str, Community],
output: Path | None = None,
json_output: dict[str, Path | None] | None = None,
csv_output: Path | None = None,
header_csv: bool = True,
symbolic_seeds: bool = False,
):
"""
Analyze each community with respect to every set of seeds.
"""
logging.info("Processing communities %s", " ".join(communities.keys()))
for name, comm in communities.items():
logging.info(
"Community [%s] %s %d models, %d reactions, %d biomolecules",
name,
str(comm),
len(comm),
len(comm.reactions),
len(comm.biomolecule),
)
logging.info(
" Members [%s] %s {%s}", name, str(comm), " ".join(sorted(comm.keys()))
)
logging.info(
"Output %s csv %s json %s",
output,
csv_output,
" ".join(str(f) for f in json_output.values()) if json_output else None,
)
# Assure output directory exists, because json_output and
# csv_output may be relative paths.
if output is not None:
output.mkdir(parents=True, exist_ok=True)
for name, comm in communities.items():
# Analyze community with respect to every set of seeds.
for s in seeds_todo:
logging.info("Process %s on seeds to do %s", str(comm), str(s))
_ = comm.scope(s)
# Metrics are cached in score
_ = score.cooperation(comm, s)
_ = score.competition(comm, s)
_ = score.delta(comm, s)
_ = score.rho(comm, s)
# JSON output if requested, to file or to stdout
if json_output and json_output[name]:
dash = str(json_output[name]) == "-"
payload = report.from_community(
comm, with_results=True, seeds_todo=seeds_todo
)
json_output_path = json_output[name] or f"{name}.json"
logging.info("Request JSON output for %s to %s", name, json_output_path)
with (
open(json_output_path, mode="w+", encoding="UTF-8")
if not dash
else sys.stdout
) as json_file:
json_file.write(payload)
json_file.write("\n")
# clear cache
comm.clear_answers()
# CSV output if requested, to file
logging.info(
"Request tabular output for communities %s", " ".join(communities.keys())
)
for name, comm in communities.items():
logging.info(" [%s] %s {%s}", name, str(comm), " ".join(sorted(comm.keys())))
logging.info(" [%s] results %s", name, "".join(str(i) for i in seeds_todo))
if csv_output:
logging.info("Writing CSV output to %s", csv_output)
report.write_tabular(
communities=communities,
output_file=Path(csv_output),
header=header_csv,
seeds_todo=seeds_todo,
symbolic_seeds=symbolic_seeds,
)
[docs]
def retrieve_seeds(
seeds_file: Sequence[Path] | None = None,
seeds_def: Sequence[str] | None = None,
) -> list[Seeds]:
"""
Combine seeds specified in an SBML file and seeds specified in-line
as comma-separated strings. May be empty: the group-level seeds option
is often unspecified.
"""
if seeds_file is None:
seeds_file = []
if seeds_def is None:
seeds_def = []
# Seeds specified in-line as comma-separated strings
seeds_todo: List[Seeds] = [
Seeds({Biomolecule(i) for i in spec.split(",")}) for spec in seeds_def
]
for todo in seeds_todo:
logging.info("Seeds to do, specified in-line: %s", repr(todo))
# Seeds specified in SBML files
for f in list(seeds_file):
if f.is_file():
try:
todo = Seeds.from_file(f)
logging.info("Seeds to do, from %s: %s", f.name, repr(todo))
seeds_todo.append(todo)
except ValueError:
logging.warning("Ignoring invalid seeds in %s", f.name)
return seeds_todo