Source code for rnets.parser

# -*- coding: utf-8 -*-
"""Parse csv files to build compound/reaction/graph objects.

Attributes:
    S (type): Type constraint for StrEnum.
    T (type): Type variable.
    O (type): Type variable.
"""

from collections.abc import Callable, Sequence
from enum import auto, StrEnum
from functools import reduce
from itertools import chain
from pathlib import Path
from .struct import Compound, FFlags, Network, Reaction, Visibility


[docs] class CompoundCol(StrEnum): """Posible values for the compound columns.""" Energy = auto() Name = auto() Visible = auto() Opts = auto() Fflags = auto() Conc = auto()
[docs] class Direction(StrEnum): """Direction of a reaction. Can be Left, Right or Bidirectional.""" Left = "<-" Right = "->" Biderectional = "<->"
[docs] class ReactionCol(StrEnum): """Possible values for the reaction columns.""" CLeft = auto() CRight = auto() Energy = auto() Direction = auto() Visible = auto() Name = auto() Opts = auto()
# Columns used as the required-column set when parsing a compounds table.
REQ_COMP_COL: set[CompoundCol] = {CompoundCol.Name, CompoundCol.Energy}

# Columns used as the required-column set when parsing a reactions table.
REQ_REACT_COL: set[ReactionCol] = {
    ReactionCol.CLeft,
    ReactionCol.CRight,
    ReactionCol.Energy,
}
def assure_compound(
    cn: str
    , cs: Sequence[Compound]
) -> Compound:
    """Return the compound named ``cn``, raising if it is not in ``cs``.

    Args:
        cn (str): Candidate compound name.
        cs (sequence of :obj:`Compound`): Available candidates.

    Returns:
        Compound: The first :obj:`Compound` in ``cs`` whose ``name`` equals
        ``cn``.

    Raises:
        :obj:`ValueError`: If the name is not present in cs.
    """
    for candidate in cs:
        if candidate.name == cn:
            return candidate
    # Raised outside of any ``except`` block on purpose: the error then does
    # not carry an unrelated internal StopIteration as its context.
    raise ValueError(
        f"Compound of name {cn} in reactions not found in compounds"
    )
[docs] def apply_maybe[T, O]( fn: Callable[[T,], O] , s: T | None ) -> O | None: """Given a function fn that takes as an input a value of type T and returns a value of type an optional value of type T and O, apply fn only s is not None. Args: fn (function): Function that takes a value of type T as input and returns a value of Type O. s (any value or None): Value to be used as the input value. Returns: Either the projected O value or None. """ match s: case None: return None case _: return fn(s)
def parse_vis(
    s: str
    , default: Visibility = Visibility.TRUE
) -> Visibility:
    """Parse a string representing the visibility of a node.

    Args:
        s (str): String to parse.
        default (:obj:`Visibility`, optional): Value returned when ``s`` is
            not one of the recognized tokens. Defaults to
            :obj:`Visibility.TRUE`.

    Returns:
        :obj:`Visibility`: Value corresponding to the parsed statement.

    Note:
        The value is lowercased before comparing. ``t`` and ``true`` are
        parsed as :obj:`Visibility.TRUE`, ``f`` and ``false`` as
        :obj:`Visibility.FALSE`, and ``g``, ``grey`` and ``gray`` as
        :obj:`Visibility.GREY`.
    """
    token = s.lower()
    if token in ('t', 'true'):
        return Visibility.TRUE
    if token in ('f', 'false'):
        return Visibility.FALSE
    if token in ('g', 'grey', 'gray'):
        return Visibility.GREY
    return default
def parse_compounds(
    s: str
    , req: set[CompoundCol] = REQ_COMP_COL
) -> tuple[Compound, ...]:
    """Parse the compounds contained in a string.

    Args:
        s (str): String containing the header line followed by one compound
            per line.
        req (set of :obj:`CompoundCol`, optional): Required header values.
            Defaults to :obj:`REQ_COMP_COL`.

    Returns:
        tuple of :obj:`Compound`: A tuple containing the parsed compounds.
    """
    # Delegate to the generic table parser, one Compound per data line.
    return parse_lines(s=s, h=CompoundCol, r=req, fn=parse_compound_line)
def parse_compounds_from_file(
    f: str | Path
) -> tuple[Compound, ...]:
    """Read a file and parse its content with :obj:`parse_compounds`.

    Args:
        f (str or :obj:`Path`): Name of the file.

    Returns:
        A tuple containing the parsed :obj:`Compound`
    """
    with open(f, 'r') as infile:
        content = infile.read()
    return parse_compounds(content, REQ_COMP_COL)
def parse_compound_line(
    idx: int
    , l: str
    , h: Sequence[CompoundCol] = tuple(CompoundCol)
) -> Compound:
    """Parse a single compound line.

    Args:
        idx (int): Index of the compound.
        l (str): Compound line, with values separated by commas.
        h (sequence of :obj:`CompoundCol`): Order of the columns. Defaults
            to the declaration order of :obj:`CompoundCol`.

    Returns:
        :obj:`Compound`: Compound built from the given values.

    Note:
        The name and energy columns are mandatory (a missing one raises
        :obj:`KeyError`); the remaining columns are optional. A missing
        visibility column means :obj:`Visibility.TRUE`.
    """
    # Pair every value with its column; surplus values or columns are dropped.
    fields: dict[CompoundCol, str] = {
        col: value for col, value in zip(h, l.split(','))
    }

    name = fields[CompoundCol.Name]
    energy = float(fields[CompoundCol.Energy])

    raw_vis: str | None = fields.get(CompoundCol.Visible)
    if raw_vis is None:
        visible = Visibility.TRUE
    else:
        visible = parse_vis(raw_vis)

    fflags = apply_maybe(parse_fflags, fields.get(CompoundCol.Fflags))
    conc = apply_maybe(parse_conc, fields.get(CompoundCol.Conc))
    opts = apply_maybe(parse_opts, fields.get(CompoundCol.Opts))

    return Compound(
        name=name,
        energy=energy,
        idx=idx,
        visible=visible,
        fflags=fflags,
        conc=conc,
        opts=opts,
    )
[docs] def parse_conc( s: str ) -> float | None: """ Parse concentration value Args: s (str): String to parse. Returns: float with the parsed value or None """ try: return float(s) except ValueError: return None
def parse_fflags(
    s: str
) -> set[FFlags]:
    """Parse format flags.

    Args:
        s (str): String to parse. Should contain the format options
            separated by colons.

    Returns:
        set of :obj:`FFlags`: Every :obj:`FFlags` member that appears among
        the colon-separated tokens of ``s``. Unknown tokens are ignored.

    Note:
        According to the original documentation the format flags are 'i'
        for italics, 'b' for bold and 'u' for underscore. E.g. i:b.
    """
    tokens = s.split(":")
    return {flag for flag in FFlags if flag in tokens}
[docs] def parse_lines[T, S: StrEnum]( s: str , h: type[S] , r: set[S] , fn: Callable[[int, str, list[S]], T] ) -> tuple[T, ...]: """Parse a string containing compounds or intermediates in multiple lines. Args: s (str): String to parse. h (:obj:`StrEnum`): Possible column values. r (set of str): Required column values. fn (:obj:`Callable[int, str, S] -> T`): Function to parse a single line. Returns: tuple of T: Where T with the parsed values. Where T is the output type of :attr:fn. """ rh, *ls = s.splitlines() ph: list[S] = list(map(h, rh.split(','))) if not r.issubset(h): raise ValueError( f"Missing column value. Required: {', '.join(r)}" ) return tuple(map( lambda xs: fn(xs[0], xs[1], ph) , enumerate(ls) ))
def parse_network(
    sc: str
    , sr: str
) -> Network:
    """Build a network from a compounds string and a reactions string.

    The compounds are parsed first with :obj:`parse_compounds`; the result
    is then passed to :obj:`parse_reactions` so the reactions can be
    resolved against it. The reader is redirected to those functions for
    the parsing details.

    Args:
        sc (str): String containing the header and the compounds separated
            by new lines.
        sr (str): String containing the header and the reactions separated
            by new lines.

    Returns:
        :obj:`Network`: Network with the parsed compounds and reactions.
    """
    compounds: tuple[Compound, ...] = parse_compounds(sc, REQ_COMP_COL)
    reactions = parse_reactions(sr, compounds, REQ_REACT_COL)
    return Network(compounds=compounds, reactions=reactions)
def parse_network_from_file(
    cf: str | Path
    , rf: str | Path
) -> Network:
    """Read two files and parse their content with :obj:`parse_network`.

    Args:
        cf (str or :obj:`Path`): Name of the file containing the compounds.
        rf (str or :obj:`Path`): Name of the file containing the reactions.

    Returns:
        :obj:`Network`: Network with the parsed compounds and reactions.
    """
    contents: list[str] = []
    # Read the compounds file first, then the reactions file.
    for path in (cf, rf):
        with open(path, 'r') as infile:
            contents.append(infile.read())
    sc, sr = contents
    return parse_network(sc, sr)
[docs] def parse_opts( s: str ) -> dict[str, str] | None: """Parses the opts column associated with additional opt options. Args: s (str): String containing the pydot options sopearated by ;. Returns: dict of [str, str]: keys and values with the opts parsed. Note: Each option should have a label and a value separated by a '=', and different options should be separated with a ':'. """ if s == '': return None return dict(map( lambda x: x.split('=') , s.split(':') ))
def parse_reactions(
    s: str
    , cs: Sequence[Compound]
    , req: set[ReactionCol] = REQ_REACT_COL
) -> tuple[Reaction, ...]:
    """Parse the reactions contained in a string.

    Args:
        s (str): String containing the header line followed by one reaction
            per line.
        cs (sequence of :obj:`Compound`): Compounds that the reactions may
            refer to.
        req (set of :obj:`ReactionCol`, optional): Required header values.
            Defaults to :obj:`REQ_REACT_COL`.

    Returns:
        tuple of :obj:`Reaction`: Containing the parsed reactions. A single
        line may contribute more than one reaction, so the per-line results
        are flattened into one tuple.
    """
    def parse_one(
        idx: int
        , l: str
        , h: Sequence[ReactionCol]
    ) -> tuple[Reaction, ...]:
        # Bind the available compounds so the signature fits parse_lines.
        return parse_reaction_line(idx, l, cs, h)

    per_line = parse_lines(s, ReactionCol, req, parse_one)
    return tuple(reaction for group in per_line for reaction in group)
def parse_reactions_from_file(
    f: str | Path
    , cs: Sequence[Compound]
) -> tuple[Reaction, ...]:
    """Read a file and parse its content with :obj:`parse_reactions`.

    Args:
        f (str or Path): Name of the file.
        cs (sequence of :obj:`Compound`): Compounds that the reactions may
            refer to.

    Returns:
        A tuple containing the parsed :obj:`Reaction`
    """
    with open(f, 'r') as infile:
        content = infile.read()
    return parse_reactions(content, cs, REQ_REACT_COL)
[docs] def parse_reaction_line( idx: int , l: str , cs: Sequence[Compound] , h: Sequence[ReactionCol] = tuple(ReactionCol) ) -> tuple[Reaction, ...]: """ Parse a reaction line. Args: idx (int): Index of the reaction l (str): Reaction line. cs (sequence of :obj:`Compound`): Sequence of compounds. It will be used to get the :obj:`Compound` (by matching name) and reference in the reaction instead of using the name. h (sequence of :obj:`ReactionCol`): Order of the columns. Defaults to the order of :obj:`ReactionCol`. Returns: :obj:`Reaction`: With the parsed values. Note: Check :obj:`Reaction` for possible values and :obj:`REQ_REACT_COL` for required values. Bidirectional reactions will return 2 reactions sharing the same idx. idx is intended to track the order of the reaction in the reactions file, and thus, I decided to label both reactions with the same number. """ def reduce_fn( xs: tuple[tuple[list[Compound], list[Compound]], list[tuple[str, str]]] , x: tuple[str, str] ) -> tuple[tuple[list[Compound], list[Compound]], list[tuple[str, str]]]: match x: case (_, ""): pass case (ReactionCol.CLeft, _): xs[0][0].append(assure_compound(x[1], cs)) case (ReactionCol.CRight, _): xs[0][1].append(assure_compound(x[1], cs)) case _: xs[1].append(x) return xs (cl, cr), arg = reduce( reduce_fn , zip(h, l.split(',')) , (([], []), []) ) kw: dict[str, str] = dict(arg) vis: str | None = kw.get(CompoundCol.Visible) ncs: tuple[tuple[tuple[Compound, ...], tuple[Compound, ...]], ...] match kw.get(ReactionCol.Direction): case Direction.Left: ncs = ((tuple(cr), tuple(cl)),) case Direction.Biderectional: ncs = ((tuple(cr), tuple(cl)), (tuple(cl), tuple(cr))) case _: ncs = ((tuple(cl), tuple(cr)),) return tuple(map( lambda xs: Reaction( name=str(kw[ReactionCol.Name]) , compounds=xs , energy=float(kw[ReactionCol.Energy]) , idx=int(idx) , visible=Visibility.TRUE if vis is None else parse_vis(vis) , opts=apply_maybe(parse_opts, kw.get(CompoundCol.Opts))) , ncs ))