# Derive and constrain model
## Setup
### Import packages

In [1]:
from pathlib import Path
from warnings import warn

import pandas as pd
from cobra.core import Group
from cobra.manipulation import remove_genes
from cobra.util.array import create_stoichiometric_matrix, nullspace
from rbc_gem_utils import (
    ANALYSIS_PATH,
    COBRA_CONFIGURATION,
    ROOT_PATH,
    read_rbc_model,
    show_versions,
    write_cobra_model,
)

import gurobipy as gp

# Show floats in pandas output with a display precision of 6.
pd.set_option("display.precision", 6)

# Set both Gurobi output parameters to 0 so solver logging stays out of the
# notebook output.
for gurobi_flag in ("OutputFlag", "LogToConsole"):
    gp.setParam(gurobi_flag, 0)

# Show versions of notebook
show_versions()

Set parameter Username

Package Information
-------------------
rbc-gem-utils 0.0.1

Dependency Information
----------------------
beautifulsoup4                       4.12.3
bio                                   1.6.2
cobra                                0.29.0
depinfo                               2.2.0
kaleido                               0.2.1
matplotlib                            3.8.2
memote                               0.17.0
networkx                              3.2.1
notebook                              7.0.7
openpyxl                              3.1.2
pandas                                2.2.0
pre-commit                            3.6.0
pyvis                                 0.3.2
rbc-gem-utils[database,network,vis] missing
requests                             2.31.0
scipy                                1.12.0
seaborn                              0.13.2

Build Tools Information
-----------------------
pip        23.3.1
setuptools 68.2.2
wheel      0.41.2

Platform Informat

### Define configuration
#### COBRA Configuration

In [2]:
# Default (lower, upper) flux bounds and the solver used for every
# optimization in this notebook.
COBRA_CONFIGURATION.bounds = -1000.0, 1000.0
COBRA_CONFIGURATION.solver = "gurobi"
# Display the resulting configuration table.
COBRA_CONFIGURATION

Attribute,Description,Value
solver,Mathematical optimization solver,gurobi
tolerance,"General solver tolerance (feasibility, integrality, etc.)",1e-07
lower_bound,Default reaction lower bound,-1000.0
upper_bound,Default reaction upper bound,1000.0
processes,Number of parallel processes,15
cache_directory,Path for the model cache,/Users/zhaiman/Library/Caches/cobrapy
max_cache_size,Maximum cache size in bytes,104857600
cache_expiration,Model cache expiration time in seconds (if any),


## Load RBC-GEM model

In [3]:
rbc_gem = read_rbc_model(filetype="xml")

# Re-express every reaction's bounds in terms of the configured defaults:
# forward-only and reverse-only reactions keep their direction, anything else
# receives the full configured (lower, upper) range.
for rxn in rbc_gem.reactions:
    current_bounds = rxn.bounds
    if current_bounds == (0.0, 1000.0):
        new_bounds = (0, COBRA_CONFIGURATION.upper_bound)
    elif current_bounds == (-1000.0, 0.0):
        new_bounds = (COBRA_CONFIGURATION.lower_bound, 0.0)
    else:
        new_bounds = COBRA_CONFIGURATION.bounds
    rxn.bounds = new_bounds

# Directory holding the inputs/outputs of this analysis.
data_path = Path(ROOT_PATH, ANALYSIS_PATH, "OVERLAY").resolve()
print(data_path)
overwrite = True
rbc_gem

/Users/zhaiman/opt/github/RBC-GEM/data/analysis/OVERLAY


0,1
Name,RBC_GEM
Memory address,1526636d0
Number of metabolites,2157
Number of reactions,3275
Number of genes,820
Number of groups,78
Objective expression,1.0*NaKt - 1.0*NaKt_reverse_db47e
Compartments,"cytosol, extracellular space"


### Extract model
Use the full reconstruction as-is (`reduced_model_id = None`), or set `reduced_model_id` to extract a pre-defined reduced RBC model from it.

In [4]:
# ID of a pre-defined reduced model to extract; ``None`` (or the ID of the
# full reconstruction) keeps the copied model unchanged.
reduced_model_id = None
model = rbc_gem.copy()

# Boundary reaction type --> ID prefix carried by pseudoreactions of that type.
boundary_types_dict = {
    "exchange": "EX_",
    "sink": "SK_",
    "demand": "DM_",
}


if reduced_model_id is not None and reduced_model_id != rbc_gem.id:
    # Reaction table of the reduced model, indexed by reaction ID.
    df_reactions = pd.read_csv(
        ROOT_PATH
        / ANALYSIS_PATH
        / "reduced_models"
        / f"model_reactions_{reduced_model_id}.tsv",
        sep="\t",
        index_col=0,
        dtype=str,
    ).fillna("")

    # Determine pseudoreactions if any, mapping each one to its metabolite ID.
    pseudoreactions = set()
    boundaries = {}
    for btype, prefix in boundary_types_dict.items():
        # Slice off only the leading prefix: ``str.replace`` would also delete
        # any later occurrence of the prefix inside the metabolite ID.
        boundaries[btype] = {
            rid: rid[len(prefix) :]
            for rid in df_reactions.index
            if rid.startswith(prefix)
        }
        pseudoreactions.update(boundaries[btype])

    # Extract reactions from model that are not pseudoreactions
    reactions = sorted(df_reactions.index.difference(pseudoreactions))
    reactions = model.reactions.get_by_any(reactions)
    # Remove reactions not in list (a set of IDs keeps the membership test cheap)
    keep_ids = {reaction.id for reaction in reactions}
    model.remove_reactions(
        [x for x in model.reactions if x.id not in keep_ids], remove_orphans=True
    )
    # Remove orphaned groups not in list
    model.remove_groups([x for x in model.groups if not x.members])
    # Cleanup gene reaction rules
    for reaction in reactions:
        reaction.gene_reaction_rule = df_reactions.loc[
            reaction.id, "gene_reaction_rule"
        ]
    to_remove = model.genes.query(lambda x: len(x.reactions) < 1)
    remove_genes(model, gene_list=to_remove, remove_reactions=False)

    # Add pseudoreactions, defining new ones if needed.
    for btype, boundary_dict in boundaries.items():
        for reaction_id, met_id in boundary_dict.items():
            try:
                met = model.metabolites.get_by_id(met_id)
            except KeyError:
                # The metabolite is not part of the reduced model, so no
                # boundary reaction can be attached to it.
                warn(f"{met_id} not in model")
                continue
            try:
                model.add_boundary(met, type=btype)
            except ValueError:
                # Presumably the boundary reaction already exists; the lookup
                # confirms that and raises KeyError if it does not.
                model.reactions.get_by_id(reaction_id)

    for reaction in model.boundary:
        reaction.subsystem = "Pseudoreactions"

    model.id = reduced_model_id
model

0,1
Name,RBC_GEM
Memory address,1543f9f50
Number of metabolites,2157
Number of reactions,3275
Number of genes,820
Number of groups,78
Objective expression,1.0*NaKt - 1.0*NaKt_reverse_db47e
Compartments,"cytosol, extracellular space"


### Check blocked reactions

In [5]:
# blocked_reactions = find_blocked_reactions(model, open_exchanges=False);
# blocked_reactions = sorted([str(r) for r in model.reactions.get_by_any(blocked_reactions) if r.subsystem not in {"Pseudoreactions", "Transport, extracellular"}])
# blocked_reactions

### Set objective(s)

In [6]:
# Test copy of the model: every boundary reaction is restricted to the
# (0, upper_bound) range, then EX_glc__D_e alone is given bounds (-1, 0).
test_model = model.copy()
for r in test_model.boundary:
    r.bounds = (0, COBRA_CONFIGURATION.upper_bound)
test_model.reactions.get_by_id("EX_glc__D_e").bounds = (-1, 0)

objective_rxns = ["NaKt"]
# Each model builds its objective from its OWN reactions: flux expressions
# are tied to the solver variables of the model they come from, so they must
# not be shared between ``model`` and its copy.
model.objective = sum(
    model.reactions.get_by_id(rid).flux_expression for rid in objective_rxns
)
# Mirror the objective on the test copy so the check below exercises the
# objective that was just set, not whatever the copy inherited.
test_model.objective = sum(
    test_model.reactions.get_by_id(rid).flux_expression for rid in objective_rxns
)
sol = test_model.optimize()
print(sol[objective_rxns])
# Non-zero fluxes of the optimal solution.
sol.fluxes[sol.fluxes != 0]

NaKt    2.0
Name: fluxes, dtype: float64


ACt2           2.0
EX_glc__D_e   -1.0
EX_h_e         2.0
EX_lac__L_e    2.0
ENO            2.0
FBA            1.0
GAPD           2.0
LDH_L          2.0
NaKt           2.0
PFK            1.0
PGI            1.0
PGK            2.0
PGM            2.0
PYK            2.0
TPI            1.0
ADPGK          1.0
DADK4          1.0
NDPK9          1.0
ADK7           1.0
DADK7         -1.0
CYSTHRNaEx     3.0
GLC_Dt        -1.0
Kt1            4.0
PYR_ACtex      2.0
PYR_LLACtex   -2.0
THRCYSNaEx     3.0
Name: fluxes, dtype: float64

### Get dimension of nullspace

In [7]:
# Build the stoichiometric matrix of the model and compute its nullspace.
S = create_stoichiometric_matrix(model)
ns = nullspace(S)
# Display the shape of the nullspace array; presumably (number of reactions,
# nullspace dimension) — TODO confirm against the cobra documentation.
ns.shape

(3275, 1258)

### Set reaction bounds

In [8]:
# constraints_bounds_filepath = data_path / "constraints_reactions.tsv"
# df_constraints_bounds = pd.read_csv(
#     constraints_bounds_filepath,
#     sep="\t",
#     index_col="reactions"
# ).replace("", pd.NA)

# df_constraints_bounds["lower_bound"] = df_constraints_bounds["lower_bound"].replace("DEFAULT", COBRA_CONFIGURATION.lower_bound).astype(float)
# df_constraints_bounds["upper_bound"] = df_constraints_bounds["upper_bound"].replace("DEFAULT", COBRA_CONFIGURATION.upper_bound).astype(float)
# bound_cols = ["lower_bound", "upper_bound"]

# not_found = set()
# for rid, row in df_constraints_bounds.iterrows():
#     try:
#         reaction = model.reactions.get_by_id(rid)
#     except KeyError as e:
#         not_found.add(rid)
#         # warn(f"Could not find {rid} in model {model.id}.")
#         continue
#     # Convert bounds from mmol / hr / L cell --> mmol / hr / gDW
#     reaction.bounds = convert_L_to_gDW(row[bound_cols].values)
# if len(not_found):
#     warn(f"Could not find {len(not_found)} reactions in model {model.id}.")

# df_constraints_bounds = df_constraints_bounds.loc[~df_constraints_bounds.index.isin(not_found)]
# # df_constraints_bounds.to_csv(model_dirpath / "constraints_reactions.tsv", sep="\t", index=False)
# df_constraints_bounds.head()

### Set additional constraints
#### Ratio constraints

In [9]:
# constraints_ratios_filepath =  data_path / "constraints_additional.tsv"
# df_constraints_additional = pd.read_csv(
#     constraints_ratios_filepath,
#     sep="\t",
#     index_col="constraints",
# )

# ratio_ids = set()
# skipped_constraints = set()
# not_found = set()
# for constraint_id, row in df_constraints_additional.iterrows():
#     subs_dict = {}
#     lhs = parse_expr(row["lhs"])
#     rhs = parse_expr(row["rhs"])

#     csense = row["csense"]
#     lb=None if csense == "<" else 0
#     ub=None if csense == ">" else 0
#     reactions = row["reactions"].split(";")
#     for reaction in reactions:
#         try:
#             reaction = model.reactions.get_by_id(reaction)
#         except Exception:
#             if reaction == str(rhs) or reaction == str(lhs):
#                 skipped_constraints.add(constraint_id)
#                 continue
#             else:
#                 not_found.add(reaction)
#                 subs_dict[reaction] = 0
#         else:
#             subs_dict[reaction.id] = reaction.flux_expression
#     if (str(rhs) == "0" or str(lhs) == "0") and len([r for r in reactions if r not in not_found]) <= 1:
#         skipped_constraints.add(constraint_id)
#     if constraint_id in skipped_constraints:
#         continue
#     abundance = lhs - rhs
#     abundance = abundance.subs(subs_dict)
#     if str(abundance) == "0":
#         print(f"{constraint_id} is always equal to 0, not including.")
#         skipped_constraints.add(constraint_id)
#         continue
#     try:
#         constraint = model.constraints[constraint_id]
#     except Exception:
#         pass
#     else:
#         model.remove_cons_vars(constraint)
#     constraint = model.problem.Constraint(
#         abundance=abundance,
#         name=constraint_id,
#         lb=float(lb) if lb is not None else lb,
#         ub=float(ub) if ub is not None else ub,
#     )
#     model.add_cons_vars(constraint)
#     # Convert units
#     if constraint.lb is not None:
#         constraint.lb = convert_L_to_gDW(float(constraint.lb))
#     if constraint.ub is not None:
#         constraint.ub = convert_L_to_gDW(float(constraint.ub))
#     df_constraints_additional.loc[constraint_id, "reactions"] = ";".join([r for r in reactions if r not in not_found])
#     print(constraint)
#     df_constraints_additional.loc[constraint_id, "lhs"] = str(lhs)
#     df_constraints_additional.loc[constraint_id, "rhs"] = str(rhs)


# df_constraints_additional = df_constraints_additional.loc[~df_constraints_additional.index.isin(not_found.union(skipped_constraints))]
# df_constraints_additional.to_csv(model_dirpath / "constraints_additional.tsv", sep="\t", index=False)
# df_constraints_additional

### Reset subsystems

In [10]:
# Rebuild the groups from scratch so that each group mirrors one reaction
# subsystem. A snapshot (``list(...)``) is passed because handing over the live
# ``model.groups`` container means it is modified while it is being iterated,
# which can skip groups and leave stale ones behind.
model.remove_groups(list(model.groups))
for subsystem in sorted(set(model.reactions.list_attr("subsystem"))):
    reaction_list = model.reactions.query(lambda x: x.subsystem == subsystem)
    if subsystem not in model.groups:
        group = Group(id=subsystem, name=subsystem, members=reaction_list)
        model.add_groups([group])
    else:
        # Group already present: extend it with the subsystem's reactions.
        model.groups.get_by_id(subsystem).add_members(reaction_list)

### Export model

In [11]:
# Write the model into its own sub-directory of the analysis folder.
model_dirpath = data_path / model.id
# ``parents=True`` also creates the analysis folder itself on a fresh checkout,
# where it may not exist yet.
model_dirpath.mkdir(parents=True, exist_ok=True)
write_cobra_model(model, filename=model_dirpath / f"{model.id}.xml")
model

0,1
Name,RBC_GEM
Memory address,1543f9f50
Number of metabolites,2157
Number of reactions,3275
Number of genes,820
Number of groups,78
Objective expression,1.0*NaKt - 1.0*NaKt_reverse_db47e
Compartments,"cytosol, extracellular space"


### Check bounds

In [12]:
from cobra import flux_analysis

In [13]:
# Flux variability analysis of the exported model. With fraction_of_optimum
# set to 0.0 the flux ranges are not tied to the objective value.
fva_options = {"fraction_of_optimum": 0.0, "loopless": False}
fva_sol = flux_analysis.flux_variability_analysis(model, **fva_options)

Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username


In [14]:
# Fresh test copy: restrict every boundary reaction to (0, upper_bound), then
# give EX_glc__D_e alone the bounds (-1, 0).
test_model = model.copy()
boundary_bounds = (0, COBRA_CONFIGURATION.upper_bound)
for boundary_rxn in test_model.boundary:
    boundary_rxn.bounds = boundary_bounds
glucose_exchange = test_model.reactions.get_by_id("EX_glc__D_e")
glucose_exchange.bounds = (-1, 0)


# Flux ranges of the test model, not tied to the objective value.
test_fva_sol = flux_analysis.flux_variability_analysis(
    test_model, fraction_of_optimum=0.0, loopless=False
)
test_fva_sol

Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username
Set parameter Username


Unnamed: 0,minimum,maximum
UNK3,0.0,0.0
4PYRDXABCte,0.0,0.0
5AOPt2,0.0,0.0
EX_ade_e,0.0,0.0
EX_adn_e,0.0,0.0
...,...,...
SK_avite1qn_c,0.0,0.0
SK_prdx2crd_c,0.0,0.0
SK_prdx2cso3_c,0.0,0.0
SK_grdx2crd_c,0.0,0.0
