# Create Protein-Constrained RBC model via OVERLAY workflow 
This notebook facilitates the construction of a proteome constrained model ("pcModel") via the OVERLAY methodology.
## Setup
### Import packages

In [None]:
import itertools
from collections import defaultdict

import gurobipy as gp
import pandas as pd
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    build_string,
    get_annotation_df,
    get_dirpath,
    read_cobra_model,
    show_versions,
    split_string,
    write_cobra_model,
)
from rbc_gem_utils.analysis.overlay import (
    ATTR_SUBCLASS_DICT,
    DEFAULT_KEFF,
    DEFAULT_PREFIX_SUFFIX_VALUES,
    DEFAULT_PROTEOME_COMPARTMENT,
    Budget,
    BudgetDilution,
    Complex,
    ComplexDilution,
    Enzyme,
    EnzymeDilution,
    Protein,
    ProteinDilution,
    construct_pcmodel_from_tables,
    create_complex_table,
    create_enzyme_table,
    create_protein_table,
    create_sequence_table,
)
from rbc_gem_utils.database import MGI_DB_TAG, UNIPROT_DB_TAG
from rbc_gem_utils.util import strip_plural

# Turn off Gurobi solver logging (both the general output flag and console
# logging) so that optimization runs do not flood the notebook output.
gp.setParam("OutputFlag", 0)
gp.setParam("LogToConsole", 0)

# Show versions of notebook
show_versions()

### Define configuration
#### COBRA Configuration

In [None]:
# Use Gurobi as the solver and (-1000, 1000) as the configured bounds.
COBRA_CONFIGURATION.solver = "gurobi"
COBRA_CONFIGURATION.bounds = (-1e3, 1e3)
# Last expression of the cell: displays the resulting configuration.
COBRA_CONFIGURATION

### Define organism and model

In [None]:
# `organism` names the OVERLAY subdirectory; `model_id` names both the model
# folder inside it and the model file that is loaded from that folder.
organism = "Human"
model_id = "RBC_GEM"

### Set paths

In [None]:
# Directory holding the UniProt-derived database files (isoform sequences).
database_dirpath = get_dirpath("database", UNIPROT_DB_TAG)
# General OVERLAY tables for the organism live here ...
overlay_dirpath = get_dirpath("analysis") / "OVERLAY" / organism
# ... and model-specific inputs/outputs live in a subfolder named after the model.
model_dirpath = overlay_dirpath / model_id

### Define hemoglobin proteins

In [None]:
# Hemoglobin subunit label -> UniProt accession.
hemoglobin_proteins = {
    "HBA": "P69905",  # Hemoglobin subunit alpha
    "HBB": "P68871",  # Hemoglobin subunit beta
    "HBD": "P02042",  # Hemoglobin subunit delta
    "HBE1": "P02100",  # Hemoglobin subunit epsilon
    "HBG1": "P69891",  # Hemoglobin subunit gamma-1
    "HBG2": "P69892",  # Hemoglobin subunit gamma-2
    "HBM": "Q6B0K9",  # Hemoglobin subunit mu
    "HBQ1": "P09105",  # Hemoglobin subunit theta-1
    "HBZ": "P02008",  # Hemoglobin subunit zeta
}

## Load RBC model

In [None]:
# Read the metabolic model "<model_id>.xml" from the model-specific directory.
model = read_cobra_model(filename=model_dirpath / f"{model_id}.xml")
# Last expression of the cell: displays the loaded model.
model

In [None]:
annotation_type = "genes"
mapping_key = "uniprot"
annotation_cols = [mapping_key]

# Gene annotations from the model, one row per gene, restricted to genes
# that actually carry a value for the mapping key.
df_model_mappings = get_annotation_df(model.genes, annotation_cols)
df_model_mappings = df_model_mappings.rename(columns={"id": annotation_type})
df_model_mappings = df_model_mappings.dropna(subset=[mapping_key])

# Split every column into its individual entries and expand to one entry per
# row, removing duplicate and missing rows after each expansion.
for col in df_model_mappings.columns:
    df_model_mappings[col] = df_model_mappings[col].apply(split_string)
    df_model_mappings = df_model_mappings.explode(col)
    df_model_mappings = df_model_mappings.drop_duplicates().dropna()

# Order by gene and renumber the rows.
df_model_mappings = df_model_mappings.sort_values(annotation_type).reset_index(
    drop=True
)

# Number of distinct values per column.
print(df_model_mappings.nunique(dropna=True))
df_model_mappings

## Assemble data for PC-model
### Load protein data
#### Protein amino acid sequences

In [None]:
# Load the tab-separated isoform/sequence table from the UniProt database folder.
df_isoforms_sequences = pd.read_csv(
    database_dirpath / f"{UNIPROT_DB_TAG}_isoforms_sequences.tsv",
    sep="\t",
    index_col=None,
)
df_isoforms_sequences = df_isoforms_sequences.fillna(pd.NA)

# UniProt accessions that have at least one row flagged as erythroid.
print(
    df_isoforms_sequences.loc[df_isoforms_sequences["erythroid"], "uniprot"].unique()
)
df_isoforms_sequences

#### Determine protein isoforms and associated sequences

In [None]:
# Pool every row flagged erythroid, canonical, or backup, and drop rows that
# were picked up by more than one flag. The final row order comes from the
# sort below (by accession, erythroid rows first within an accession, then by
# isoform ID), not from the order of the concatenated pieces.
df_model_isoforms_sequences = (
    pd.concat(
        (
            df_isoforms_sequences[df_isoforms_sequences["erythroid"]],
            df_isoforms_sequences[df_isoforms_sequences["canonical"]],
            df_isoforms_sequences[df_isoforms_sequences["backup"]],
        ),
        axis=0,
    )
    .fillna(pd.NA)
    .drop_duplicates()
    .sort_values(
        ["uniprot", "erythroid", "uniprot.isoform"], ascending=[True, False, True]
    )
)

# Summary before filtering: number of rows carrying each flag, and row total.
print(
    df_model_isoforms_sequences[["canonical", "erythroid", "backup", "avoid"]].sum(
        axis=0
    )
)
print(f"Total: {len(df_model_isoforms_sequences)}")
# Keep only the columns needed below. The index is renumbered 0..n-1 because
# the `.loc` assignments that follow address rows by these index labels.
df_model_isoforms_sequences = df_model_isoforms_sequences.loc[
    :,
    [
        "uniprot",
        "uniprot.isoform",
        "sequence.id",
        "sequence",
        "sequence.length",
        "canonical",
        "erythroid",
        "backup",
        "avoid",
    ],
].reset_index(drop=True)


df_model_isoforms_sequences = df_model_isoforms_sequences.copy()
# Selection rules, applied in order (later rules override earlier ones):
#   1. start by keeping canonical rows;
df_model_isoforms_sequences["keep"] = df_model_isoforms_sequences["canonical"].values
#   2. never keep rows flagged "avoid" (`to_avoid` maps row label -> accession);
to_avoid = df_model_isoforms_sequences[df_model_isoforms_sequences["avoid"]][
    "uniprot"
].to_dict()
df_model_isoforms_sequences.loc[
    list(to_avoid),
    "keep",
] = False

#   3. for every accession that has an avoided row, keep its non-avoided rows;
df_possible_backups = df_model_isoforms_sequences[
    df_model_isoforms_sequences["uniprot"].isin(list(to_avoid.values()))
]
df_possible_backups = df_possible_backups[~df_possible_backups["avoid"]]
df_model_isoforms_sequences.loc[
    list(df_possible_backups.index),
    "keep",
] = True
#   4. always keep erythroid rows (even if they were flagged "avoid").
df_model_isoforms_sequences.loc[
    df_model_isoforms_sequences[df_model_isoforms_sequences["erythroid"]].index,
    "keep",
] = True
df_model_isoforms_sequences = df_model_isoforms_sequences[
    df_model_isoforms_sequences["keep"]
]

# Safety net: accessions present in the full table but lost by the selection
# above get their canonical rows (taken from the full table) appended back.
# Note that `lost_ids` is rebound from a set of accessions to a DataFrame.
lost_ids = set(df_isoforms_sequences["uniprot"].unique()).difference(
    set(df_model_isoforms_sequences["uniprot"].unique())
)
if lost_ids:
    lost_ids = df_isoforms_sequences[df_isoforms_sequences["uniprot"].isin(lost_ids)]
    df_model_isoforms_sequences = pd.concat(
        (df_model_isoforms_sequences, lost_ids[lost_ids["canonical"]]), axis=0
    )
# Summary after filtering, in the same format as the one printed above.
print()
print(
    df_model_isoforms_sequences[["canonical", "erythroid", "backup", "avoid"]].sum(
        axis=0
    )
)
print(f"Total: {len(df_model_isoforms_sequences)}")

# Reduce to accession / sequence ID / sequence and attach the model genes.
# The merge uses the default inner join, so only accessions that are mapped
# to a model gene remain.
df_model_isoforms_sequences = df_model_isoforms_sequences.loc[
    :, ["uniprot", "sequence.id", "sequence"]
].copy()
df_sequence_data = (
    df_model_mappings.merge(
        df_model_isoforms_sequences, left_on="uniprot", right_on="uniprot"
    )
    .loc[:, ["genes", "uniprot", "sequence.id", "sequence"]]
    .copy()
)
df_sequence_data

In [None]:
mapping_key = "uniprot"
# Column that will be used to build protein identifiers. NOTE: this variable
# is rebound further down in this cell, so re-running the cell without
# re-running from here changes which branch is taken.
protein_id_key = (
    "sequence.id.genes"  # genes, uniprot, sequence.id, or sequence.id.genes are best,
)
# When True, only the first row per UniProt accession is kept (see the end of the cell).
unique_gene_to_protein_map = True
isoform_transform = False
# NOTE(review): the exact transformation performed by `create_sequence_table`
# is defined in rbc_gem_utils and is not visible here.
df_sequence_data = create_sequence_table(
    df_sequence_data=df_sequence_data,
    mapping_key=mapping_key,
    isoform_transform=isoform_transform,
)
# Sequence IDs of all rows whose accession occurs more than once, i.e. the
# accessions for which several sequences (isoforms) are present.
ordered_isoform_ids = df_sequence_data[df_sequence_data["uniprot"].duplicated(False)][
    "sequence.id"
]
df_isoforms = df_sequence_data[
    df_sequence_data["sequence.id"].isin(ordered_isoform_ids)
].copy()
print(f"Number of proteins: {len(df_isoforms[mapping_key].unique())}")
print(f"Number of isoforms: {len(df_isoforms['sequence.id'].unique())}")
# Reorder the table: multi-isoform rows first (in their current order),
# followed by all remaining rows.
df_sequence_data = df_sequence_data.set_index("sequence.id")
df_sequence_data = pd.concat(
    (
        df_sequence_data.loc[ordered_isoform_ids],
        df_sequence_data.loc[df_sequence_data.index.difference(ordered_isoform_ids)],
    ),
    axis=0,
)
df_sequence_data = df_sequence_data.reset_index(drop=False)
df_sequence_data = df_sequence_data.loc[
    :, ["genes", "uniprot", "sequence.id", "sequence"]
].copy()
# print(df_isoforms[mapping_key])
if protein_id_key == "sequence.id.genes":
    # Build "protein.id" from "sequence.id": split on "-", replace every part
    # that is a mapped UniProt accession by its gene ID, and join with "_".
    protein_id_key = "protein.id"
    sequence_id_updates = df_model_mappings.set_index("uniprot")["genes"].to_dict()
    df_sequence_data["protein.id"] = df_sequence_data["sequence.id"].apply(
        lambda seq_id: "_".join(
            [sequence_id_updates.get(x, x) for x in seq_id.split("-")]
        )
    )
    df_isoforms["protein.id"] = df_isoforms["sequence.id"].apply(
        lambda seq_id: "_".join(
            [sequence_id_updates.get(x, x) for x in seq_id.split("-")]
        )
    )
    # Rows that are not part of a multi-isoform group keep only the part of
    # the identifier before the first "_".
    ids_to_fix = df_sequence_data[
        ~df_sequence_data["sequence.id"].isin(df_isoforms["sequence.id"])
    ].index
    df_sequence_data.loc[ids_to_fix, "protein.id"] = df_sequence_data.loc[
        ids_to_fix, "protein.id"
    ].apply(lambda x: x.split("_")[0])

# Use to remove duplicates
if unique_gene_to_protein_map:
    # Keep the first row per accession (multi-isoform rows were moved to the
    # top above) and switch the protein identifier column to the gene ID.
    df_sequence_data = df_sequence_data.drop_duplicates(
        subset=["uniprot"],
        keep="first",
    )
    protein_id_key = "genes"

df_sequence_data

###  List all unique proteins, complexes, and enzymes
#### Option 1: Initialize draft tables
1. The draft tables are created and used to initialize the draft PC-model.
    * The protein table can be used to initialize proteins and their molar weight ($\textbf{d}$ vector).
    * The complex table can be used to initialize complexes with their subunit stoichiometry ($\textbf{C}$ matrix).
        * All stoichiometric coefficients are initialized at a value of one.
    * The enzyme table can be used to initialize enzymes with their effective rate constants ($\textbf{K}_\mathrm{eff}$ matrix).
        * All $k_\mathrm{eff}$ values are initialized at an average rate constant of 65 $s^{-1}$ (or equivalently, 234000 $hr^{-1}$).

2. The draft tables are made to facilitate curation and data replacement. Therefore, the draft PC-model is exported with the draft tables.
3. A refined PC-model can be created using the curated tables. 

#### Option 2: Load tables from files
4. The formation of a draft model can be skipped if the curated tables already exist. They can be loaded.

In [None]:
# Collects the protein, complex, and enzyme tables built in the cells below.
pcmodel_tables = {}
# Mapping passed to the table constructors to rename compartments.
replace_compartments = {
    # Cytosol:extracellular --> plasma membrane
    "c": "c",
    "ce": "pm",
    "e": "e",  # Most extracellular reactions that occur are due to proteins bound to the external side of the membrane.
}

# Convert all protein compartments to one compartment
simplify_compartments = True
prefix = True
optional_columns = True
map_human_to_organism = True

# Enzyme values for new tables
max_weight_fraction = 100
enzyme_keff_base = DEFAULT_KEFF

# Column used as the identifier source for each table type when a table is
# created from scratch. The "proteins" entry captures the current value of
# `protein_id_key`.
dict_of_id_keys = {
    "proteins": protein_id_key,
    "complexes": None,
    "enzymes": "reactions",
}

# Provide filepaths to load a specific model
# A table type missing from this dict raises KeyError in the loading cells,
# which makes them fall back to `filepaths` and then to building from scratch.
model_filepaths = {
    # "proteins": model_dirpath / f"pcmodel_{model}_proteins.tsv",
    # "complexes": model_dirpath / f"pcmodel_{model}_complexes.tsv",
    # "enzymes": model_dirpath / f"pcmodel_{model}_enzymes.tsv",
    # "complex_keffs": model_dirpath / f"pcmodel_{model}_complex_keffs.tsv",
    # "enzyme_keffs": model_dirpath / f"pcmodel_{model}_enzyme_keffs.tsv",
    # "constraints_proteins": model_dirpath / f"pcmodel_{model}_constraints_proteins.tsv",
    # "constraints_reactions": model_dirpath / f"pcmodel_{model}_constraints_reactions.tsv",
    # "constraints_additional": model_dirpath / f"pcmodel_{model}_constraints_additional.tsv",
}

# Provide filepaths to general files
filepaths = {
    "proteins": overlay_dirpath / "pcmodel_proteins.tsv",
    "complexes": overlay_dirpath / "pcmodel_complexes.tsv",
    # "enzymes": overlay_dirpath / "pcmodel_enzymes.tsv",
    # "complex_keffs": overlay_dirpath / "pcmodel_complex_keffs.tsv",
    # "enzyme_keffs": overlay_dirpath / "pcmodel_enzyme_keffs.tsv",
    # "constraints_proteins": overlay_dirpath / f"pcmodel_constraints_proteins.tsv",
    # "constraints_reactions": overlay_dirpath / f"pcmodel_constraints_reactions.tsv",
    # "constraints_additional": overlay_dirpath / f"pcmodel_constraints_additional.tsv",
}

###### Create protein table

In [None]:
table_type = "proteins"
# Resolution order for the protein table:
#   1. a model-specific file listed in `model_filepaths`,
#   2. the general OVERLAY file listed in `filepaths`,
#   3. a new table created from the model and the sequence data.
# A missing dictionary entry (KeyError) is treated like a missing file.
try:
    # Try loading previously built model proteins
    df_proteins = pd.read_csv(model_filepaths[table_type], sep="\t", index_col=None)
    print("Loaded from previously generated file")
except (FileNotFoundError, KeyError):
    # Otherwise try using main RBC-GEM files to make model proteins
    try:
        df_proteins = pd.read_csv(filepaths[table_type], sep="\t", index_col=None)
    except (FileNotFoundError, KeyError):
        # Otherwise, make from scratch
        df_proteins = create_protein_table(
            model,
            df_sequence_data,
            id_key=dict_of_id_keys.get(table_type),
            prefix=prefix,
            optional_columns=optional_columns,
            annotation_columns=[
                "uniprot",
            ],
            replace_compartments=replace_compartments,
        )
        print("Created new table")
    else:
        # Only the compartment assignments are taken from the general file;
        # genes and sequences come from the sequence data assembled above.
        df_proteins = df_sequence_data.merge(
            df_proteins[["uniprot", "compartment"]],
            left_on="uniprot",
            right_on="uniprot",
            how="left",
        )
        df_proteins["protein"] = df_proteins[protein_id_key].apply(
            lambda x: f"protein_{x}"
        )
        # "protein.id" is only present when the sequence table was built with
        # the "sequence.id.genes" option, so do not fail if it is missing.
        df_proteins = df_proteins.drop(columns="protein.id", errors="ignore")
        # Restrict to genes that are present in the loaded model.
        df_proteins = df_proteins[
            df_proteins["genes"].isin(model.genes.list_attr("id"))
        ].reset_index(drop=True)
        print("Loaded from main RBC-GEM file")

if simplify_compartments:
    # Collapse to one row per (gene, protein): the distinct non-missing values
    # of every other column are joined with ";" and the compartment is then
    # overwritten with the single proteome compartment.
    df_proteins = df_proteins.groupby(["genes", "protein"]).agg(
        lambda values: ";".join(str(value) for value in values.dropna().unique())
    )
    df_proteins["compartment"] = DEFAULT_PROTEOME_COMPARTMENT
    df_proteins = df_proteins.reset_index(drop=False)

# Full identifier: "<protein>_<compartment>".
df_proteins["proteins"] = df_proteins[[strip_plural(table_type), "compartment"]].apply(
    lambda x: "_".join(x.values), axis=1
)
df_proteins = df_proteins.set_index(strip_plural(table_type))
pcmodel_tables[table_type] = df_proteins.copy()

if organism != "Human" and map_human_to_organism:
    # NOTE(review): `df_organism` is not defined in the visible part of this
    # notebook; confirm it is created before running with a non-human organism.
    discrepancies = df_organism[~df_organism["genes"].isin(df_proteins["genes"])]
    if not discrepancies.empty:
        # A discrepancy may arise if a protein was deleted from UniProt but has not yet been recorded in the organism database.
        # This variable can be used for manually checking
        print(f"Discrepancies from organism mapping: {len(discrepancies)}")

df_proteins

###### Create complex table

In [None]:
table_type = "complexes"
# Resolution order for the complex table:
#   1. a model-specific file listed in `model_filepaths`,
#   2. the general OVERLAY file listed in `filepaths`,
#   3. a new table created from the model.
# A missing dictionary entry (KeyError) is treated like a missing file.
try:
    df_complexes = pd.read_csv(model_filepaths[table_type], sep="\t", index_col=None)
    print("Loaded from previously generated file")
except (FileNotFoundError, KeyError):
    try:
        df_complexes = pd.read_csv(filepaths[table_type], sep="\t", index_col=None)
    except (FileNotFoundError, KeyError):
        # Gene ID -> protein identifier(s) from the protein table built above.
        genes_to_proteins = (
            pcmodel_tables["proteins"]
            .groupby(["genes"], as_index=True)["proteins"]
            .agg(lambda x: build_string(list(x)))
            .to_dict()
        )
        cofactor_genes = {}
        # Create table
        df_complexes = create_complex_table(
            model,
            genes_to_proteins=genes_to_proteins,
            cofactor_genes=cofactor_genes,
            id_key=dict_of_id_keys.get(table_type),
            optional_columns=optional_columns,
            annotation_columns=[
                # "uniprot"
            ],
            replace_compartments=replace_compartments,
        )
        print("Created new table")
    else:
        # Keep only complexes whose genes are all present in the model. The
        # explicit copy makes the column assignment below operate on an
        # independent frame instead of a slice of the loaded table.
        df_complexes = df_complexes[
            df_complexes["genes"].apply(
                lambda genes: all(
                    model.genes.has_id(gene) for gene in genes.split(";")
                )
            )
        ].copy()
        # Drop reactions that are not in the model, then drop complexes that
        # are left without any reaction.
        df_complexes["reactions"] = df_complexes["reactions"].apply(
            lambda reactions: ";".join(
                [r for r in reactions.split(";") if model.reactions.has_id(r)]
            )
        )
        df_complexes = df_complexes[df_complexes["reactions"] != ""]
        df_complexes = df_complexes.loc[
            :,
            [
                "complex",
                "subunits",
                "compartment",
                "reactions",
                "genes",
                "coefficients",
                "cofactors",
                "notes",
            ],
        ]
        print("Loaded from main RBC-GEM file")

    # Address isoform mapping to complexes
    # For genes with more than one protein row, map the protein ID without its
    # last "_"-separated part to all of its variants (`isoforms_map`), and do
    # the same for the names without the "protein_" prefix
    # (`complex_name_update`).
    isoforms_map = defaultdict(list)
    complex_name_update = defaultdict(list)
    for x in df_proteins[df_proteins["genes"].duplicated(False)].index:
        isoforms_map[x.rsplit("_", maxsplit=1)[0]].append(x)
        complex_name_update[
            x.rsplit("_", maxsplit=1)[0].replace("protein_", "")
        ].append(x.replace("protein_", ""))
    df_isoforms_complexes = df_complexes[
        df_complexes["subunits"].apply(
            lambda proteins: bool(set(isoforms_map).intersection(proteins.split(";")))
        )
    ]
    # Expand every affected complex into one row per combination of variants.
    # Names and subunit lists are expanded separately and paired by position.
    df_updated_rows = []
    for _, row in df_isoforms_complexes.iterrows():
        complex_names = list(
            itertools.product(
                *[complex_name_update.get(c, [c]) for c in row["complex"].split("_")]
            )
        )
        combos = [
            list(combo)
            for combo in itertools.product(
                *[
                    isoforms_map.get(protein, [protein])
                    for protein in row["subunits"].split(";")
                ]
            )
        ]

        for complex_name, combo in zip(complex_names, combos):
            new_row = row.to_dict()
            new_row["complex"] = "_".join(complex_name)
            new_row["subunits"] = ";".join(combo)
            df_updated_rows.append(new_row)

    # Replace the affected rows by their expanded versions. Nothing is done
    # when no complex was affected, which avoids concatenating an empty frame;
    # the index is renumbered so that row labels stay unique.
    if df_updated_rows:
        df_complexes = pd.concat(
            (
                df_complexes[~df_complexes.index.isin(df_isoforms_complexes.index)],
                pd.DataFrame(df_updated_rows),
            ),
            axis=0,
            ignore_index=True,
        )

if simplify_compartments:
    # Collapse to one row per (subunits, complex): the distinct non-missing
    # values of every other column are joined with ";" and the compartment is
    # then overwritten with the single proteome compartment.
    df_complexes = df_complexes.groupby(["subunits", "complex"]).agg(
        lambda values: ";".join(str(value) for value in values.dropna().unique())
    )
    df_complexes["compartment"] = DEFAULT_PROTEOME_COMPARTMENT
    df_complexes = df_complexes.reset_index(drop=False)

# Full identifier: "<complex>_<compartment>".
df_complexes["complexes"] = df_complexes[
    [strip_plural(table_type), "compartment"]
].apply(lambda x: "_".join(x.values), axis=1)
# Append the compartment to every subunit that does not already end in "_pc".
# NOTE(review): "_pc" is hard-coded here; presumably it should match
# DEFAULT_PROTEOME_COMPARTMENT - confirm before changing.
df_complexes["subunits"] = df_complexes[["subunits", "compartment"]].apply(
    lambda values: ";".join(
        [
            "_".join((x, values["compartment"])) if not x.endswith("_pc") else x
            for x in values["subunits"].split(";")
        ]
    ),
    axis=1,
)
df_complexes = df_complexes.set_index(strip_plural(table_type))
pcmodel_tables[table_type] = df_complexes.copy()
if organism != "Human" and map_human_to_organism:
    # NOTE(review): `df_organism` is not defined in the visible part of this
    # notebook; confirm it is created before running with a non-human organism.
    discrepancies = df_organism[~df_organism["genes"].isin(df_complexes["genes"])]
    if not discrepancies.empty:
        # A discrepancy may arise if a protein was deleted from UniProt but has not yet been recorded in the organism database.
        # This variable can be used for manually checking
        print(f"Discrepancies from organism mapping: {len(discrepancies)}")
df_complexes

##### Enzymes

In [None]:
table_type = "enzymes"
# Same fallback chain as the other tables: model-specific file first, then
# the general OVERLAY file, and finally a table created from the model.
try:
    df_enzymes = pd.read_csv(
        model_filepaths[table_type],
        sep="\t",
        index_col=None,
    )
    print("Loaded from previously generated file")
except (FileNotFoundError, KeyError):
    try:
        df_enzymes = pd.read_csv(
            filepaths[table_type],
            sep="\t",
            index_col=None,
        )
    except (FileNotFoundError, KeyError):
        # Complex identifier -> reactions, taken from the complex table.
        complexes_to_reactions = (
            pcmodel_tables["complexes"].set_index("complexes")["reactions"].to_dict()
        )
        df_enzymes = create_enzyme_table(
            model,
            complexes_to_reactions=complexes_to_reactions,
            id_key=dict_of_id_keys.get(table_type),
            optional_columns=optional_columns,
            annotation_columns=[],
            replace_compartments=replace_compartments,
        )
        # Apply the compartment renaming to the new table's compartment column.
        if replace_compartments:
            df_enzymes["compartment"] = df_enzymes["compartment"].replace(
                replace_compartments
            )
        print("Created new table")
    else:
        print("Loaded from main RBC-GEM file")

if simplify_compartments:
    # One row per (complexes, enzyme); the distinct non-missing values of the
    # remaining columns are joined with ";" before the compartment is replaced
    # by the single proteome compartment.
    df_enzymes = df_enzymes.groupby(["complexes", "enzyme"]).agg(
        lambda column: ";".join(str(entry) for entry in column.dropna().unique())
    )
    df_enzymes["compartment"] = DEFAULT_PROTEOME_COMPARTMENT
    df_enzymes = df_enzymes.reset_index(drop=False)

# Full identifier: "<enzyme>_<compartment>".
df_enzymes[table_type] = df_enzymes[[strip_plural(table_type), "compartment"]].apply(
    lambda record: "_".join(record.values),
    axis=1,
)
df_enzymes = df_enzymes.set_index(strip_plural(table_type))
pcmodel_tables[table_type] = df_enzymes.copy()
df_enzymes

## Create PC-model

In [None]:
# Tables with their identifier moved back from the index into a column.
protein_table = pcmodel_tables["proteins"].reset_index(drop=False)
complex_table = pcmodel_tables["complexes"].reset_index(drop=False)
enzyme_table = pcmodel_tables["enzymes"].reset_index(drop=False)
# Same value as in the configuration cell.
max_weight_fraction = 100

pcmodel, final_pcmodel_tables = construct_pcmodel_from_tables(
    model,
    protein_table=protein_table,
    complex_table=complex_table,
    enzyme_table=enzyme_table,
    max_weight_fraction=max_weight_fraction,
    include_complex_dilutions=True,  # Relaxes constraints around complexes. Recommend to start, can be set to zero later or removed entirely
    irrev_rxn_complex_keff=0,  # Set as None to ignore, small number to keep in model, 0 to remove from complex-enzyme mapping
)
if simplify_compartments:
    pcmodel.compartments = {DEFAULT_PROTEOME_COMPARTMENT: "protein compartment"}
# Print summary
# For every model attribute listed in ATTR_SUBCLASS_DICT: first the number of
# objects that are not instances of any listed subclass, then the number of
# objects per subclass.
for attr, subclass_dict in ATTR_SUBCLASS_DICT.items():
    n = len(
        getattr(pcmodel, attr).query(
            lambda x: not isinstance(x, tuple(subclass_dict.values()))
        )
    )
    print(f"Number of {attr}: {n}")
    for key, subcls in subclass_dict.items():
        obj_list = getattr(pcmodel, attr).query(lambda x: isinstance(x, subcls))
        n = len(obj_list)
        print(f"Number of {key}: {n}")
        if subcls in (Enzyme, EnzymeDilution):
            # Enzyme objects are further split by the suffix found in their ID.
            print(
                f'Forward variable: {len(obj_list.query(lambda x: DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.forward"] in x.id))}/{n}'
            )
            print(
                f'Reverse variable: {len(obj_list.query(lambda x: DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.reverse"] in x.id))}/{n}'
            )
            print(
                f'Summation variable : {len(obj_list.query(lambda x: DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.total"] in x.id))}/{n}'
            )
    print()

# Build the rate constant tables from the final enzyme table.
keff_table = final_pcmodel_tables["enzymes"].copy()
# Reaction string of the associated reaction, written as forward ("-->") ...
keff_table["direction"] = keff_table["reactions"].apply(
    lambda rid: model.reactions.get_by_id(rid).reaction
)
keff_table["direction"] = keff_table["direction"].apply(
    lambda x: x.replace("<=>", "-->")
)
# ... and flipped to "<--" for enzymes whose ID ends with the reverse suffix.
keff_table["direction"] = keff_table[["enzyme", "direction"]].apply(
    lambda x: (
        x["direction"].replace("-->", "<--")
        if x["enzyme"].endswith(
            DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.reverse"]
        )
        else x["direction"]
    ),
    axis=1,
)
# One row per (enzyme, complex): the ";"-separated complexes and their rate
# constants are split and expanded together.
keff_table["complexes"] = keff_table["complexes"].apply(lambda x: x.split(";"))
keff_table["complex_keff"] = keff_table["complex_keff"].apply(lambda x: x.split(";"))
keff_table = keff_table.explode(["complexes", "complex_keff"])
# Complex name without its compartment. Only a trailing "_<compartment>" is
# removed, so an identical substring inside the name is left untouched.
keff_table["complex"] = keff_table[["complexes", "compartment"]].apply(
    lambda x: (
        x["complexes"][: -len(f"_{x['compartment']}")]
        if x["complexes"].endswith(f"_{x['compartment']}")
        else x["complexes"]
    ),
    axis=1,
)
# Keep the first distinct value of every column per (enzyme, complex) pair.
keff_table = keff_table.groupby(["enzyme", "complex"], as_index=False).agg(
    lambda x: list(x.unique())[0]
)
keff_table = keff_table.loc[
    :,
    [
        "enzyme",
        "enzyme_keff",
        "complex",
        "complex_keff",
        "compartment",
        "reactions",
        "direction",
    ],
]
complex_keff_table = keff_table.drop("enzyme_keff", axis=1).drop_duplicates()
enzyme_keff_table = (
    keff_table.groupby(["enzyme", "enzyme_keff"], as_index=False)[
        ["reactions", "direction"]
    ]
    .agg(lambda x: list(x.unique())[0])
    .drop_duplicates()
)
final_pcmodel_tables["complex_keffs"] = complex_keff_table
final_pcmodel_tables["enzyme_keffs"] = enzyme_keff_table

n_cplx_keff = len(
    complex_keff_table[complex_keff_table["complex_keff"].astype(float) != 0]
)
print(f"Number of non-zero complex rate constants: {n_cplx_keff}")

n_enzyme_keff = len(
    enzyme_keff_table[enzyme_keff_table["enzyme_keff"].astype(float) != 0]
)
print(f"Number of non-zero enzyme rate constants: {n_enzyme_keff}")


# Export every final table next to the model. The model ID is used explicitly
# in the file name, consistent with the other file names in this notebook.
for table_type, df_table in final_pcmodel_tables.items():
    df_table.to_csv(
        model_dirpath / f"pcmodel_{pcmodel.id}_{table_type}.tsv", sep="\t", index=False
    )

### Formulate additional protein constraints
#### Address isoforms and compartments with additional constraints
For isoforms and/or compartments, place an additional constraint such that the total sum of all isoforms does not exceed the measured concentration value.

In [None]:
# protein_table = pcmodel_tables["proteins"]
# mapping_key = "uniprot"
# df_additional_constraints = protein_table[protein_table[mapping_key].duplicated(False)]
# df_additional_constraints = df_additional_constraints.groupby(
#     [
#         "genes",
#         mapping_key,
#     ],
#     as_index=False,
# ).agg(lambda x: list(x))
# if not df_additional_constraints.empty:
#     if "lower_bound" in df_additional_constraints.columns:
#         df_additional_constraints["lower_bound"] = df_additional_constraints[
#             "lower_bound"
#         ].apply(min)
#     if "upper_bound" in df_additional_constraints.columns:
#         df_additional_constraints["upper_bound"] = df_additional_constraints[
#             "upper_bound"
#         ].apply(max)

# data = {}
# for idx, row in df_additional_constraints.iterrows():
#     # Technically, always one gene but refers to genes attribute
#     genes = row["genes"]
#     uniprot = model.genes.get_by_id(genes).annotation.get(mapping_key, "")
#     proteins = split_string(row.get("proteins"))
#     proteins = pcmodel.metabolites.get_by_any(proteins)
#     is_compartment = len({p.compartment for p in proteins}) > 1
#     is_isoform = (
#         len(
#             {
#                 p.id.replace(f"_{p.compartment}", "").split(
#                     "_",
#                 )[-1]
#                 for p in proteins
#                 if p.id.replace(f"_{p.compartment}", "")
#                 .split(
#                     "_",
#                 )[-1]
#                 .isnumeric()
#             }
#         )
#         > 1
#     )
#     if is_compartment and not is_isoform:
#         default_prefix = DEFAULT_PREFIX_SUFFIX_VALUES["constraints"]["prefix.compartent"]
#     elif is_isoform and not is_compartment:
#         default_prefix = DEFAULT_PREFIX_SUFFIX_VALUES["constraints"]["prefix.isoform"]
#     else:
#         default_prefix = DEFAULT_PREFIX_SUFFIX_VALUES["constraints"]["prefix.constraint"]
#     constraint_id = row.get("constraints", f"{default_prefix}{genes}")
#     lower_bound = float(row.get("lower_bound")) if row.get("lower_bound") else 0
#     upper_bound = (
#         float(row.get("upper_bound"))
#         if row.get("upper_bound")
#         else DEFAULT_CONCENTRATION_BOUND
#     )
#     protein_dilutions = [
#         reaction
#         for protein in proteins
#         for reaction in list(protein.reactions)
#         if reaction.id.endswith(protein.id)
#     ]
#     # "ISOCONS" is short for "ISOFORM CONSTRAINT"
#     # "COMPCONS" is short for "COMPARTMENT CONSTRAINT"
#     # "CONS" for general constraint
#     data[idx] = {
#         "constraints": constraint_id,
#         "genes": genes,
#         "proteins": build_string([p.id for p in proteins]),
#         "reactions": build_string([p.id for p in protein_dilutions]),
#         # Assume sum of isoforms is a constant, works well with proteomic measurements that do not distinguish
#         "coefficients": ";".join([str(1) for p in protein_dilutions]),
#         "lower_bound": lower_bound,
#         "upper_bound": upper_bound,
#         "unit": "nmol / gDW",
#         mapping_key: uniprot,
#     }
# df_additional_constraints = pd.DataFrame.from_dict(data, orient="index")
# df_additional_constraints.to_csv(
#     model_dirpath / f"pcmodel_{pcmodel.id}_constraints_proteins.tsv", sep="\t", index=False
# )
# df_additional_constraints

## Add additional protein constraints to model

In [None]:
# try:
#     df_additional_constraints = pd.read_csv(
#         model_dirpath / f"pcmodel_{pcmodel.id}_constraints_proteins.tsv",
#         sep="\t",
#         index_col=None,
#     )
# except (FileNotFoundError, pd.errors.EmptyDataError):
#     df_additional_constraints = pd.DataFrame()
# else:
#     if not df_additional_constraints.empty:
#         for constraint_id, row in df_additional_constraints.set_index(
#             "constraints"
#         ).iterrows():
#             reactions = pcmodel.reactions.get_by_any(row["reactions"].split(";"))
#             coefficients = row["coefficients"].split(";")
#             abundance = sum(
#                 [
#                     int(coeff) * reaction.flux_expression
#                     for reaction, coeff in zip(reactions, coefficients)
#                 ]
#             )
#             lower_bound = float(row.get("lower_bound")) if row.get("lower_bound") else 0
#             upper_bound = (
#                 float(row.get("upper_bound"))
#                 if row.get("upper_bound")
#                 else DEFAULT_CONCENTRATION_BOUND
#             )
#             if constraint_id in pcmodel.constraints:
#                 # TODO warn
#                 pcmodel.remove_cons_vars(pcmodel.constraints[constraint_id])
#             additional_constraint = pcmodel.problem.Constraint(
#                 abundance,
#                 name=constraint_id,
#                 lb=lower_bound,
#                 ub=upper_bound,
#             )
#             pcmodel.add_cons_vars(additional_constraint)

# df_additional_constraints

### Add other additional constraints

In [None]:
# constraints_ratios_filepath = overlay_dirpath / f"pcmodel_constraints_additional.tsv"

# df_constraints_additional = pd.read_csv(
#     constraints_ratios_filepath,
#     sep="\t",
#     index_col="constraints",
# )

# ratio_ids = set()
# skipped_constraints = set()
# not_found = set()
# for constraint_id, row in df_constraints_additional.iterrows():
#     subs_dict = {}
#     lhs = parse_expr(row["lhs"])
#     rhs = parse_expr(row["rhs"])

#     csense = row["csense"]
#     lb=None if csense == "<" else 0
#     ub=None if csense == ">" else 0
#     reactions = row["reactions"].split(";")
#     for reaction in reactions:
#         try:
#             reaction = model.reactions.get_by_id(reaction)
#         except Exception:
#             if reaction == str(rhs) or reaction == str(lhs):
#                 skipped_constraints.add(constraint_id)
#                 continue
#             else:
#                 not_found.add(reaction)
#                 subs_dict[reaction] = 0
#         else:
#             subs_dict[reaction.id] = reaction.flux_expression
#     if (str(rhs) == "0" or str(lhs) == "0") and len([r for r in reactions if r not in not_found]) <= 1:
#         skipped_constraints.add(constraint_id)
#     if constraint_id in skipped_constraints:
#         continue
#     abundance = lhs - rhs
#     abundance = abundance.subs(subs_dict)
#     if str(abundance) == "0":
#         print(f"{constraint_id} is always equal to 0, not including.")
#         skipped_constraints.add(constraint_id)
#         continue
#     try:
#         constraint = model.constraints[constraint_id]
#     except Exception:
#         pass
#     else:
#         model.remove_cons_vars(constraint)
#     constraint = model.problem.Constraint(
#         abundance=abundance,
#         name=constraint_id,
#         lb=float(lb) if lb is not None else lb,
#         ub=float(ub) if ub is not None else ub,
#     )
#     model.add_cons_vars(constraint)
#     # Convert units
#     if constraint.lb is not None:
#         constraint.lb = convert_L_to_gDW(float(constraint.lb))
#     if constraint.ub is not None:
#         constraint.ub = convert_L_to_gDW(float(constraint.ub))
#     df_constraints_additional.loc[constraint_id, "reactions"] = ";".join([r for r in reactions if r not in not_found])
#     print(constraint)
#     df_constraints_additional.loc[constraint_id, "lhs"] = str(lhs)
#     df_constraints_additional.loc[constraint_id, "rhs"] = str(rhs)


# df_constraints_additional = df_constraints_additional.loc[~df_constraints_additional.index.isin(not_found.union(skipped_constraints))]
# df_constraints_additional.to_csv(model_dirpath / f"pcmodel_{pcmodel.id}_constraints_additional.tsv", sep="\t", index=False)
# df_constraints_additional

### Annotate objects for protein constraints
Annotating objects that are used for protein constraints will make subsequent analyses easier to perform.
#### Proteins

In [None]:
# Annotate every Protein metabolite, and its dilution reaction, with the
# gene, UniProt and sequence columns of the protein table.
table_type = "proteins"
table_cols = ["genes", "uniprot", "sequence"]
obj_type = Protein

# Index by protein identifier and store the sequence under "uniprot.sequence"
table = protein_table.set_index(table_type)[table_cols].rename(
    columns={"sequence": "uniprot.sequence"}
)

# One {identifier: value} lookup per annotation column
annotation_mappings = {column: table[column].to_dict() for column in table.columns}
dilution_prefix = DEFAULT_PREFIX_SUFFIX_VALUES[table_type]["prefix.dilution"]

for met in pcmodel.metabolites.query(lambda x: isinstance(x, obj_type)):
    # Only truthy values are written to the annotation
    annotation_dict = {}
    for column, lookup in annotation_mappings.items():
        if lookup.get(met.id):
            annotation_dict[column] = lookup[met.id]
    met.annotation.update(annotation_dict)
    # The dilution reaction ID is the dilution prefix followed by the metabolite ID
    rxn = pcmodel.reactions.get_by_id(f"{dilution_prefix}{met.id}")
    rxn.annotation.update(annotation_dict)
table

#### Complexes

In [None]:
# Annotate every Complex metabolite, and its dilution reaction, with the
# genes of the complex and the UniProt annotations of those genes.
table_type = "complexes"
table_cols = ["genes"]
obj_type = Complex
table = complex_table.set_index(table_type)[table_cols]


def _genes_to_uniprot(gene_string):
    """Pass the ``uniprot`` annotation of each listed gene to ``build_string``."""
    gene_objects = pcmodel.genes.get_by_any(split_string(gene_string))
    return build_string([gene.annotation["uniprot"] for gene in gene_objects])


# Add uniprot to table
table["uniprot"] = table["genes"].apply(_genes_to_uniprot)

# One {identifier: value} lookup per annotation column
annotation_mappings = {column: table[column].to_dict() for column in table.columns}
dilution_prefix = DEFAULT_PREFIX_SUFFIX_VALUES[table_type]["prefix.dilution"]

for met in pcmodel.metabolites.query(lambda x: isinstance(x, obj_type)):
    # Only truthy values are written to the annotation
    annotation_dict = {}
    for column, lookup in annotation_mappings.items():
        if lookup.get(met.id):
            annotation_dict[column] = lookup[met.id]
    met.annotation.update(annotation_dict)
    # The dilution reaction ID is the dilution prefix followed by the metabolite ID
    rxn = pcmodel.reactions.get_by_id(f"{dilution_prefix}{met.id}")
    rxn.annotation.update(annotation_dict)
table

#### Enzymes

In [None]:
# Annotate every Enzyme metabolite, and its dilution reaction, with the
# reactions listed in the enzyme table. The table is extended with one
# "total" row per enzyme row so that total enzymes are annotated as well.
table_type = "enzymes"
table_cols = ["reactions"]
obj_type = Enzyme

enzyme_rows = enzyme_table.set_index(table_type)[table_cols]

# Add enzyme totals to table: keep everything before the last two
# underscore-delimited fields, append the "total" suffix, then re-attach the
# final field (presumably direction and compartment -- confirm against the
# enzyme ID scheme).
total_ids = []
for enzyme_id in enzyme_rows.index:
    stem = enzyme_id.rsplit("_", maxsplit=2)[0]
    last_field = enzyme_id.rsplit("_", maxsplit=1)[-1]
    total_ids.append(
        f'{stem}{DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.total"]}_{last_field}'
    )
total_rows = enzyme_rows.copy()
total_rows.index = pd.Index(total_ids, name=enzyme_rows.index.name)
table = pd.concat((enzyme_rows, total_rows))

# One {identifier: value} lookup per annotation column
annotation_mappings = {column: table[column].to_dict() for column in table.columns}
dilution_prefix = DEFAULT_PREFIX_SUFFIX_VALUES[table_type]["prefix.dilution"]

for met in pcmodel.metabolites.query(lambda x: isinstance(x, obj_type)):
    # Only truthy values are written to the annotation
    annotation_dict = {}
    for column, lookup in annotation_mappings.items():
        if lookup.get(met.id):
            annotation_dict[column] = lookup[met.id]
    met.annotation.update(annotation_dict)
    # The dilution reaction ID is the dilution prefix followed by the metabolite ID
    rxn = pcmodel.reactions.get_by_id(f"{dilution_prefix}{met.id}")
    rxn.annotation.update(annotation_dict)
table

### Set budget constraints for low-abundance and high-abundance proteomes
* RBCs are enucleated, terminally differentiated cells that are composed of 95% to 98% Hb by dry mass (mass of all the constituents of a cell in the absence of water)
    * PMID: 13429433, PMID: 13999462, PMID: 21796773, **PMID: 34378368**
* Therefore, remove hemoglobin from the low abundance proteome budget constraint and create a new constraint specific to hemoglobin abundance.
* Assume that at least 90-95% of dry mass is hemoglobin and that at most 5-10% of dry mass is other proteins.

In [None]:
# Partition the total proteome budget into two sectors: one for hemoglobin
# and one for the low abundance proteome.
budget_rxn_prefix = DEFAULT_PREFIX_SUFFIX_VALUES["budgets"]["prefix.dilution"]
budget_met_prefix = DEFAULT_PREFIX_SUFFIX_VALUES["budgets"]["prefix.metabolite"]

# Existing total budget metabolite and its budget reaction
budget_met_total = pcmodel.metabolites.get_by_id(f"{budget_met_prefix}total")
budget_rxn_total = pcmodel.reactions.get_by_id(f"{budget_rxn_prefix}{budget_met_total}")

# Low abundance sector: copy the total budget reaction, then relabel the
# copied metabolite first because the reaction ID is derived from its ID.
budget_rxn_lap = budget_rxn_total.copy()
budget_met_lap = list(budget_rxn_lap.metabolites)[-1]
budget_met_lap.id = f"{budget_met_prefix}proteome"
budget_met_lap.name = "Budget constraint (Low abundance proteins)"
budget_rxn_lap.id = f"{budget_rxn_prefix}{budget_met_lap.id}"
budget_rxn_lap.name = "Budget demand (Low abundance proteins)"

# Hemoglobin sector: same procedure with hemoglobin-specific labels
budget_rxn_hbp = budget_rxn_total.copy()
budget_met_hbp = list(budget_rxn_hbp.metabolites)[-1]
budget_met_hbp.id = f"{budget_met_prefix}hemoglobin"
budget_met_hbp.name = "Budget constraint (Hemoglobin proteins)"
budget_rxn_hbp.id = f"{budget_rxn_prefix}{budget_met_hbp.id}"
budget_rxn_hbp.name = "Budget demand (Hemoglobin proteins)"

# Budget bounds for generic model: of a total capped at 1000, hemoglobin is
# held between 900 and 1000 and the low abundance proteome between 0 and 100.
budget_rxn_total.bounds = (0, 1000)
budget_rxn_hbp.bounds = (900, 1000)
budget_rxn_lap.bounds = (0, 100)
pcmodel.add_reactions([budget_rxn_lap, budget_rxn_hbp])

# Divide total budget into hemoglobin and low abundance
budget_reactions = pcmodel.reactions.query(lambda x: isinstance(x, BudgetDilution))
for reaction in budget_met_total.reactions:
    # The budget reactions themselves are rewired in the loop further down
    if reaction.id in budget_reactions:
        continue
    # Hemoglobin if the reaction ID contains a hemoglobin protein metabolite ID
    is_hemoglobin = any(
        f'{DEFAULT_PREFIX_SUFFIX_VALUES["proteins"]["prefix.metabolite"]}{gid}'
        in reaction.id
        for gid in hemoglobin_proteins
    )
    budget_met = budget_met_hbp if is_hemoglobin else budget_met_lap
    coeff = reaction.get_coefficient(budget_met_total)
    # Add the coefficient to the sector metabolite and cancel it on the total
    reaction.add_metabolites({budget_met: coeff, budget_met_total: -coeff})

for budget_rxn in budget_reactions:
    # Ensure budget total is reactant in its own reaction, otherwise is a product
    coeff = -1 if budget_rxn.id == budget_rxn_total.id else 1
    budget_rxn.add_metabolites({budget_met_total: coeff}, combine=False)
    print(budget_rxn)

### Ensure model can be optimized for the `NaKt` objective

In [None]:
# Set the objective to the summed flux of the listed reactions, optimize the
# protein-constrained model, and show the non-zero fluxes of the reactions
# that belong to the regular model.
objective_rxns = ["NaKt"]
pcmodel.objective = sum(
    pcmodel.reactions.get_by_id(rid).flux_expression for rid in objective_rxns
)
pcsol = pcmodel.optimize()

# Build the non-zero flux index once instead of once per reaction
nonzero_flux_ids = pcsol.fluxes[pcsol.fluxes != 0].index
pcsol.fluxes.loc[
    [r.id for r in model.reactions if r.id in nonzero_flux_ids]
].sort_index()

In [None]:
# Show the non-zero fluxes of the ProteinDilution reactions.
# Build the non-zero flux index once instead of once per reaction.
nonzero_flux_ids = pcsol.fluxes[pcsol.fluxes != 0].index
pcsol.fluxes.loc[
    [
        r.id
        for r in pcmodel.reactions.query(lambda x: isinstance(x, ProteinDilution))
        if r.id in nonzero_flux_ids
    ]
].sort_index()

In [None]:
# Show the non-zero fluxes of the EnzymeDilution reactions.
# Build the non-zero flux index once instead of once per reaction.
nonzero_flux_ids = pcsol.fluxes[pcsol.fluxes != 0].index
pcsol.fluxes.loc[
    [
        r.id
        for r in pcmodel.reactions.query(lambda x: isinstance(x, EnzymeDilution))
        if r.id in nonzero_flux_ids
    ]
].sort_index()

### Export model

In [None]:
# Write the regular model first, then the protein constrained model
# (without curated keffs), each as XML and then JSON. Files are named after
# the string form of the model.
for cobra_model in (model, pcmodel):
    for extension in ("xml", "json"):
        write_cobra_model(
            cobra_model,
            filename=model_dirpath / f"{cobra_model}.{extension}",
        )

### Update rate constants

In [None]:
# pcmodel_curated = load_overlay_model(filename=model_dirpath / f"{pcmodel.id}.xml")
# pcmodel_curated.id += "_keff_curated"
# df_complex_keffs = pd.read_csv(overlay_dirpath / "pcmodel_complex_keffs.tsv", sep="\t", index_col=None)
# df_enzyme_keffs = pd.read_csv(overlay_dirpath / "pcmodel_enzyme_keffs.tsv", sep="\t", index_col=None)

# cf = 1 / 1e6  # Conversion factor from nmol to mmol
# if simplify_compartments:
#     df_enzyme_keffs["compartment"] = DEFAULT_PROTEOME_COMPARTMENT
#     df_complex_keffs["compartment"] = DEFAULT_PROTEOME_COMPARTMENT

# df_enzyme_keffs["enzymes"] = df_enzyme_keffs[["enzyme", "compartment"]].apply(lambda x: "_".join(x.values), axis=1)
# df_complex_keffs["enzymes"] = df_complex_keffs[["enzyme", "compartment"]].apply(lambda x: "_".join(x.values), axis=1)
# df_complex_keffs["complexes"] = df_complex_keffs[["complex", "compartment"]].apply(lambda x: "_".join(x.values), axis=1)
# df_complex_keffs = df_complex_keffs[df_complex_keffs["complexes"].isin(
#     pcmodel_curated.metabolites.query(
#         lambda x: x.id.startswith("cplx_")).list_attr("id")
# )]

# df_complex_keffs["complex_keff"] = df_complex_keffs["complex_keff"].astype(float)
# df_enzyme_keffs["enzyme_keff"] = df_enzyme_keffs["enzyme_keff"].astype(float)

# df_enzyme_keffs = df_enzyme_keffs.drop_duplicates()
# df_complex_keffs = df_complex_keffs.drop_duplicates()
# for _, row in df_complex_keffs.iterrows():
#     enz = row["enzymes"]
#     try:
#         enz = pcmodel_curated.metabolites.get_by_id(enz)
#     except KeyError:
#         if enz.replace(DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.reverse"], "") in pcmodel_curated.metabolites:
#             enz_other_dir = pcmodel_curated.metabolites.get_by_id(enz.replace(DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]["suffix.reverse"], ""))
#             missing_enz = enz_other_dir.copy()
#             missing_enz.id = enz
#             pcmodel_curated.add_metabolites([missing_enz])

#             enzyme_keff = df_enzyme_keffs[df_enzyme_keffs["enzymes"] == missing_enz.id]["enzyme_keff"].item()
#             df = df_complex_keffs[df_complex_keffs["enzymes"] == missing_enz.id].copy()
#             for _, (enzyme, cplx, complex_keff) in df[["enzyme", "complexes", "complex_keff"]].iterrows():
#                 if complex_keff == 0 or enzyme_keff == 0:
#                     continue

#                 keff = float(complex_keff) / float(enzyme_keff)
#                 formation_rxn = add_complex_formation_reaction(
#                     pcmodel_curated,
#                     missing_enz,
#                     "enzyme",
#                     coeff_map=f"{cplx}({keff})",
#                 )
#             if enzyme_keff != 0:
#                 dilution_rxn = add_dilution_reaction(
#                     pcmodel_curated,
#                     missing_enz,
#                     "enzyme",
#                 )

#                 for r in enz_other_dir.reactions:
#                     if not r.id in model.reactions:
#                         continue
#                     sign = -1 if enz_other_dir in r.reactants else 1
#                     pcmodel_curated.reactions.get_by_id(r.id).add_metabolites(
#                         {missing_enz: sign * (1 / enzyme_keff / cf)}, combine=False
#                     )
#     else:
#         enzyme_keff = df_enzyme_keffs[df_enzyme_keffs["enzymes"] == enz.id]["enzyme_keff"].item()
#         df = df_complex_keffs[df_complex_keffs["enzymes"] == enz.id].copy()
#         for _, (enzyme, cplx, complex_keff) in df[["enzyme", "complexes", "complex_keff"]].iterrows():
#             if complex_keff == 0 or enzyme_keff == 0:
#                 continue
#             keff = float(complex_keff) / float(enzyme_keff)
#             try:
#                 formation_rxn = pcmodel_curated.reactions.get_by_id(f"ENZFM_{enzyme}_{cplx}")
#             except KeyError:
#                 formation_rxn = add_complex_formation_reaction(
#                     pcmodel_curated,
#                     enz,
#                     "enzyme",
#                     coeff_map=f"{cplx}({keff})",
#                 )
#             else:
#                 formation_rxn.add_metabolites({cplx: -keff}, combine=False)
#         if enzyme_keff != 0:
#             for r in enz.reactions:
#                 if not r.id in model.reactions:
#                     continue
#                 sign = 1 if enz in r.products else -1
#                 pcmodel_curated.reactions.get_by_id(r.id).add_metabolites(
#                     {enz: sign * (1 / enzyme_keff / cf)}, combine=False
#                 )

# pcmodel_curated.remove_metabolites(pcmodel_curated.metabolites.query(lambda x: not x.reactions))
# df_complex_keffs.to_csv(model_dirpath / f"complex_keffs_{pcmodel_curated.id}.tsv", sep="\t", index=False)
# df_enzyme_keffs.to_csv(model_dirpath / f"enzyme_keffs_{pcmodel_curated.id}.tsv", sep="\t", index=False)

# # Print summary
# for attr, subclass_dict in ATTR_SUBCLASS_DICT.items():
#     n = len(getattr(pcmodel_curated, attr).query(lambda x: not isinstance(x, tuple(subclass_dict.values()))))
#     print(f"Number of {attr}: {n}")
#     for key, subcls in subclass_dict.items():
#         n = len(getattr(pcmodel_curated, attr).query(lambda x: isinstance(x, subcls)))
#         print(f"Number of {key}: {n}")
#     print()

# # Print summary
# for attr, subclass_dict in ATTR_SUBCLASS_DICT.items():
#     n = len(getattr(pcmodel_curated, attr).query(lambda x: not isinstance(x, tuple(subclass_dict.values()))))
#     print(f"Number of {attr}: {n}")
#     for key, subcls in subclass_dict.items():
#         n = len(getattr(pcmodel_curated, attr).query(lambda x: isinstance(x, subcls)))
#         print(f"Number of {key}: {n}")
#     print()


# try:
#     df_curated_complex_keffs = final_pcmodel_tables["complex_keffs"].set_index("enzymes")[["complexes", "complex_keff"]].copy()
# except KeyError:
#     print(f"Number of non-zero complex rate constants (curated): 0")
# else:
#     df_curated_complex_keffs = df_curated_complex_keffs.explode(["complexes", "complex_keff"]).reset_index(drop=False).drop_duplicates()
#     df_curated_complex_keffs = df_curated_complex_keffs[df_curated_complex_keffs["complex_keff"].astype(float) != 0.]
#     print(f"Number of non-zero complex rate constants (curated): {len(df_curated_complex_keffs)}")
# finally:
#     print(f"Number of non-zero complex rate constants (total): {n_cplx_keff}")
# formation_rxn
# try:
#     df_curated_enzyme_keffs = final_pcmodel_tables["enzyme_keffs"][["enzymes", "enzyme_keff"]].copy()
# except KeyError:
#     print(f"Number of non-zero enzyme rate constants (curated): 0")
# else:
#     df_curated_enzyme_keffs = df_curated_enzyme_keffs[df_curated_enzyme_keffs["enzyme_keff"].astype(float) != 0.]
#     print(f"Number of non-zero enzyme rate constants (curated): {len(df_curated_enzyme_keffs)}")
# finally:
#     print(f"Number of non-zero enzyme rate constants (total): {n_enzyme_keff}")

# write_cobra_model(pcmodel_curated, filename=model_dirpath / f"{pcmodel_curated}.xml")
# write_cobra_model(pcmodel_curated, filename=model_dirpath / f"{pcmodel_curated}.json")
# pcmodel_curated

In [None]:
# Display the regular model as the cell output
model

In [None]:
# Display the protein-constrained model as the cell output
pcmodel