# Simulate models using pcFVA - REDS Recall, ATP11C V972M
## Setup
### Import packages

In [None]:
import io
import re
import shutil
import tempfile
import warnings
import zipfile
from collections import defaultdict
from pathlib import Path

import gurobipy as gp
import numpy as np
import pandas as pd
from cobra.core import get_solution
from cobra.exceptions import OptimizationError
from cobra.flux_analysis.variability import (
    find_blocked_reactions,
    flux_variability_analysis,
)
from cobra.manipulation import remove_genes
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    get_dirpath,
    handle_msg,
    read_cobra_model,
    show_versions,
    write_cobra_model,
)
from rbc_gem_utils.analysis.overlay import (
    DEFAULT_PREFIX_SUFFIX_VALUES,
    DEFAULT_PROTEOME_COMPARTMENT,
    BudgetDilution,
    ProteinDilution,
    add_relaxation_budget,
    load_overlay_model,
)

# Silence Gurobi by switching off both of its output-related parameters
for gurobi_param in ("OutputFlag", "LogToConsole"):
    gp.setParam(gurobi_param, 0)

# Report the package versions used by this notebook
show_versions()

### Define configuration
#### COBRA Configuration

In [None]:
COBRA_CONFIGURATION.solver = "gurobi"
# Set bound defaults much larger to prevent model loading issues.
# The lower default must mirror the upper one; the previous value of -1e-8
# was a typo that made the default lower bound effectively zero.
COBRA_CONFIGURATION.bounds = (-1e8, 1e8)
COBRA_CONFIGURATION.tolerance = 1e-9
COBRA_CONFIGURATION.processes = 30  # Cannot exceed number of cores - 1
# Bare expression so the notebook displays the resulting configuration
COBRA_CONFIGURATION

### Define organism, model, and dataset

In [None]:
# Identifiers that are combined further below to build the input and output paths
organism = "Human"
model_id = "RBC_GEM"
dataset_name = "REDSRecall"
genotype = "ATP11C_V972M"
# Key of the grouped data: the genotype followed by "_Sample"
grouped_data_key = f"{genotype}_Sample"
# Bare expression so the notebook displays the resulting key
grouped_data_key

### Set variables for sample identification

In [None]:
# Column names that identify samples and donors
sample_key = "SAMPLE ID"
donor_key = "PUBLIC RECALL DONOR ID"

# Donor identifiers: the letter "S" followed by exactly three digits
donor_re = re.compile(r"(?P<donor>S(?P<num>\d\d\d))")
# Capitalized names of the aggregate operations, joined as a regex alternation
operations = "|".join(name.capitalize() for name in ("mean", "median"))
# Aggregate identifiers: "<Operation>_<group>"
operation_re = re.compile(rf"(?P<op>{operations})\_(?P<group>\w+)")
# Sample identifiers: a donor ID followed by an underscore, not starting with an operation name
sample_id_re = re.compile(rf"(?!{operations}){donor_re.pattern}\_")

### Set computation options

In [None]:
# File type used when reading and writing sample models
ftype = "xml"  # In our experience, SBML/XML loads faster, but will take up to 4x more space uncompressed as compared to JSON
run_computations = True  # Keep off to use previously computed results
overwrite = True  # Whether to allow overwriting of previous simulation results
verbose = True  # Passed as `print_msg` to most status messages

# Objective reactions
objective_reactions = ["NaKt"]
# Reactions that must have the capability to carry flux, sort for consistency
required_flux_reactions = ["PSFLIPt"]  # Add reactions to this list
# Objective reactions are always part of the required set; duplicates are dropped
required_flux_reactions = sorted(set(objective_reactions + required_flux_reactions))

only_flux_abundance_reactions = (
    False  # Only simulate reactions necessary for flux-abundance correlations
)
# When True, the relaxation budget upper bound is set to the smallest value
# found that still allows flux through the required reactions.
min_relax_budget_for_objectives = True
# Remove blocked reactions before pcFVA simulation.
# For large models and/or multiple runs at different optimums, will speed up computation and potentially improve results.
remove_blocked_reactions = True
# Proteins whose relaxation reactions are forced to be inactive (bounds of zero)
protein_relaxations_to_restrict = ["ATP11A", "ATP11B", "ATP11C", "TMEM30A"]
# Proteins whose protein constraint gets its lower bound set to zero, so that
# the protein is not required to be used. Intended to avoid forcing an increase
# of associated subunits that would conflict with known physiology.
protein_constraints_lb_to_relax = ["TMEM30A"]
# Expected as percentages (e.g., 0, 50, 90, 99, 100)
optimum_percents = [0]
# Factor for parsimonious FBA (pFBA); None leaves the pFBA constraint off
pfba_factor = None
# Whether to run loopless pcFVA
loopless = False

# Keyword arguments used when writing ZIP archives
zip_kwargs = dict(compression=zipfile.ZIP_DEFLATED, compresslevel=None)

In [None]:
# Naming conventions of the overlay, grouped by component type
_protein_affixes = DEFAULT_PREFIX_SUFFIX_VALUES["proteins"]
_enzyme_affixes = DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]
_budget_affixes = DEFAULT_PREFIX_SUFFIX_VALUES["budgets"]

# Proteins
protein_rxn_prefix = _protein_affixes["prefix.dilution"]
protein_met_prefix = _protein_affixes["prefix.metabolite"]
relaxation_rxn_prefix = _protein_affixes["prefix.relaxation"]
# Enzymes
enzyme_met_suffix_total = _enzyme_affixes["suffix.total"]
enzyme_rxn_prefix = _enzyme_affixes["prefix.dilution"]
enzyme_met_prefix = _enzyme_affixes["prefix.metabolite"]
# Budgets
budget_rxn_prefix = _budget_affixes["prefix.dilution"]
budget_met_prefix = _budget_affixes["prefix.metabolite"]
# Suffix appended to identifiers located in the proteome compartment
comp_suffix = "_" + f"{DEFAULT_PROTEOME_COMPARTMENT}"

### Set figure options

In [None]:
# Figure export settings (not referenced by the code visible in this section)
save_figures = True
transparent = False
imagetype = "svg"

### Set paths

In [None]:
# Input locations: the overlay analysis directory and the model files inside it
overlay_dirpath = get_dirpath("analysis") / "OVERLAY" / organism
model_dirpath = overlay_dirpath / model_id

# Root for everything derived from this dataset and genotype
processed_root = get_dirpath(use_temp="processed")
results_dirpath = (
    processed_root / model_id / "OVERLAY" / organism / dataset_name / grouped_data_key
)

# pcFVA results are nested by the required-flux reactions, then by the objective reactions
required_dirname = "_".join(["REQ", *required_flux_reactions])
objective_dirname = "_".join(["OBJ", *objective_reactions])
pcfva_results_dirpath = (
    results_dirpath / "pcFVA" / required_dirname / objective_dirname
)
pcfva_results_dirpath.mkdir(parents=True, exist_ok=True)

# Directories that are stored as ZIP archives next to their unzipped form
sample_pcmodels_dirpath = results_dirpath / "pcmodels"
reduced_models_dirpath = pcfva_results_dirpath.parent / "reduced_pcmodels"

## Load RBC-GEM model

In [None]:
# Locate the base model and its protein-constrained (PC) counterpart
model_path = model_dirpath / f"{model_id}.xml"
pcmodel_path = model_dirpath / f"{model_id}_PC.xml"

model = read_cobra_model(filename=model_path)
pcmodel = load_overlay_model(filename=pcmodel_path)

# A zero-sized relaxation budget is added only so that the relaxation
# reactions exist in the reference PC model and can be looked up by name
add_relaxation_budget(pcmodel, 0, verbose=False)
pcmodel

### Check settings

In [None]:
# Validate the user-defined settings against the loaded PC model before any
# expensive computation starts. Each check raises a ValueError listing the
# offending values.
invalid = sorted({x for x in objective_reactions if x not in pcmodel.reactions})
if invalid:
    raise ValueError(f"Objective reactions not found in model: {invalid}")
invalid = sorted({x for x in required_flux_reactions if x not in pcmodel.reactions})
if invalid:
    raise ValueError(f"Required flux capable reactions not found in model: {invalid}")
invalid = sorted(
    {
        x
        for x in protein_relaxations_to_restrict + protein_constraints_lb_to_relax
        if x not in pcmodel.genes
    }
)
if invalid:
    raise ValueError(f"Genes/Proteins not found in model: {invalid}")
invalid = sorted({x for x in optimum_percents if x < 0})
if invalid:
    raise ValueError(f"Optimum values must be non-negative: {invalid}")
# Optimum values are percentages that are later divided by 100 to obtain the
# fraction of optimum, which cannot exceed 1.
invalid = sorted({x for x in optimum_percents if x > 100})
if invalid:
    raise ValueError(f"Optimum values are percentages and cannot exceed 100: {invalid}")
# A factor of exactly 1 is accepted, so the message says "or equal to"
if pfba_factor is not None and pfba_factor < 1:
    raise ValueError("The pFBA factor should be greater than or equal to 1 if set.")

# Human-readable summary of the options used for this run
simulation_options_text = "\n".join(
    (
        "Simulation Options",
        "================================",
        f"min_relax_budget_for_objectives:\n\t{min_relax_budget_for_objectives}",
        f"only_flux_abundance_reactions:\n\t{only_flux_abundance_reactions}",
        f"remove_blocked_reactions:\n\t{remove_blocked_reactions}",
        f"pFBA_factor:\n\t{pfba_factor}",
        f"loopless:\n\t{loopless}",
        f"objective_reactions:\n\t{objective_reactions}",
        f"required_flux_reactions:\n\t{required_flux_reactions}",
        f"protein_relaxations_to_restrict:\n\t{protein_relaxations_to_restrict}",
        f"protein_constraints_lb_to_relax:\n\t{protein_constraints_lb_to_relax}",
    )
)
print(simulation_options_text)

### Define list of PC-models to load for simulation

In [None]:
# Imported here because the duplicate report below needs it and the notebook's
# import cell only brings in `defaultdict`.
from collections import Counter

# Sample models may live in the ZIP archive, in the unzipped directory, or both.
sample_pcmodels_zip = Path(f"{sample_pcmodels_dirpath}.zip")
if not sample_pcmodels_zip.exists() and not sample_pcmodels_dirpath.exists():
    raise FileNotFoundError(
        f"No PC-models found, neither {sample_pcmodels_zip} nor {sample_pcmodels_dirpath} exists"
    )

pcmodel_names = []
if sample_pcmodels_zip.exists():
    with zipfile.ZipFile(sample_pcmodels_zip, mode="r") as zfile:
        # Model names are the member file names without extension; directory
        # entries (names ending with "/") are not models and are skipped.
        pcmodel_names += sorted(
            Path(x).stem for x in zfile.namelist() if x and not x.endswith("/")
        )
if sample_pcmodels_dirpath.exists():
    # Drop the extension here as well, so that names from both sources are
    # comparable and can be used to rebuild "<name>.<ftype>" when loading.
    pcmodel_names += sorted(
        filename.stem for filename in sample_pcmodels_dirpath.iterdir()
    )
# A model that is present more than once (e.g. zipped and unzipped) is ambiguous
if len(pcmodel_names) != len(set(pcmodel_names)):
    raise ValueError(
        f"Duplicates found: {[k for k, v in Counter(pcmodel_names).items() if v > 1]}"
    )

models_to_simulate = sorted(set(pcmodel_names))
handle_msg(f"Number of models to simulate: {len(models_to_simulate)}", print_msg=True)
models_to_simulate;

### Generate results using pcFVA for context specific models
Note that this can take a significant amount of time, depending on the number of models and their sizes. It is best to use a targeted approach when generating results.
Alternatively, skip result generation and load the previously generated results.

In [None]:
# Extra reactions to simulate on top of the minimum flux-abundance set
list_of_reactions = []
# Include ALL reactions of the original model
list_of_reactions.extend(model.reactions.list_attr("id"))
# # Alternative: include ALL reactions of the PC model
# list_of_reactions.extend(pcmodel.reactions.list_attr("id"))

#### Generate results for subset of PC model reactions
##### Reactions necessary for all flux-abundance correlation computations.
To reduce computation time, a subset of reactions can be defined.
For flux-abundance correlations, the minimum reaction set consists of the reactions that have associated genes, together with the corresponding enzyme dilution reactions for total enzyme.

In [None]:
# Start with the reactions of the base model that have a gene-reaction rule
min_reaction_list = model.reactions.query(
    lambda rxn: rxn.gene_reaction_rule
).list_attr("id")

# Add protein dilutions (to see the effective protein concentrations used),
# followed by the budget dilutions
for dilution_type in (ProteinDilution, BudgetDilution):
    min_reaction_list.extend(
        pcmodel.reactions.query(
            lambda rxn: isinstance(rxn, dilution_type)
        ).list_attr("id")
    )

# Total-enzyme metabolites; already limited to reactions with gene reaction rules
enzyme_totals_list = pcmodel.metabolites.query(
    lambda met: met.id.startswith(f"{enzyme_met_prefix}")
    and enzyme_met_suffix_total in met.id
)
# Map each enzyme dilution reaction ID to the bare enzyme identifier, i.e. the
# metabolite ID without its prefix and without the "<total suffix>_<compartment>" tail
enzyme_reaction_map = {}
for total_met in enzyme_totals_list:
    bare_enzyme_id = total_met.id.replace(f"{enzyme_met_prefix}", "")
    bare_enzyme_id = bare_enzyme_id.replace(
        f"{enzyme_met_suffix_total}_{total_met.compartment}", ""
    )
    enzyme_reaction_map[f"{enzyme_rxn_prefix}{total_met}"] = bare_enzyme_id

# Combine lists
min_reaction_list.extend(enzyme_reaction_map)
handle_msg(
    f"Minimum number of reactions minimize/maximize (minimum): {len(min_reaction_list)} / {len(pcmodel.reactions)}",
    print_msg=True,
)

##### Refined set of PC model reactions

In [None]:
if not only_flux_abundance_reactions:
    # Normalize entries to plain IDs (objects expose `_id`, strings pass through)
    list_of_reactions = [getattr(rid, "_id", rid) for rid in list_of_reactions]
    # Union of the minimum set and the additional reactions, in sorted order
    combined_reactions = set(min_reaction_list) | set(list_of_reactions)
    reaction_list = sorted(
        getattr(entry, "_id", entry) for entry in combined_reactions
    )
else:
    # Only the reactions needed for flux-abundance correlations
    reaction_list = list(min_reaction_list)
handle_msg(
    f"Number of reactions minimize/maximize (chosen): {len(reaction_list)} / {len(pcmodel.reactions)}",
    print_msg=True,
)

### Determine previously existing solutions

In [None]:
# Maps a directory name to the set of result names already present for it,
# collected from both the ZIP archive and the unzipped directory.
existing_files = defaultdict(set)
handle_msg("Format: (zipped + unzipped)/total", print_msg=True)

if remove_blocked_reactions:
    reduced_models_dirpath.mkdir(parents=True, exist_ok=True)
    reduced_key = reduced_models_dirpath.name
    reduced_zip = Path(f"{reduced_models_dirpath}.zip")
    if overwrite or not (reduced_zip.exists() or reduced_models_dirpath.exists()):
        # Nothing is reused; only register the key with an empty set
        existing_files[reduced_key].update([])
    else:
        # Search the reduced model ZIP file first, then the directory.
        # Reduced models are tracked by name without the file extension.
        if reduced_zip.exists():
            with zipfile.ZipFile(f"{reduced_zip}", mode="r") as archive:
                existing_files[reduced_key].update(
                    Path(member).name.replace(f".{ftype}", "")
                    for member in archive.namelist()
                    if member
                )
        in_the_zip = len(existing_files[reduced_key])
        existing_files[reduced_key].update(
            entry.name.replace(f".{ftype}", "")
            for entry in reduced_models_dirpath.iterdir()
        )
        total = len(existing_files[reduced_key])
        handle_msg(
            f"Number of existing reduced models: ({in_the_zip} + {total - in_the_zip}) / {total}\n",
            print_msg=True,
        )

for optimum in optimum_percents:
    # Directory name encodes the optimum plus the pFBA/loopless options;
    # decimals are replaced with hyphens
    name_parts = [f"Opt{optimum}"]
    if pfba_factor:
        name_parts.append(f"_pFBA{pfba_factor}")
    if loopless:
        name_parts.append("loopless")
    dirname = "".join(name_parts).replace(".", "-")

    # Results at an optimum of 0 are stored one level above the objective directory
    if optimum == 0:
        optimum_dirpath = pcfva_results_dirpath.parent / dirname
    else:
        optimum_dirpath = pcfva_results_dirpath / dirname
    optimum_dirpath.mkdir(parents=True, exist_ok=True)

    optimum_key = optimum_dirpath.name
    optimum_zip = Path(f"{optimum_dirpath}.zip")
    if overwrite or not (optimum_zip.exists() or optimum_dirpath.exists()):
        # Nothing is reused; only register the key with an empty set
        existing_files[optimum_key].update([])
        continue

    # Search the solution ZIP file first, then the directory.
    # Solutions are tracked by their full file name.
    if optimum_zip.exists():
        with zipfile.ZipFile(f"{optimum_zip}", mode="r") as archive:
            existing_files[optimum_key].update(
                Path(member).name for member in archive.namelist() if member
            )
    in_the_zip = len(existing_files[optimum_key])
    existing_files[optimum_key].update(
        entry.name for entry in optimum_dirpath.iterdir()
    )
    total = len(existing_files[optimum_key])

    summary_label = f"Optimum: {optimum}"
    if pfba_factor is not None:
        summary_label += "\tpFBA factor: " + str(pfba_factor)
    handle_msg(
        "\t".join(
            (
                summary_label,
                f"Number of existing solution: ({in_the_zip} + {total - in_the_zip}) / {total}",
            )
        ),
        print_msg=True,
    )
existing_files;

## Run pcFVA
### Define helper functions

In [None]:
def find_and_remove_blocked_reactions(
    pcmodel_sample, relaxation_rxns_required=None, prevent_removal=None, verbose=False
):
    """Return a copy of the model with blocked reactions and orphaned genes removed.

    The input model is not modified; all changes are applied to a copy. The
    function relies on notebook-level names: ``required_flux_reactions``, the
    prefix/suffix constants, and ``COBRA_CONFIGURATION``.

    Parameters
    ----------
    pcmodel_sample
        Model to reduce.
    relaxation_rxns_required : iterable of str, optional
        IDs of relaxation reactions that stay open. All other relaxation
        reactions get bounds of ``(0, 0)`` before the blocked reactions are
        determined. ``None`` is treated as an empty collection, which closes
        every relaxation reaction.
    prevent_removal : iterable of str, optional
        Protein/gene IDs whose protein dilution and relaxation reactions, and
        whose genes, are never removed.
    verbose : bool
        Passed as ``print_msg`` to the summary messages.

    Returns
    -------
    The reduced copy of the model.
    """
    # Previously a value of None, although being the default, raised a
    # TypeError in the membership test below.
    required_relaxations = (
        set() if relaxation_rxns_required is None else set(relaxation_rxns_required)
    )
    if prevent_removal is None:
        prevent_removal = []
    pcmodel_sample = pcmodel_sample.copy()
    n_reactions_original = len(pcmodel_sample.reactions)
    n_genes_original = len(pcmodel_sample.genes)
    # Objective is the summed flux through all reactions required to carry flux
    pcmodel_sample.objective = sum(
        [
            r.flux_expression
            for r in pcmodel_sample.reactions.get_by_any(required_flux_reactions)
        ]
    )
    # Close every relaxation reaction that is not explicitly required
    for relax_rxn in pcmodel_sample.reactions.query(
        lambda x: x.id.startswith(relaxation_rxn_prefix)
    ):
        if relax_rxn.id not in required_relaxations:
            relax_rxn.bounds = (0, 0)

    reactions_to_remove = find_blocked_reactions(
        model=pcmodel_sample,
        reaction_list=None,
        zero_cutoff=COBRA_CONFIGURATION.tolerance,
        open_exchanges=True,
        processes=min(60, COBRA_CONFIGURATION.processes),
    )
    # Protein dilution and relaxation reactions of protected proteins are kept
    # even when they are blocked
    protected_rxn_ids = {
        f"{rxn_prefix}{protein_met_prefix}{prot}{comp_suffix}"
        for prot in prevent_removal
        for rxn_prefix in (protein_rxn_prefix, relaxation_rxn_prefix)
    }
    reactions_to_remove = sorted(
        set(reactions_to_remove).difference(protected_rxn_ids)
    )
    pcmodel_sample.remove_reactions(reactions_to_remove, remove_orphans=True)
    # A gene is removed once neither its protein dilution reaction nor its
    # relaxation reaction is left in the model
    genes_to_remove = [
        gene.id
        for gene in pcmodel_sample.genes
        if not pcmodel_sample.reactions.has_id(
            f"{protein_rxn_prefix}{protein_met_prefix}{gene.id}{comp_suffix}"
        )
        and not pcmodel_sample.reactions.has_id(
            f"{relaxation_rxn_prefix}{protein_met_prefix}{gene.id}{comp_suffix}"
        )
    ]
    genes_to_remove = sorted(set(genes_to_remove).difference(prevent_removal))
    remove_genes(pcmodel_sample, gene_list=genes_to_remove, remove_reactions=True)
    handle_msg(
        f"Number of blocked reactions removed: {n_reactions_original - len(pcmodel_sample.reactions)}",
        print_msg=verbose,
    )
    handle_msg(
        f"Number of associated genes removed: {n_genes_original - len(pcmodel_sample.genes)}",
        print_msg=verbose,
    )
    return pcmodel_sample

### Simulate

In [None]:
# Run pcFVA for every sample model: load the model (reusing a reduced model
# when allowed), prepare its constraints, then simulate at each requested
# optimum and write one CSV of results per model and optimum.
# NOTE(review): `columns` is not referenced by the code visible in this notebook section.
columns = ["model", "reactions", "optimum", "min", "max"]
zip_kwargs = dict(compression=zipfile.ZIP_DEFLATED, compresslevel=None)
if run_computations:
    # NOTE(review): `idx` is not used inside the loop body
    for idx, pcmodel_sample_id in enumerate(models_to_simulate, start=1):
        handle_msg(
            "\n".join(
                (
                    "====================================================",
                    f"Computing pcFVA results for {pcmodel_sample_id}",
                    "====================================================",
                    f"Loading PC-model for {pcmodel_sample_id}",
                )
            ),
            print_msg=verbose,
        )
        # See if a reduced model has already been created for simulation.
        if (
            not overwrite
            and remove_blocked_reactions
            and pcmodel_sample_id in existing_files[reduced_models_dirpath.name]
        ):
            handle_msg(f"Previously generated reduced model found.", print_msg=True)
            # Check if all simulations have been performed with the reduced model
            dirnames = [
                (
                    f"Opt{optimum}"
                    + (f"_pFBA{pfba_factor}" if pfba_factor else "")
                    + ("loopless" if loopless else "")
                ).replace(".", "-")
                for optimum in optimum_percents
            ]
            if all(
                [
                    f"{pcmodel_sample_id}_FVAsol.csv" in existing_files[dirname]
                    for dirname in dirnames
                ]
            ):
                handle_msg(
                    "All simulations already performed for model", print_msg=verbose
                )
                continue
            else:
                # Not all simulations performed, therefore load existing reduced model
                try:
                    with zipfile.ZipFile(f"{reduced_models_dirpath}.zip", "r") as zfile:
                        with zfile.open(
                            f"{pcmodel_sample_id}.{ftype}", "r"
                        ) as model_file:
                            pcmodel_sample = load_overlay_model(
                                filename=io.StringIO(model_file.read().decode("utf-8")),
                                filetype=ftype,
                            )
                except (KeyError, FileNotFoundError):
                    # If file not found, model hasn't been put into zip file yet
                    pcmodel_sample = load_overlay_model(
                        filename=reduced_models_dirpath / f"{pcmodel_sample_id}.{ftype}"
                    )
        else:
            # Load full model, first from the ZIP archive, then from the directory
            try:
                with zipfile.ZipFile(f"{sample_pcmodels_dirpath}.zip", "r") as zfile:
                    with zfile.open(f"{pcmodel_sample_id}.{ftype}", "r") as model_file:
                        pcmodel_sample = load_overlay_model(
                            filename=io.StringIO(model_file.read().decode("utf-8")),
                            filetype=ftype,
                        )
            except (FileNotFoundError, KeyError):
                # If file not found, model hasn't been put into zip file yet
                pcmodel_sample = load_overlay_model(
                    filename=sample_pcmodels_dirpath / f"{pcmodel_sample_id}.{ftype}"
                )
            # Restrict relaxation reactions for specific proteins
            for protein in protein_relaxations_to_restrict:
                protein_met = pcmodel_sample.metabolites.get_by_id(
                    f"{protein_met_prefix}{protein}{comp_suffix}"
                )
                relax_prot_rxn = pcmodel_sample.reactions.get_by_id(
                    f"{relaxation_rxn_prefix}{protein_met.id}"
                )
                relax_prot_rxn.bounds = (0, 0)
            # Relax lower bound constraint for specific proteins
            for protein in protein_constraints_lb_to_relax:
                protein_met = pcmodel_sample.metabolites.get_by_id(
                    f"{protein_met_prefix}{protein}{comp_suffix}"
                )
                protein_rxn = pcmodel_sample.reactions.get_by_id(
                    f"{protein_rxn_prefix}{protein_met.id}"
                )
                protein_rxn.lower_bound = 0

            budget_rxn_relaxation = pcmodel_sample.reactions.get_by_id(
                f"{budget_rxn_prefix}{budget_met_prefix}relaxation"
            )
            # Determine smallest allowable relaxation budget that allows flux through objectives and set as upper bound
            # The objective change is made inside the model context, so it is
            # presumably reverted when the `with` block exits - confirm.
            with pcmodel_sample:
                # Maximize flux through the required reactions while penalizing use of the relaxation budget
                pcmodel_sample.objective = (
                    sum(
                        [
                            r.flux_expression
                            for r in pcmodel_sample.reactions.get_by_any(
                                required_flux_reactions
                            )
                        ]
                    )
                    - budget_rxn_relaxation.flux_expression
                )
                pcmodel_sample.objective_direction = "max"
                # Fail loudly, should not occur unless restricted relaxation proteins are absolutely necessary
                pcmodel_sample.slim_optimize(error_value=None)
                relaxation_rxns_required = get_solution(
                    pcmodel_sample,
                    reactions=pcmodel_sample.reactions.query(
                        lambda x: x.id.startswith(relaxation_rxn_prefix)
                    ),
                )
                # Keep the IDs of the relaxation reactions with a nonzero flux in this solution
                relaxation_rxns_required = set(
                    relaxation_rxns_required.fluxes[
                        relaxation_rxns_required.fluxes != 0
                    ].index.to_list()
                )
                budget_min = budget_rxn_relaxation.flux
            if min_relax_budget_for_objectives:
                budget_rxn_relaxation.upper_bound = budget_min

            if remove_blocked_reactions:
                handle_msg(
                    "Determining reactions and protein constraints to remove",
                    print_msg=verbose,
                )
                with warnings.catch_warnings(action="ignore"):
                    pcmodel_sample = find_and_remove_blocked_reactions(
                        pcmodel_sample,
                        relaxation_rxns_required=relaxation_rxns_required,
                        prevent_removal=list(
                            set(protein_relaxations_to_restrict).union(
                                protein_constraints_lb_to_relax
                            )
                        ),
                        verbose=verbose,
                    )
                # The returned objective value is not used here
                pcmodel_sample.slim_optimize()
                # Write reduced model into directory
                # NOTE(review): the file is named after the model's `id`, while the
                # lookup of existing reduced models uses `pcmodel_sample_id` - confirm both agree.
                write_cobra_model(
                    pcmodel_sample,
                    filename=reduced_models_dirpath / f"{pcmodel_sample.id}.{ftype}",
                )
        # Set objective reaction(s)
        pcmodel_sample.objective = sum(
            [
                r.flux_expression
                for r in pcmodel_sample.reactions.get_by_any(objective_reactions)
            ]
        )
        # Not all reactions exist for reduced models, regenerate list per model
        rxn_list = [x for x in reaction_list if x in pcmodel_sample.reactions]
        handle_msg(
            f"Number of reactions minimize/maximize for sample: {len(rxn_list)} / {len(pcmodel.reactions)}",
            print_msg=verbose,
        )
        handle_msg(f"Starting simulations for {pcmodel_sample}", print_msg=verbose)
        for optimum in optimum_percents:
            # Same directory naming scheme as used when looking for existing solutions
            dirname = (
                f"Opt{optimum}"
                + (f"_pFBA{pfba_factor}" if pfba_factor else "")
                + ("loopless" if loopless else "")
            ).replace(".", "-")
            # NOTE(review): the file name is built from the string form of the model
            # object, presumably its ID - confirm it matches `pcmodel_sample_id`,
            # which is what the "all simulations already performed" check uses.
            filename = f"{pcmodel_sample}_FVAsol.csv"
            if not overwrite and filename in existing_files[dirname]:
                handle_msg(
                    f"Already simulated at optimum: {optimum}"
                    + (
                        ""
                        if pfba_factor is None
                        else " pFBA factor: " + str(pfba_factor)
                    ),
                    print_msg=verbose,
                )
                continue
            # Convert the percentage into a fraction
            fraction_of_optimum = round(optimum / 100, 4)
            try:
                pcfva_sol = flux_variability_analysis(
                    pcmodel_sample,
                    reaction_list=rxn_list,
                    loopless=loopless,
                    pfba_factor=pfba_factor,
                    fraction_of_optimum=fraction_of_optimum,
                    processes=min(60, COBRA_CONFIGURATION.processes),
                )
            except OptimizationError as e:
                # A failed model is reported and appended to a per-directory error
                # log; the loop then continues with the next optimum.
                msg = f"{pcmodel_sample_id} failed due to an exception."
                handle_msg(msg, print_msg=verbose)
                with open(
                    pcfva_results_dirpath / f"pcFVA-errors-{dirname}.log", "a"
                ) as file:
                    file.write(f"{msg} {str(e)}\n")
            else:
                # Reshape the solution into long format with the columns
                # reactions, min, max, model, optimum
                pcfva_sol.index.name = "reactions"
                pcfva_sol = pcfva_sol.reset_index(drop=False)
                pcfva_sol["model"] = pcmodel_sample.id
                pcfva_sol["optimum"] = fraction_of_optimum
                pcfva_sol = pcfva_sol.rename(
                    {"minimum": "min", "maximum": "max"}, axis=1
                )
                # Results at an optimum of 0 are stored one level above the objective directory
                optimum_dirpath = (
                    pcfva_results_dirpath.parent
                    if optimum == 0
                    else pcfva_results_dirpath
                ) / dirname
                # Save simulation results
                pcfva_sol.to_csv(optimum_dirpath / filename, index=False)
                handle_msg(f"Finished pcFVA for optimum: {optimum}", print_msg=verbose)
        handle_msg(f"Finished pcFVA for {pcmodel_sample_id}", print_msg=verbose)
# Move the reduced models and the per-optimum solution files into ZIP archives
# located next to their directories, then delete the unzipped directories.
handle_msg("Compressing results", print_msg=verbose)
dirpaths = [reduced_models_dirpath]
for optimum in optimum_percents:
    dirname = (
        f"Opt{optimum}"
        + (f"_pFBA{pfba_factor}" if pfba_factor else "")
        + ("loopless" if loopless else "")
    ).replace(".", "-")
    dirpaths += [
        (pcfva_results_dirpath.parent if optimum == 0 else pcfva_results_dirpath)
        / dirname
    ]

for dirpath in dirpaths:
    zip_path = Path(f"{dirpath}.zip")
    # The directory may not exist (e.g. no reduced models were requested)
    new_files = sorted(dirpath.iterdir()) if dirpath.is_dir() else []
    if not new_files:
        # Without new files there is nothing to archive. Leaving any existing
        # archive untouched prevents replacing previous results with an empty
        # archive when overwriting is enabled but nothing was computed.
        handle_msg(
            f"No new files to compress for {dirpath.name}, keeping any existing archive",
            print_msg=verbose,
        )
        if dirpath.is_dir():
            shutil.rmtree(str(dirpath))
        print()
        continue
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_zip_path = Path(tmpdir) / zip_path.name
        if not overwrite and zip_path.exists():
            handle_msg("Copying original zip to temporary directory", print_msg=verbose)
            shutil.copy(zip_path, tmp_zip_path)
        handle_msg("Appending model files to temporary zip file", print_msg=verbose)
        with zipfile.ZipFile(tmp_zip_path, "a", **zip_kwargs) as zfile:
            # Use a dedicated name so that the notebook-level `existing_files`
            # mapping is not replaced by this set
            already_zipped = {Path(x).name for x in zfile.namelist() if x}
            for filepath in new_files:
                if filepath.name in already_zipped:
                    continue
                zfile.write(f"{filepath}", arcname=f"{filepath.name}")
        # Replacing original zip file
        handle_msg("Setting temporary zip file as the new zip file", print_msg=verbose)
        shutil.copy(tmp_zip_path, zip_path)
    handle_msg("Finished compression, cleaning up files", print_msg=verbose)
    shutil.rmtree(str(dirpath))
    handle_msg("Finished cleanup", print_msg=verbose)
    print()

### Combine all simulation results

In [None]:
# Load combine solutions into single DataFrame


def read_csv_from_zip(csv_file, archive=None):
    """Read one CSV member of an open ZIP archive into a DataFrame.

    Parameters
    ----------
    csv_file : str
        Name of the member inside the archive.
    archive : zipfile.ZipFile, optional
        Open archive to read from. When omitted, the notebook-level variable
        ``zfile`` is used, which keeps existing single-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV member.
    """
    # Only fall back to the global when no archive was passed explicitly
    source = zfile if archive is None else archive
    with source.open(csv_file) as f:
        return pd.read_csv(f)


# Collect the per-model solution tables of every optimum from their ZIP archives
dfs_to_concat = []
for optimum in optimum_percents:
    # Must match the directory naming used when the results were written,
    # including the "loopless" suffix
    dirname = (
        f"Opt{optimum}"
        + (f"_pFBA{pfba_factor}" if pfba_factor else "")
        + ("loopless" if loopless else "")
    ).replace(".", "-")
    dirpath = pcfva_results_dirpath.parent if optimum == 0 else pcfva_results_dirpath
    # Read-only access: a missing archive raises FileNotFoundError instead of
    # silently creating an empty archive.
    # The name `zfile` is kept because `read_csv_from_zip` reads from it.
    with zipfile.ZipFile(dirpath / f"{dirname}.zip", "r") as zfile:
        dfs_to_concat += [
            read_csv_from_zip(csv_file)
            for csv_file in zfile.namelist()
            if csv_file.endswith(".csv")
        ]

if not dfs_to_concat:
    raise ValueError("No pcFVA solution files were found to combine.")
df_pcfva_all = pd.concat(dfs_to_concat, ignore_index=True)

# Build the full grid of reactions x models x optimums so that every
# combination has a row. Combinations without a solution (e.g. reactions
# removed as blocked) come out of the left merge below as missing values.
# NOTE(review): an earlier comment mentioned filling with zero values, but no
# fill is applied in this cell - confirm whether downstream code expects NaN or 0.
df_all_reactions = pd.DataFrame(
    index=pd.Index(df_pcfva_all["reactions"].unique(), name="reactions"),
)


# Index levels of the grid; "reactions" is already the index, the remaining
# columns are added one at a time
columns_to_fill = ["reactions", "model", "optimum"]
for idx, col in enumerate(columns_to_fill[1:], start=2):
    # Create initial column
    df_all_reactions.loc[:, col] = pd.NA
    # Fill unique values
    # (presumably each row ends up holding the whole set, which is then
    # exploded below - confirm against the pandas version in use)
    df_all_reactions.loc[:, col] = set(df_pcfva_all[col].unique())
    # Explode to propagate one row per unique value
    df_all_reactions = df_all_reactions.explode(col).reset_index(drop=False)
    # Re-index on all levels filled so far
    df_all_reactions = df_all_reactions.set_index(columns_to_fill[:idx])
df_all_reactions = df_all_reactions.sort_index()
# Left merge keeps every grid row, with or without a matching solution
df_pcfva_all = df_all_reactions.merge(
    df_pcfva_all.set_index(columns_to_fill),
    left_index=True,
    right_index=True,
    how="left",
)
df_pcfva_all = df_pcfva_all.reset_index(drop=False)
# Ensure fill worked as expected: one row per combination of unique values
assert len(df_pcfva_all.index) == np.prod(
    [df_pcfva_all[col].nunique() for col in columns_to_fill]
)
df_pcfva_all

## Export results

In [None]:
# Write all solutions as a single CSV file inside a ZIP archive
all_solutions_name = f"{pcmodel.id}_All_FVAsols"
compression_opts = {
    "method": "zip",
    "archive_name": f"{all_solutions_name}.csv",
    **zip_kwargs,
}
all_solutions_zip = pcfva_results_dirpath / f"{all_solutions_name}.zip"
df_pcfva_all.to_csv(all_solutions_zip, index=False, compression=compression_opts)

## Test load pcFVA generated results

In [None]:
# Reload the exported archive to confirm that the combined results can be read
# back; this also works when the results were generated in an earlier run
exported_results_path = pcfva_results_dirpath / f"{pcmodel.id}_All_FVAsols.zip"
df_pcfva_all = pd.read_csv(exported_results_path, index_col=None)
df_pcfva_all