# Percentage of total reaction abundance mapped to a subsystem

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Switch for writing output files: code guarded by `if save_files:` only
# writes to disk when this is True.
save_files = False

In [None]:
# Input tables, both read from the processed-files directory.
abundance_csv = "../data/processed_files/abund_and_metadata.csv"
subsystem_csv = "../data/processed_files/subsystem_duplicated.csv"

# Abundance table: read with the default integer row index.
reaction_abundance = pd.read_csv(abundance_csv)
# Subsystem mapping: the first CSV column becomes the row index.
subsys_mapping = pd.read_csv(subsystem_csv, index_col=0)

In [None]:
# Display the table loaded from subsystem_duplicated.csv.
subsys_mapping

In [None]:
# Display the table loaded from abund_and_metadata.csv.
reaction_abundance

In [None]:
# Show the last 50 column labels. The following cell keeps columns[0:-49],
# i.e. it discards the final 49 columns (presumably sample metadata rather
# than reaction abundances — confirm against the CSV).
reaction_abundance.columns[-50:]

In [None]:
# Index the table by sample ID, remember each sample's "Model" label, and
# strip the trailing columns so that only the leading columns remain.
#
# set_index() moves "#SampleID" out of the columns and into the row index in
# one step, so no separate drop of that column is needed afterwards.
reaction_abundance = reaction_abundance.set_index("#SampleID")

# Keep the "Model" column (indexed by sample ID) before it is sliced away;
# it is re-attached to the per-sample summary later in the notebook.
model = reaction_abundance["Model"]

# Discard the final 49 columns (presumably sample metadata rather than
# reaction abundances — confirm against the CSV). Slicing by position and
# taking an explicit copy gives an independent frame; the previous
# `drop(..., inplace=True)` on a frame derived by column selection is the
# pattern that triggers pandas' SettingWithCopyWarning.
reaction_abundance = reaction_abundance.iloc[:, :-49].copy()
reaction_abundance

In [None]:
# Turn each row (one sample) into relative abundances by dividing every
# value in the row by that row's total.
per_sample_total = reaction_abundance.sum(axis="columns")
reaction_abundance_norm = reaction_abundance.divide(per_sample_total, axis="index")
reaction_abundance_norm

In [None]:
# Sanity check for the normalisation axis: display the per-row totals of the
# normalised table (each row was divided by its own sum).
reaction_abundance_norm.sum(axis="columns")

In [None]:
# Restrict the normalised table to reactions listed in the subsystem mapping.
# np.unique() de-duplicates (and sorts) the "rxn" values; only those that are
# actual column labels of the normalised table are kept.
mapped_rxn_columns = [
    rxn
    for rxn in np.unique(subsys_mapping["rxn"])
    if rxn in reaction_abundance_norm.columns
]
reaction_abundance_subsys = reaction_abundance_norm.loc[:, mapped_rxn_columns]

# Per-row total of the mapped reactions, stored as a one-column table.
mapped_total = reaction_abundance_subsys.sum(axis="columns")
reaction_mapped = pd.DataFrame(mapped_total, columns=["Mapped reaction abundance"])

# Attach the "Model" label saved earlier; assignment aligns on the row index.
reaction_mapped["Model"] = model
reaction_mapped

In [None]:
# Box plot of the mapped reaction abundance (x-axis) for each Model (y-axis).
sns.boxplot(x="Mapped reaction abundance", y="Model", data=reaction_mapped)
plot_title = "Fraction of the reaction abundance corresponding to \n reactions mapped to a subsystem"
plt.title(plot_title)

In [None]:
# Re-set the file-writing switch (same value as the assignment near the top
# of the notebook); the `if save_files:` export below only runs when True.
save_files = False

#### Generate data for Extended Figure 6d

In [None]:
if save_files:
    # Write reaction_mapped to CSV; skipped unless save_files is True.
    # NOTE(review): the comment here previously read "Data for Extended
    # Figure 6e", while the section heading says "Extended Figure 6d" —
    # confirm which panel this table feeds.

    reaction_mapped.to_csv("../data/tables/Supp_figure_mapped_reaction_to_subsys.csv")

In [None]:
# Number of distinct "rxn" values in the subsystem mapping that are also
# column labels of the normalised abundance table.
sum(
    rxn in reaction_abundance_norm.columns
    for rxn in np.unique(subsys_mapping["rxn"])
)

In [None]:
# Subsystem mapping re-indexed by reaction ID, for looking up the identified
# metabolic reactions (the notebook refers to 7604 of them). The "rxn" column
# is kept as a regular column as well, and subsys_mapping itself is untouched.
subsys_copy = subsys_mapping.set_index("rxn", drop=False)
subsys_copy

In [None]:
# Number of mapping rows (repeated reaction IDs counted each time) whose
# reaction ID is a column of the normalised abundance table.
sum(rxn in reaction_abundance_norm.columns for rxn in subsys_copy.index)

#### Generate Supplementary Table 8

In [None]:
# Reaction IDs from the mapping index that are also abundance-table columns.
present_rxns = [
    rxn
    for rxn in np.unique(subsys_copy.index)
    if rxn in reaction_abundance_norm.columns
]

# Select the rows for those reactions, order them by reaction ID, and remove
# the "rxn" and "num_diff_subsys" columns from the exported table.
subsystems_table = (
    subsys_copy.loc[present_rxns, :]
    .sort_index()
    .drop(["rxn", "num_diff_subsys"], axis=1)
)
subsystems_table

In [None]:
# Supplementary Table 8: write the reaction-to-subsystem table to CSV.
# NOTE(review): unlike the `if save_files:` export earlier in the notebook,
# this write is not gated by `save_files`, so the file is (re)written every
# time the cell runs — confirm that this is intended.
subsystems_table.to_csv("../data/tables/Supp_table_reaction_to_subsystem_mapping.csv")

# List of all reactions

In [None]:
# Collect the column labels of the normalised abundance table into a Series
# (one entry per column) and display it.
all_reac = pd.Series(reaction_abundance_norm.columns)
all_reac

In [None]:
# Write the list of reaction column labels to CSV.
# NOTE(review): this write is not gated by `save_files`, and it targets
# "../data/manuscript/" whereas the other exports in this notebook go to
# "../data/tables/" — confirm both are intended.
all_reac.to_csv("../data/manuscript/Supp_table_7604_identified_reactions.csv")

# Mean and median of the reaction abundances

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Reload the abundance-plus-metadata table from scratch, this time using the
# first CSV column as the row index.
abund_and_metadata_csv = "../data/processed_files/abund_and_metadata.csv"
reaction_abundance = pd.read_csv(abund_and_metadata_csv, index_col=0)
reaction_abundance

In [None]:
# Keep everything except the final 49 columns (presumably sample metadata —
# confirm against the CSV), leaving only the leading columns.
trailing_columns = reaction_abundance.columns[-49:]
abund_only = reaction_abundance.drop(columns=trailing_columns)
abund_only

In [None]:
# Pool every cell of abund_only into one flat array and report its overall
# mean and median (across all rows and columns together).
pooled_values = abund_only.to_numpy().ravel()
print("Mean", np.mean(pooled_values))
print("Median", np.median(pooled_values))