# Investigate for which subsystems the random effect on the 'Origin' label was significant.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import fisher_exact

In [None]:
# Folder holding the CSV result files read by the following cells.
results_dir = "../../data/results_files/"

# Show at most 50 rows when a DataFrame is displayed.
pd.options.display.max_rows = 50

In [None]:
# Load the results table and the emmeans table; in both files the first CSV
# column is used as the index.
res = pd.read_csv(f"{results_dir}results.csv", index_col=0)
emmeans = pd.read_csv(f"{results_dir}emmeans.csv", index_col=0)

## Match to subsystems

In [None]:
# Subsystem table: its "rxn" and "level3" columns are used further down to
# map each reaction to its subsystem(s).
subsystems = pd.read_csv(
    "../../data/processed_files/subsystem_duplicated.csv", index_col=0
)

# Reaction names: the first CSV column is the index, and the remaining
# column is relabelled "name" (the assignment raises unless there is exactly
# one column).
rxn = pd.read_csv(
    "../../data/processed_files/rxn_names.csv",
    index_col=0,
)
rxn.columns = ["name"]

In [None]:
# Attach the reaction name to each result row; the assignment aligns the
# "name" column on the index of `res`.
res["Reaction"] = rxn["name"]

# Build the reaction -> [level3 subsystems] lookup in a single pass over the
# subsystem table, instead of re-filtering the whole table once per reaction.
# Row order is preserved within each reaction's list.
subsystems_by_rxn = {}
for rxn_id, level3 in zip(subsystems["rxn"], subsystems["level3"]):
    subsystems_by_rxn.setdefault(rxn_id, []).append(level3)

# Every row of `res` gets a list of subsystems; reactions absent from the
# subsystem table get the placeholder ["Unknown"]. list(...) makes a fresh
# copy so no two rows ever share the same list object.
subsystem_col = pd.Series(
    [list(subsystems_by_rxn.get(r, ["Unknown"])) for r in res.index],
    index=res.index,
)
res["Subsystem"] = subsystem_col

## Select reactions with significant random effects at the origin

In [None]:
# Restrict the results to the columns used in the rest of the analysis:
# the two standard deviations, the intercept estimate, the subsystem list
# and the "israndom" flag.
ranef_columns = ["resid std", "Origin std", "Intercept est", "Subsystem", "israndom"]
origin_ranef = res[ranef_columns]
origin_ranef

In [None]:
# Fraction of models whose "israndom" flag is "OC" or "O",
# i.e. models with a random effect on origin.
has_origin_ranef = origin_ranef["israndom"].isin(["OC", "O"])
origin_ranef[has_origin_ranef].shape[0] / origin_ranef.shape[0]

In [None]:
# Fraction of models whose "israndom" flag is "OC" or "C",
# i.e. models with a random effect on chip.
has_chip_ranef = origin_ranef["israndom"].isin(["OC", "C"])
origin_ranef[has_chip_ranef].shape[0] / origin_ranef.shape[0]

In [None]:
# Fraction of models whose "israndom" flag is "fixed",
# i.e. models without any random effect.
has_no_ranef = origin_ranef["israndom"].isin(["fixed"])
origin_ranef[has_no_ranef].shape[0] / origin_ranef.shape[0]

In [None]:
# Box plot of "Origin std" over the models flagged "OC" or "O".
with_origin_ranef = origin_ranef[origin_ranef["israndom"].isin(["OC", "O"])]
sns.boxplot(data=with_origin_ranef, x="Origin std")

Then duplicate the rows for reactions with multiple subsystems

In [None]:
# Build a long-format table with one row per (reaction, subsystem) pair:
# a reaction listed under several subsystems is repeated once per subsystem.
#
# The columns are walked positionally in a single pass rather than doing
# several label-based `.loc` lookups per output row. Besides being faster,
# this stays correct if a reaction label occurs more than once in the index
# (a label lookup would then return a Series instead of a scalar).
origin_matrix = []
row_fields = zip(
    origin_ranef.index,
    origin_ranef["israndom"],
    origin_ranef["Origin std"],
    origin_ranef["resid std"],
    origin_ranef["Intercept est"],
    origin_ranef["Subsystem"],
)
for reaction_name, israndom, origin_std, resid_std, intercept_est, subsys_list in row_fields:
    origin_matrix.extend(
        [reaction_name, israndom, origin_std, resid_std, intercept_est, subsys]
        for subsys in subsys_list
    )

origin_df = pd.DataFrame(
    origin_matrix,
    columns=[
        "Reaction",
        "israndom",
        "Origin std",
        "resid std",
        "Intercept est",
        "Subsystem",
    ],
)
origin_df

Then run Fisher's exact test

In [None]:
# Distinct subsystem labels, in order of first appearance in origin_df.
all_subsys = pd.unique(origin_df["Subsystem"])

In [None]:
# Enrichment analysis: for every subsystem, run a one-sided Fisher's exact
# test asking whether the subsystem is over-represented among the rows whose
# model has a random effect on origin ("O" or "OC") compared with all other
# rows. Subsystems whose Bonferroni-corrected p-value is <= 0.05 are kept.
subsystems_enriched = []
origin_is_random = origin_df["israndom"].isin(["O", "OC"])
ranef_sig = origin_df[origin_is_random]
ranef_nonsig = origin_df[~origin_is_random]

# "Unknown" is the placeholder for reactions without a subsystem and is not
# tested. The Bonferroni factor is the number of tests actually performed,
# whether or not "Unknown" occurs in the data.
tested_subsys = [subsys for subsys in all_subsys if subsys != "Unknown"]
n_tests = len(tested_subsys)

for subsys in tested_subsys:
    sig_in = int((ranef_sig["Subsystem"] == subsys).sum())
    nonsig_in = int((ranef_nonsig["Subsystem"] == subsys).sum())
    # 2x2 table: rows = origin ranef / no origin ranef,
    # columns = in this subsystem / in any other subsystem.
    contingency_mat = np.array(
        [
            [sig_in, len(ranef_sig) - sig_in],
            [nonsig_in, len(ranef_nonsig) - nonsig_in],
        ]
    )
    # Stored under its own name so the `res` results table is not overwritten.
    fisher_res = fisher_exact(contingency_mat, alternative="greater")
    pvalue = fisher_res.pvalue * n_tests  # Bonferroni correction
    if pvalue <= 0.05:
        print(
            f"Corrected pvalues with Bonferoni correction, initial pvalue {fisher_res.pvalue}, corrected pvalue {pvalue}"
        )
        subsystems_enriched.append(subsys)

subsystems_enriched

In [None]:
# Reference-line settings: threshold drawn on both sides of zero, and the
# vertical extent of the reference lines.
# NOTE(review): ymin/ymax are fixed at -1..1 on the categorical y axis; with
# more than two enriched subsystems the lines may not span every row - confirm.
effect_thresh = 0.01
ymin = -1
ymax = 1

# Rows of origin_df that belong to one of the enriched subsystems.
enriched_rows = origin_df[origin_df["Subsystem"].isin(subsystems_enriched)]

# One horizontal box per subsystem, with the individual "Origin std" values
# overlaid as black points.
sns.boxplot(data=enriched_rows, y="Subsystem", x="Origin std")
sns.swarmplot(data=enriched_rows, y="Subsystem", x="Origin std", color="k", s=5)

# Dashed vertical reference lines: green at 0, orange at +/- effect_thresh.
plt.vlines(
    [0],
    ymin=ymin,
    ymax=ymax,
    colors=["g"],
    linestyles=["dashed"],
    label="0",
)
plt.vlines(
    [-effect_thresh, effect_thresh],
    ymin=ymin,
    ymax=ymax,
    colors=["orange"],
    linestyles=["dashed"],
    label=f"+/- {effect_thresh}",
)