# Compute correlations with metadata
## Setup
### Import packages

In [None]:
import logging
import re
import textwrap
import warnings
from collections import Counter, defaultdict
from pathlib import Path

import gurobipy as gp
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import optlang
import pandas as pd
import seaborn as sns
import sympy
from cobra import DictList, Reaction
from cobra.flux_analysis.variability import (
    find_blocked_reactions,
    flux_variability_analysis,
)
from cobra.util.array import create_stoichiometric_matrix, nullspace
from mpl_toolkits.axes_grid1 import make_axes_locatable
from rbc_gem_utils import (
    ANNOTATION_PATH,
    COBRA_CONFIGURATION,
    CURATION_PATH,
    DATABASE_PATH,
    GEM_NAME,
    INTERIM_PATH,
    PARAMETERIZATION_PATH,
    PROCESSED_PATH,
    ROOT_PATH,
    build_string,
    check_database_version_online,
    check_version,
    compare_tables,
    explode_column,
    get_annotation_df,
    read_cobra_model,
    read_rbc_model,
    show_versions,
    split_string,
    visualize_comparison,
    write_cobra_model,
)
from rbc_gem_utils.analysis.overlay import *
from rbc_gem_utils.database.uniprot import (
    UNIPROT_DB_TAG,
    UNIPROT_ISOFORM_ID_RE,
    UNIPROT_PATH,
)
from rbc_gem_utils.qc import reset_reaction_bounds, reset_subsystem_groups
from rbc_gem_utils.util import (
    AVOGADRO_NUMBER,
    DEFAULT_DRY_MASS_PER_CELL,
    convert_gDW_to_L,
    convert_L_to_gDW,
    ensure_iterable,
    log_msg,
    strip_plural,
)
from rbc_gem_utils.visualization import cmap_map
from scipy.cluster.hierarchy import linkage
from scipy.stats import spearmanr
from sklearn.metrics import r2_score

# Turn off Gurobi output (general output flag and console logging)
gp.setParam("OutputFlag", 0)
gp.setParam("LogToConsole", 0)

# Show versions of notebook
show_versions()
import matplotlib.pyplot as plt

# Use Arial as the font family for all matplotlib text in this notebook
plt.rcParams["font.family"] = "Arial"

### Define configuration
#### COBRA Configuration

In [None]:
# Use Gurobi as the configured COBRA solver
COBRA_CONFIGURATION.solver = "gurobi"
# Set bound defaults much larger to prevent model loading issues
COBRA_CONFIGURATION.bounds = (-1e8, 1e8)
# Bare expression so the notebook displays the resulting configuration
COBRA_CONFIGURATION

## Load RBC-GEM model

In [None]:
# Working directories, resolved against the current working directory
data_path = Path("data").resolve()
models_path = Path("models").resolve()
figures_path = Path("figures").resolve()

# Dataset-specific directories
dataset_name = "RBComics"
dataset_path = Path(dataset_name).resolve()
dataset_models_dirpath = dataset_path / "pcmodels"
pcfva_results_dirpath = dataset_path / "pcFVA"
version = "1.2.0"

# Make sure to unzip the models first if you are going to simulate!
sample_prefix, time_prefix = "S", "D"
# Integers are easier to work with for time points
timepoints = [10, 23, 42]

# Figure output options
imagetype = "png"
transparent = True
save_figures = True
overwrite = True

# Load the base model and its protein-constrained overlay model
ftype = "xml"
model = read_cobra_model(f"{models_path}/{GEM_NAME.replace('-', '_')}.{ftype}")
pcmodel = load_overlay_model(filename=f"{models_path}/{model.id}_PC.{ftype}")

# For this workflow, shut off complex dilution reactions at the start
for cplx_dilution in pcmodel.reactions.query(
    lambda rxn: isinstance(rxn, ComplexDilution)
):
    cplx_dilution.bounds = (0, 0)

add_relaxation_budget(pcmodel, 0, verbose=False)
pcmodel

## Load pcFVA generated results

In [None]:
# Directory holding the per-variable-pair correlation tables
corr_results_dirpath = dataset_path / "correlations"

In [None]:
# Reuse results generated earlier in this session when they exist and are
# non-empty; otherwise load the saved DataFrame of generated results.
# An explicit check is used instead of `assert`, because assertions are removed
# under `python -O`, which would leave `df_pcfva_all` undefined.
try:
    _reload_pcfva_results = df_pcfva_all.empty
except NameError:
    _reload_pcfva_results = True

if _reload_pcfva_results:
    df_pcfva_all = pd.read_csv(
        f"{dataset_path}/{pcmodel.id}_{dataset_name}_FVAresults_ALL.tsv",
        sep="\t",
        index_col=None,
    )

df_pcfva_all

In [None]:
def get_sample_from_id(model_id, sample_prefix=""):
    """Return the sample token of a model ID.

    The sample token is the second-to-last ``_``-separated field of
    ``model_id``. A leading ``sample_prefix`` is stripped and the remainder is
    returned as an ``int`` when it parses as one; otherwise the original token
    is returned unchanged as a ``str``.

    Only a *leading* prefix is removed, so prefix text that appears elsewhere
    in the token no longer turns a non-numeric token into a number.
    """
    sample = model_id.rsplit("_", 2)[-2]
    try:
        return int(sample.removeprefix(sample_prefix))
    except ValueError:
        return sample


def get_time_from_id(model_id, time_prefix=""):
    """Return the time token of a model ID.

    The time token is the last ``_``-separated field of ``model_id``. A leading
    ``time_prefix`` is stripped and the remainder is returned as an ``int``
    when it parses as one; otherwise the original token is returned unchanged
    as a ``str``.

    Only a *leading* prefix is removed, so prefix text that appears elsewhere
    in the token no longer turns a non-numeric token into a number.
    """
    time = model_id.rsplit("_", 2)[-1]
    try:
        return int(time.removeprefix(time_prefix))
    except ValueError:
        return time

In [None]:
# All model IDs present in the pcFVA results
list_of_pcmodels = list(df_pcfva_all["model"].unique())

# Models whose sample token is not numeric (the token stays a string)
operation_model_ids = [
    model_id
    for model_id in list_of_pcmodels
    if not isinstance(get_sample_from_id(model_id, sample_prefix), (int, float))
]
operation_model_ids.sort(
    key=lambda model_id: (
        get_sample_from_id(model_id, sample_prefix),
        get_time_from_id(model_id, time_prefix),
    )
)
operations = {
    get_sample_from_id(model_id, sample_prefix) for model_id in operation_model_ids
}
operations

In [None]:
enzyme_total_suffix = DEFAULT_ENZYME_TOTAL_SUFFIX

# IDs of reactions in the base model that have a gene-reaction rule
min_reaction_list = [rxn.id for rxn in model.reactions if rxn.gene_reaction_rule]

# IDs of pcmodel variables, selected by their ID prefix
enzymes_list = [
    rxn.id
    for rxn in pcmodel.reactions
    if rxn.id.startswith("ENZDL_enzyme_") and enzyme_total_suffix in rxn.id
]
relaxation_list = [
    rxn.id for rxn in pcmodel.reactions if rxn.id.startswith("RELAX_")
]
budget_list = [rxn.id for rxn in pcmodel.reactions if rxn.id.startswith("PBDL_")]

# Map each gene-associated reaction to its enzyme variables, and back again
reaction_enzymes_map = {
    rid: tuple(
        rxn.id
        for rxn in pcmodel.reactions
        if rxn.id.startswith(f"ENZDL_enzyme_{rid}_")
    )
    for rid in min_reaction_list
}
enzyme_reaction_map = {}
for rid, enzymes in reaction_enzymes_map.items():
    for enzyme in enzymes:
        enzyme_reaction_map[enzyme] = rid

# Fall back to the mapped enzymes when none carry the total suffix
if not enzymes_list:
    enzymes_list = [
        enzyme
        for enzyme, rid in enzyme_reaction_map.items()
        if rid in min_reaction_list
    ]
min_reaction_list.extend(enzymes_list)
min_reaction_list.extend(relaxation_list)
print(
    f"Number of reactions minimize/maximize (minimum): {len(min_reaction_list)} / {len(pcmodel.reactions)}"
)

### Parse main results into smaller DataFrames
#### Separate by reaction variable types

In [None]:
# Initialize entries with prefixes used for separating DataFrames
dict_of_dataframes_types = {
    "reactions": None,
    "proteins": "PROTDL",
    # "complexes": "CPLXFM",
    # "complex_dilutions": "CPLXDL",
    "enzymes": "ENZDL",
    # "enzyme_formation": "ENZFM",
    "budgets": "PBDL",
    "relaxation": "RELAX",
}
# Replace each prefix with the matching rows of the pcFVA results. An empty
# prefix selects the variables that are reactions of the base model.
for key, prefix in list(dict_of_dataframes_types.items()):
    if not prefix:
        is_selected = df_pcfva_all["reactions"].map(lambda x: x in model.reactions)
    else:
        is_selected = df_pcfva_all["reactions"].map(lambda x: x.startswith(prefix))
    df = df_pcfva_all[is_selected]
    dict_of_dataframes_types[key] = df.copy()

dict_of_dataframes_types;

#### Separate by optimum value

In [None]:
# dict_of_dataframes_opt = {
#     optimum: df_pcfva_all[df_pcfva_all["optimum"] == optimum].copy()
#     for optimum in df_pcfva_all["optimum"].unique()
# }
# print(list(dict_of_dataframes_opt))

#### Separate by model

In [None]:
# dict_of_dataframes_model = {
#     model_id: df_pcfva_all[df_pcfva_all["model"] == model_id].copy()
#     for model_id in df_pcfva_all["model"].unique()
# }
# print(list(dict_of_dataframes_model))

### Create DataFrame for correlation calculations
#### Get maximum reaction fluxes and associated abundance values

In [None]:
# Results are aggregated per model and per reaction
groupby_list = ["model", "reactions"]
# Non-boundary reactions without any associated genes
always_abundance_independent = [
    rxn.id for rxn in model.reactions if not (rxn.boundary or rxn.genes)
]
print(
    f"Number of reactions w/o genes, always abundance independent: {len(always_abundance_independent)}"
)
always_abundance_independent;

##### Get maximum reaction flux

In [None]:
# Largest flux magnitude per (model, reaction) in either direction, regardless
# of percent optimum: the lowest minimum and highest maximum are taken first,
# then the larger absolute value of the two.
df = dict_of_dataframes_types["reactions"].copy()
df = df.groupby(groupby_list).agg(
    minimum=("minimum", "min"),
    maximum=("maximum", "max"),
)
df_max_flux_per_model = df.abs().max(axis=1).rename("Flux")
df_max_flux_per_model

##### Get maximum flux range

In [None]:
# Flux range (maximum - minimum) per row, then the widest range per group
df = dict_of_dataframes_types["reactions"].copy()
df = df.assign(Range=df["maximum"] - df["minimum"])
df_flux_range_per_model = df.groupby(groupby_list)["Range"].max()
df_flux_range_per_model

##### Get maximum abundance

In [None]:
# Relabel each enzyme variable with the reaction it belongs to, then keep the
# largest "maximum" value per (model, reaction) as the abundance.
df = dict_of_dataframes_types["enzymes"].copy()
df["reactions"] = [enzyme_reaction_map[enzyme_id] for enzyme_id in df["reactions"]]
df_max_enzyme_per_model = (
    df.groupby(groupby_list)["maximum"].max().rename("Abundance")
)
df_max_enzyme_per_model

##### Merge into one DataFrame

In [None]:
# Combine flux, flux range and abundance on their shared (model, reaction) index
df_reaction_flux_abundance = pd.merge(
    df_max_flux_per_model,
    df_flux_range_per_model,
    left_index=True,
    right_index=True,
)
df_reaction_flux_abundance = df_reaction_flux_abundance.merge(
    df_max_enzyme_per_model, left_index=True, right_index=True
)
# Turn the index levels back into regular columns
df_reaction_flux_abundance = df_reaction_flux_abundance.reset_index()
df_reaction_flux_abundance

### Define helper methods

In [None]:
def prepare_correlation_df(df, pvalue_tol):
    """Return a copy of ``df`` with p-values as ``-log10(p)``, sorted by significance.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``"pvalue"`` and ``"rho"``. It is not modified.
    pvalue_tol : float
        Floor applied to the p-values before the logarithm is taken. Any value
        that is not ``>= pvalue_tol`` (including NaN) is replaced by
        ``pvalue_tol``, which keeps ``log10`` finite for p-values of zero.

    Returns
    -------
    pandas.DataFrame
        A new table sorted by the transformed ``"pvalue"`` and then by
        ``"rho"``, both in descending order.
    """
    # Work on a copy so the caller's DataFrame (often a filtered slice) is untouched
    df = df.copy()
    pvalues = df["pvalue"]
    df["pvalue"] = -np.log10(pvalues.where(pvalues >= pvalue_tol, pvalue_tol))
    return df.sort_values(["pvalue", "rho"], ascending=[False, False])


def plot_correlations(
    df, ax=None, histx=True, histy=True, colorbar=True, vertical_lines=None, **kwargs
):
    """Scatter Spearman rho against -log10(p-value) with optional marginal histograms.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``"rho"`` (x-axis) and ``"pvalue"`` (y-axis).
        The ``"pvalue"`` column is plotted as given and labelled
        ``-log10(p-value)``, so it is expected to be transformed already.
    ax : matplotlib.axes.Axes, optional
        Axes for the scatter plot. A new figure is created when omitted.
    histx, histy : bool
        Whether to append a histogram above (x values) or to the right
        (y values) of the scatter plot.
    colorbar : bool
        Whether to draw a colorbar strip along the left edge of the scatter plot.
    vertical_lines : dict, optional
        Maps an x position to a ``(lineprops, textprops)`` pair. ``lineprops``
        is forwarded to ``ax.vlines``. ``textprops`` is forwarded to
        ``ax.text`` (it must therefore carry ``y`` and ``s``); the text is
        placed at ``x + xpad / 2`` in data coordinates.
    **kwargs
        Optional settings read by name:
        ``scatter_inch``, ``hist_inch``, ``hist_pad`` (figure geometry);
        ``xmin``, ``xmax``, ``ymin``, ``ymax`` (axis limits);
        ``xpad``, ``ypad`` (padding added outside the limits);
        ``cmap``, ``zorder``, ``edgecolor``, ``edgewidth``, ``c``, ``s``;
        ``xtick_major``, ``ytick_major``, ``xtick_minor``, ``ytick_minor``;
        ``xbinwidth``, ``ybinwidth`` (default: the minor, then the major tick
        interval; 20 equal bins when none of them is given);
        ``histx_ytick_major``, ``histy_xtick_major`` (count-axis tick interval);
        ``grid``.

    Returns
    -------
    tuple
        ``(ax, ax_histx, ax_histy)``; a histogram entry is ``None`` when that
        histogram was not requested.
    """
    # Define figure if no axes provided.
    scatter_inch = kwargs.get("scatter_inch", 5.0)
    hist_inch = kwargs.get("hist_inch", 1.0)
    hist_pad = kwargs.get("hist_pad", 0.25)
    if ax is None:
        _, ax = plt.subplots(
            nrows=1,
            ncols=1,
            figsize=(
                scatter_inch + (hist_inch + hist_pad if histy else 0),
                scatter_inch + (hist_inch + hist_pad if histx else 0),
            ),
        )
    # X-axis is rho, Y-axis is expected as -log10(pvalue) from correlation prep
    xy = {"x": "rho", "y": "pvalue"}
    limits = {
        "x": (kwargs.get("xmin", -1.0), kwargs.get("xmax", 1.0)),
        "y": (kwargs.get("ymin", 0.0), kwargs.get("ymax", df[xy["y"]].max())),
    }
    # Default padding is 1/40 of the axis span
    pads = {
        axis: kwargs.get(f"{axis}pad", (limits[axis][1] - limits[axis][0]) / 2 / 20)
        for axis in list(xy)
    }
    cmap = kwargs.get("cmap", "viridis")
    zorder = kwargs.get("zorder", 2)
    edgecolor = kwargs.get("edgecolor", "black")
    edgewidth = kwargs.get("edgewidth", 0.5)
    scatter = ax.scatter(
        xy["x"],
        xy["y"],
        data=df,
        c=kwargs.get("c", xy["y"]),
        s=kwargs.get("s", 40),
        zorder=zorder,
        edgecolor=edgecolor,
        linewidth=edgewidth,
        cmap=mpl.colormaps.get_cmap(cmap) if isinstance(cmap, str) else cmap,
        norm=mpl.colors.Normalize(
            vmin=limits["y"][0] - pads["y"], vmax=limits["y"][1] + pads["y"]
        ),
    )
    ax.set_xlabel(r"Spearman Correlation $(\rho)$", fontdict={"size": "xx-large"})
    ax.set_ylabel("-log$_{10}$(p-value)", fontdict={"size": "xx-large"})
    ax.set_xlim((limits["x"][0] - pads["x"], limits["x"][1] + pads["x"]))
    ax.set_ylim((limits["y"][0] - pads["y"], limits["y"][1] + pads["y"]))

    # Minor tick interval defaults to half of the major interval when one is set
    major_ticks = {axis: kwargs.get(f"{axis}tick_major") for axis in list(xy)}
    minor_ticks = {
        axis: kwargs.get(
            f"{axis}tick_minor",
            major_ticks[axis] / 2 if major_ticks[axis] is not None else None,
        )
        for axis in list(xy)
    }
    for axis in list(xy):
        if major_ticks[axis] is not None:
            getattr(ax, f"{axis}axis").set_major_locator(
                mpl.ticker.MultipleLocator(major_ticks[axis])
            )
        if minor_ticks[axis] is not None:
            getattr(ax, f"{axis}axis").set_minor_locator(
                mpl.ticker.MultipleLocator(minor_ticks[axis])
            )
        ax.tick_params(axis=axis, labelsize="large")

    if vertical_lines:
        for lineval, (lineprops, textprops) in vertical_lines.items():
            if lineprops:
                ax.vlines(
                    x=lineval,
                    ymin=limits["y"][0] - pads["y"],
                    ymax=limits["y"][1] + pads["y"],
                    **lineprops,
                )
            if textprops:
                ax.text(x=lineval + pads["x"] / 2, transform=ax.transData, **textprops)

    if kwargs.get("grid", False):
        ax.grid(True, **dict(which="both", alpha=0.75))

    if colorbar:
        # Bounds are given in data coordinates: the strip occupies the left
        # x-padding and spans the full padded y-range. The height is measured
        # from the lower limit so that a non-zero ``ymin`` is handled as well.
        cax = ax.inset_axes(
            [
                limits["x"][0] - pads["x"],  # lower left corner xpos
                limits["y"][0] - pads["y"],  # lower left corner ypos
                pads["x"],  # width of colorbar
                limits["y"][1] - limits["y"][0] + 2 * pads["y"],  # height of colorbar
            ],
            transform=ax.transData,
        )
        cbar = ax.get_figure().colorbar(scatter, cax=cax)
        cax.set_ylim((limits["y"][0] - pads["y"], limits["y"][1] + pads["y"]))
        cax.set_xticks([])
        cax.set_yticks([])

    ax_histx = None
    ax_histy = None
    if histx or histy:
        divider = make_axes_locatable(ax)
        # Histogram axes
        ax_histx = (
            divider.append_axes("top", hist_inch, pad=hist_pad, sharex=ax)
            if histx
            else None
        )
        ax_histy = (
            divider.append_axes("right", hist_inch, pad=hist_pad, sharey=ax)
            if histy
            else None
        )

        for axis, ax_hist in zip(list(xy), [ax_histx, ax_histy]):
            if ax_hist is None:
                continue
            binwidth = kwargs.get(
                f"{axis}binwidth",
                (
                    minor_ticks[axis]
                    if minor_ticks[axis] is not None
                    else major_ticks[axis]
                ),
            )
            lower, upper = limits[axis]
            if binwidth is None:
                # No bin width or tick interval was supplied: use 20 equal bins
                # rather than failing on arithmetic with None.
                bin_edges = np.linspace(lower, upper, 21)
            else:
                bin_edges = np.arange(lower, upper + binwidth, binwidth)
            counts, bins, patches = ax_hist.hist(
                df[xy[axis]],
                bins=bin_edges,
                orientation="vertical" if axis == "x" else "horizontal",
                zorder=zorder,
                edgecolor=edgecolor,
                linewidth=edgewidth,
            )
            # ``other`` is the count axis of this histogram
            other = "y" if axis == "x" else "x"
            ax_hist.tick_params(
                axis=axis, **{f"label{'bottom' if axis == 'x' else 'left'}": False}
            )
            ax_hist.tick_params(axis=other, labelsize="large")
            getattr(ax_hist, f"set_{other}label")("Frequency", fontsize="large")

            tick_major_int = kwargs.get(f"hist{axis}_{other}tick_major")
            if tick_major_int is not None:
                getattr(ax_hist, f"{other}axis").set_major_locator(
                    mpl.ticker.MultipleLocator(tick_major_int)
                )
                getattr(ax_hist, f"{other}axis").set_minor_locator(
                    mpl.ticker.MultipleLocator(tick_major_int / 2)
                )
            # Leave 10% headroom above the tallest bar
            count_limit = max(counts) * 1.1
            getattr(ax_hist, f"set_{other}lim")((0, count_limit))
            if kwargs.get("grid", False):
                ax_hist.grid(True, **dict(which="both", alpha=0.75))

            # Repeat the vertical lines on the histogram that shares the x-axis
            if vertical_lines and axis == "x":
                for lineval, (lineprops, _) in vertical_lines.items():
                    if lineprops:
                        ax_hist.vlines(
                            x=lineval, ymin=0.0, ymax=count_limit, **lineprops
                        )

    return ax, ax_histx, ax_histy

### Create subgroups of models 

In [None]:
# Don't include mean/median in correlation calculations
model_groups = {
    # Models whose ID contains none of the capitalized operation names
    "ALL": [
        model_id
        for model_id in list_of_pcmodels
        if all(op.capitalize() not in model_id for op in operations)
    ],
    "OPERATIONS": operation_model_ids,
}
# model_groups.update({
#     f"{time_prefix}{time}": [x for x in model_groups["ALL"] if x.endswith(f"{time_prefix}{time}")]
#     for time in timepoints
# })
list(model_groups)

### Compute correlations with metadata

In [None]:
# Load the metadata table and let pandas infer the best column dtypes
df_metadata = pd.read_csv(
    f"{dataset_path}/{dataset_name}_Metadata.tsv", sep="\t", index_col=0
).convert_dtypes()

numeric_metadata_columns = df_metadata.select_dtypes(include="number").columns
other_metadata_columns = df_metadata.select_dtypes(exclude="number").columns
# Map to models: the donor is the first "_" field after the pcmodel ID prefix
df_pcmodel_meta = pd.DataFrame.from_dict(
    {
        pcmodel_id: pcmodel_id.replace(f"{pcmodel.id}_", "").partition("_")[0]
        for pcmodel_id in list_of_pcmodels
    },
    orient="index",
    columns=["donor"],
)
sample_ids = set(df_pcmodel_meta["donor"].unique())
# Handle models generated using operations such as mean, median, and quantile
to_concat = []
operations = {
    sample
    for sample in (get_sample_from_id(x, sample_prefix) for x in list_of_pcmodels)
    if isinstance(sample, str)
}
for op in operations:
    if op not in sample_ids:
        continue
    # Apply the DataFrame method named after the operation (lower-cased) to
    # the metadata rows of the non-operation samples
    df = getattr(
        df_metadata.loc[list(sample_ids.difference(operations))],
        op.lower(),
    )(axis=0, numeric_only=True)
    df.name = op
    to_concat.append(df)


# Append the aggregated rows, then attach the metadata to each model by donor
df_metadata = pd.concat([df_metadata.T, *to_concat], axis=1).T
df_pcmodel_meta = df_pcmodel_meta.merge(df_metadata, left_on="donor", right_index=True)
df_pcmodel_meta

#### Identify all possible pairs of columns for correlation computations

In [None]:
# Print a 1-based numbered list of the numeric metadata columns
for i, metadata_col in enumerate(numeric_metadata_columns, start=1):
    print(f"{i}:\t{metadata_col}")

#### Compute all pairs of correlations
This can take a significant amount of time the first time it is computed.

In [None]:
# Print rho and p-value for every reaction while computing correlations
verbose = False
# Number of top-ranked rows printed for each correlation table
display_top_nhits = 10
# Reactions that have flux, flux range and abundance values
reaction_list = list(df_reaction_flux_abundance["reactions"].unique())
# Make sure the output directory for correlation tables exists
corr_results_dirpath.mkdir(exist_ok=True, parents=True)

In [None]:
# Keep off to use previously computed results. Does not work if results not previously computed
run_computations = False
group_name = "ALL"
metadata_columns_for_corr = [
    # Comment/Uncomment to control desired columns
    "Age",
    "BMI",
    "Weight",
    "Height",
    "Hemolysis.volume",
    "Hemolysis.hct",
    "Hemolysis.storage_total_hb",
    "Hemolysis.storage_pct_hemol",
    "Hemolysis.pink_total_hb",
    "Hemolysis.pink_pct_hemol",
    "Recall.Transfer.Storage.Hemolysis",
    "Recall.Transfer.Osmotic.Hemolysis",
    "Recall.Transfer.Oxidative.Hemolysis",
    "Adjusted.Storage.Hemolysis",
    "Adjusted.Osmotic.Hemolysis",
    "Adjusted.Oxidative.Hemolysis",
    "CBC.WBC",
    "CBC.RBC",
    "CBC.HGB",
    "CBC.HCT",
    "CBC.MCV",
    "CBC.RDW",
    "CBC.PLT",
    "Ferritin",
]

# Models built from statistical operations are rejected up front
if group_name in operations or group_name == "OPERATIONS":
    raise Exception("Cannot use models created using data from statistical operations.")

group_model_list = model_groups[group_name]
# Results are stored as {group name: {(variable, metadata column): DataFrame}}
metadata_rankings_group_dict = defaultdict(dict)
for metadata_col in metadata_columns_for_corr:
    # Each metadata column is correlated with both the flux and the abundance
    for column_pair in [("Flux", metadata_col), ("Abundance", metadata_col)]:
        # NOTE(review): the file name is built from the column pair only, so a
        # table cached for one `group_name` is also picked up for other groups.
        filepath = Path(f"{corr_results_dirpath}/{column_pair[0]}_{column_pair[1]}.tsv")
        if filepath.exists():
            # Reuse the cached table, restricted to the current reaction list
            print(
                "Already computed correlations between '{}' and '{}' for '{}' models".format(
                    *column_pair, group_name
                )
            )
            df_correlations = pd.read_csv(f"{filepath}", sep="\t", index_col=0)
            df_correlations = df_correlations.loc[reaction_list]
        elif run_computations:
            print(
                "========================================================================="
            )
            print(
                "Computing correlations between '{}' and '{}' for '{}' models".format(
                    *column_pair, group_name
                )
            )
            print(
                "========================================================================="
            )
            correlations_dict = defaultdict(dict)
            # Members of the pair that are numeric metadata columns
            metadata_cols = [
                col for col in column_pair if col in numeric_metadata_columns
            ]
            if any(metadata_cols):
                # Only models with a value for the metadata column can be used
                df_meta = df_pcmodel_meta.loc[:, metadata_cols].dropna()
                model_list = [m for m in group_model_list if m in df_meta.index]
            else:
                model_list = group_model_list.copy()
            for rid in reaction_list:
                # Rows of this reaction, indexed by model
                df = df_reaction_flux_abundance[
                    df_reaction_flux_abundance["reactions"] == rid
                ]
                df = df.drop("reactions", axis=1)
                df = df.set_index("model")
                # Add metadata columns if needed
                if any(metadata_cols):
                    df = df.merge(
                        df_meta, left_index=True, right_index=True, how="inner"
                    ).dropna(axis=0)
                # Expected warnings emitted are due to constant input array, in which the correlation coefficient is not defined. Corresponds to the nan values.
                with warnings.catch_warnings(action="ignore"):
                    rho, pvalue = spearmanr(df.loc[model_list, list(column_pair)])
                correlations_dict[rid]["rho"] = rho
                correlations_dict[rid]["pvalue"] = pvalue
                if verbose:
                    print(f"For reaction {rid}: rho={rho}, p={pvalue}")
            # One row per reaction with the columns "rho" and "pvalue"
            df_correlations = pd.DataFrame.from_dict(correlations_dict, orient="index")
            df_correlations.to_csv(f"{filepath}", sep="\t", index=True)
        else:
            # Nothing cached and computations are switched off: skip this pair
            continue

        metadata_rankings_group_dict[group_name][column_pair] = df_correlations
        # Bare expression: has no effect inside the loop
        metadata_rankings_group_dict[group_name][column_pair]

In [None]:
# Table of subsystems and the category each one belongs to
df_pathways = pd.read_csv(
    f"{data_path}/subsystems.tsv", sep="\t", index_col=0, dtype=str
)
cat_cols = ["subsystem", "category"]
df_pathways = df_pathways.fillna("").reset_index()

# Categories that should be excluded from the figure
categories_to_exclude = {"Pseudoreactions", "Model total"}
# Main categories in figure and their colormaps, all unmapped categories are mapped to "Other"
categories_to_keep = {
    "Amino acid metabolism": mpl.cm.spring,
    "Carbohydrate metabolism": mpl.cm.Greens,
    "Lipid metabolism": mpl.cm.Blues,
    "Metabolism of cofactors and vitamins": mpl.cm.summer,
    "Nucleotide metabolism": mpl.cm.winter,
    "Reactive species": mpl.cm.Reds,
    "Transport reactions": mpl.cm.Purples,
    "Other": mpl.cm.gray_r,
}
# One-letter abbreviation for each category
use_abbrevs = True
abbrevs = {
    "Amino acid metabolism": "A",
    "Carbohydrate metabolism": "C",
    "Lipid metabolism": "L",
    "Metabolism of cofactors and vitamins": "V",
    "Nucleotide metabolism": "N",
    "Reactive species": "R",
    "Transport reactions": "T",
    "Other": "O",
}
barsize = 0.8
cmax = 0.8
cmin = 0.15


# One color per category, sampled from its colormap at `cmax`
colormaps_normal = dict(
    zip(
        categories_to_keep,
        [
            cmap_map(lambda x: x * 1, categories_to_keep[k])(cmax)
            for k in categories_to_keep
        ],
    )
)
category_colors = {key: tuple(value(cmax)) for key, value in categories_to_keep.items()}

# Group "Metabolism of other amino acids" with amino acids rather than treat as "other"
df_pathways["category"] = df_pathways["category"].replace(
    "Metabolism of other amino acids", "Amino acid metabolism"
)
# Anything that is neither kept nor explicitly excluded becomes "Other"
df_pathways["category"] = df_pathways["category"].apply(
    lambda category: (
        "Other"
        if (
            category not in categories_to_keep and category not in categories_to_exclude
        )
        else category
    )
)
df_pathways = df_pathways.rename({"name": "subsystem"}, axis=1)
# Subsystem -> category lookup. The column is selected explicitly rather than
# via `.squeeze()`, which collapses a one-row table to a scalar and then fails
# on `.to_dict()`.
mapping_dict = df_pathways[cat_cols].set_index("subsystem")["category"].to_dict()

In [None]:
# Start from the saved flux/abundance correlations, then add every metadata
# correlation table collected for the selected model group.
to_concat = {
    ("Flux", "Abundance"): pd.read_csv(
        f"{corr_results_dirpath}/Flux_Abundance.tsv", sep="\t", index_col=0
    )
}
to_concat.update(metadata_rankings_group_dict[group_name])
# Give each table two-level columns: ("<A> and <B>", original column name)
for key, df in to_concat.items():
    df = df.copy()
    variables_label = " and ".join(key)
    df.columns = pd.MultiIndex.from_tuples(
        [(variables_label, col) for col in df.columns],
        names=("Variables", "Spearman correlation"),
    )
    to_concat[key] = df

df_all_correlations = pd.concat([*to_concat.values()], axis=1)
# Sorted gene IDs of each reaction, combined into one string
gene_map = {
    r: build_string(sorted([g.id for g in pcmodel.reactions.get_by_id(r).genes]))
    for r in df_all_correlations.index
}
df_all_correlations.index = pd.MultiIndex.from_tuples(
    [(r, gene_map[r]) for r in df_all_correlations.index], names=("reaction", "genes")
)
# Order by flux/abundance p-value (ascending), then rho (descending), then
# genes and reaction ID
df_all_correlations = (
    df_all_correlations.reset_index()
    .sort_values(
        by=[
            ("Flux and Abundance", "pvalue"),
            ("Flux and Abundance", "rho"),
            ("genes", ""),
            ("reaction", ""),
        ],
        ascending=[True, False, True, True],
    )
    .set_index(["reaction", "genes"])
)

# Annotate each reaction with its subsystem and the subsystem's category
reaction_subsystems = [
    model.reactions.get_by_id(x[0]).subsystem for x in df_all_correlations.index
]
df_all_correlations["subsystem"] = reaction_subsystems
df_all_correlations["category"] = [
    mapping_dict[subsystem] for subsystem in reaction_subsystems
]
df_all_correlations["category"] = df_all_correlations["category"].apply(
    lambda category: (
        category
        if (category in categories_to_keep or category in categories_to_exclude)
        else "Other"
    )
)
df_all_correlations.to_csv(
    f"{dataset_path}/{pcmodel.id}_{dataset_name}_METADATA_CORRELATIONS.tsv",
    sep="\t",
    index=True,
)
df_all_correlations

In [None]:
# Linkage method and distance metric for the rows and columns of the heatmaps
row_method, row_metric = ("single", "euclidean")
col_method, col_metric = ("single", "euclidean")
row_cluster = True
col_cluster = True
# method, metric = ("average", "cityblock")
# row_method, row_metric = (method, metric)
# col_method, col_metric = (method, metric)
# Passed to scipy's `linkage` as `optimal_ordering`
optimal_ordering = True
# Overrides the image type set earlier; figures below are saved as SVG
imagetype = "svg"

In [None]:
# Clustered heatmap of the correlations between flux and each metadata column
dtype = "Flux"
# Keep only the rows without any missing value
df_dtype = df_all_correlations.dropna().copy()
if "Flux and Abundance" in df_dtype.columns:
    df_dtype = df_dtype.drop("Flux and Abundance", level=0, axis=1)
# Keep the column groups starting with `dtype`, plus subsystem/category
df_dtype = df_dtype.loc[
    :, [x for x in df_dtype.columns if x[0].startswith(dtype) or x[0] in cat_cols]
]
df_dtype.to_csv(
    f"{dataset_path}/{pcmodel.id}_{dataset_name}_METADATA_{dtype.upper()}_CORRELATIONS.tsv",
    sep="\t",
    index=True,
)
# Keep only the rho values and drop the second column level
df_dtype = df_dtype.loc[
    :, [x for x in df_dtype.columns if x[1] == "rho" or x[0] in cat_cols]
].droplevel(1, axis=1)
df_dtype

# Index by reaction ID only, order the reactions by (category, ID), and
# transpose so that rows are variable pairs and columns are reactions
df_data = df_dtype.droplevel(1, axis=0)
df_data = df_data.loc[
    sorted(
        df_data.index,
        key=lambda x: (mapping_dict[model.reactions.get_by_id(x).subsystem], x),
    )
].T
# Drop the subsystem/category rows so that only numeric values remain
df_data = df_data.loc[~df_data.index.isin(cat_cols)].astype(float)

# Symmetric color limits with 10% headroom; rho cannot exceed an absolute value of 1
rho_lim = min(df_data.abs().max().max() * 1.1, 1)
fig = sns.clustermap(
    df_data,
    figsize=(20, 0.5 * len(df_data.index)),
    cmap="coolwarm",
    row_cluster=row_cluster,
    col_cluster=col_cluster,
    vmin=-rho_lim,
    vmax=rho_lim,
    row_linkage=linkage(
        df_data, method=row_method, metric=row_metric, optimal_ordering=optimal_ordering
    ),
    col_linkage=linkage(
        df_data.T,
        method=col_method,
        metric=col_metric,
        optimal_ordering=optimal_ordering,
    ),
    # col_colors=[category_colors[mapping_dict[model.reactions.get_by_id(x).subsystem]] for x in df_data.columns],
)
ax_heatmap = fig.ax_heatmap
ax_heatmap.set_xlabel("Reactions", fontsize="x-large")
# Hide the per-reaction tick labels and both dendrograms
ax_heatmap.xaxis.set_ticklabels([])
fig.ax_row_dendrogram.set_visible(False)
fig.ax_col_dendrogram.set_visible(False)
if save_figures:
    fig.savefig(
        f"{figures_path}/Fig6_PanelA_{dtype}MetaDataCorr_{model.id}.{imagetype}",
        transparent=transparent,
        format=imagetype,
    )
fig;

In [None]:
# Clustered heatmap of the correlations between abundance and each metadata column
# ("Abun" is matched as a prefix of the "Abundance and ..." column groups)
dtype = "Abun"
# Keep only the rows without any missing value
df_dtype = df_all_correlations.dropna().copy()
if "Flux and Abundance" in df_dtype.columns:
    df_dtype = df_dtype.drop("Flux and Abundance", level=0, axis=1)
# Keep the column groups starting with `dtype`, plus subsystem/category
df_dtype = df_dtype.loc[
    :, [x for x in df_dtype.columns if x[0].startswith(dtype) or x[0] in cat_cols]
]
df_dtype.to_csv(
    f"{dataset_path}/{pcmodel.id}_{dataset_name}_METADATA_{dtype.upper()}_CORRELATIONS.tsv",
    sep="\t",
    index=True,
)
# Keep only the rho values and drop the second column level
df_dtype = df_dtype.loc[
    :, [x for x in df_dtype.columns if x[1] == "rho" or x[0] in cat_cols]
].droplevel(1, axis=1)
df_dtype

# Index by reaction ID only, order the reactions by (category, ID), and
# transpose so that rows are variable pairs and columns are reactions
df_data = df_dtype.droplevel(1, axis=0)
df_data = df_data.loc[
    sorted(
        df_data.index,
        key=lambda x: (mapping_dict[model.reactions.get_by_id(x).subsystem], x),
    )
].T
# Drop the subsystem/category rows so that only numeric values remain
df_data = df_data.loc[~df_data.index.isin(cat_cols)].astype(float)

# Symmetric color limits with 10% headroom; rho cannot exceed an absolute value of 1
rho_lim = min(df_data.abs().max().max() * 1.1, 1)
fig = sns.clustermap(
    df_data,
    figsize=(20, 0.5 * len(df_data.index)),
    cmap="coolwarm",
    row_cluster=row_cluster,
    col_cluster=col_cluster,
    vmin=-rho_lim,
    vmax=rho_lim,
    row_linkage=linkage(
        df_data, method=row_method, metric=row_metric, optimal_ordering=optimal_ordering
    ),
    col_linkage=linkage(
        df_data.T,
        method=col_method,
        metric=col_metric,
        optimal_ordering=optimal_ordering,
    ),
    # col_colors=[category_colors[mapping_dict[model.reactions.get_by_id(x).subsystem]] for x in df_data.columns],
)
ax_heatmap = fig.ax_heatmap
ax_heatmap.set_xlabel("Reactions", fontsize="x-large")
# Hide the per-reaction tick labels and both dendrograms
ax_heatmap.xaxis.set_ticklabels([])
ax_heatmap.xaxis.set_tick_params(labelsize="x-large")
fig.ax_row_dendrogram.set_visible(False)
fig.ax_col_dendrogram.set_visible(False)

if save_figures:
    fig.savefig(
        f"{figures_path}/Fig6_PanelB_{dtype}MetaDataCorr_{model.id}.{imagetype}",
        transparent=transparent,
        format=imagetype,
    )
fig;

In [None]:
# Clustered heatmap of flux and abundance correlations together, using
# average linkage instead of the single linkage set earlier
row_method, row_metric = ("average", "euclidean")
col_method, col_metric = ("average", "euclidean")

# Keep only the rows without any missing value
df_dtype = df_all_correlations.dropna().copy()
if "Flux and Abundance" in df_dtype.columns:
    df_dtype = df_dtype.drop("Flux and Abundance", level=0, axis=1)
# Keep only the rho values (plus subsystem/category) and drop the second column level
df_dtype = df_dtype.loc[
    :, [x for x in df_dtype.columns if x[1] == "rho" or x[0] in cat_cols]
].droplevel(1, axis=1)
# Index by reaction ID only, order the reactions by (category, ID), and
# transpose so that rows are variable pairs and columns are reactions
df_data = df_dtype.droplevel(1, axis=0)
df_data = df_data.loc[
    sorted(
        df_data.index,
        key=lambda x: (mapping_dict[model.reactions.get_by_id(x).subsystem], x),
    )
].T
# Drop the subsystem/category rows so that only numeric values remain
df_data = df_data.loc[~df_data.index.isin(cat_cols)].astype(float)

optimal_ordering = True
# Symmetric color limits with 10% headroom; rho cannot exceed an absolute value of 1
rho_lim = min(df_data.abs().max().max() * 1.1, 1)
fig = sns.clustermap(
    df_data,
    figsize=(20, 0.5 * len(df_data.index)),
    cmap="coolwarm",
    row_cluster=row_cluster,
    col_cluster=col_cluster,
    vmin=-rho_lim,
    vmax=rho_lim,
    row_linkage=linkage(
        df_data, method=row_method, metric=row_metric, optimal_ordering=optimal_ordering
    ),
    col_linkage=linkage(
        df_data.T,
        method=col_method,
        metric=col_metric,
        optimal_ordering=optimal_ordering,
    ),
    xticklabels=False,
    # col_colors=[category_colors[mapping_dict[model.reactions.get_by_id(x).subsystem]] for x in df_data.columns],
)
# Hide both dendrograms; this figure is not saved in this cell
fig.ax_row_dendrogram.set_visible(False)
fig.ax_col_dendrogram.set_visible(False)

#### Visualize correlations with metadata

In [None]:
# Consistent plot dimensions and values
# Lower bound applied to p-values before taking -log10
pvalue_tol = 1e-320
scatter_inch = 5  # Length x width of scatter plot
hist_inch = 1  # Length or width of histogram addition
hist_pad = 0.4  # Space between scatter and histogram
cmap = "viridis"
edgecolor = "black"
edgewidth = 0.5
grid = False
zorder = 2
# Whether to draw the histogram above (x) and to the right (y) of each scatter plot
histx = True
histy = True

##### Age

In [None]:
# Metadata column whose correlations are visualized in this section
metadata_col = "Age"
group_name = "ALL"  # Use models in visualizations
# Range of rho shown on the x-axis, and the padding added outside of it
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
# Keyword arguments passed to both `plot_correlations` calls
plot_kwargs = dict(
    xmin=xmin,
    xmax=xmax,
    xpad=xpad,
    edgecolor=edgecolor,
    edgewidth=edgewidth,
    cmap=cmap,
    zorder=zorder,
    scatter_inch=scatter_inch,
    hist_inch=hist_inch,
    hist_pad=hist_pad,
    grid=grid,
    xtick_major=0.2,
    xtick_minor=0.1,  # Determines minor tick and thus bin size if not otherwise set
    # xbinwidth=0.1, # Determined by minor ticks if not otherwise set
    histx_ytick_major=600,  # Major y-tick interval for histogram aligned with x-axis
    ytick_major=10,
    ytick_minor=2,  # Determines minor tick and thus bin size if not otherwise set
    # ybinwidth=10,  # Determined by minor ticks if not otherwise set
    histy_xtick_major=600,  # Major x-tick interval for histogram aligned with y-axis
)
# Share the y-axis (and the upper y-limit) between the flux and abundance panels
sharey = True


nrows, ncols = (1, 2)
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)


df_corr = (
    df_all_correlations.loc[:, " and ".join(("Flux", metadata_col))].droplevel(1).copy()
)
# Remove correlations that could not be calculated due to fixed/blocked flux
df_corr = df_corr[~df_corr.isna().any(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


df_corr = (
    df_all_correlations.loc[:, " and ".join(("Abundance", metadata_col))]
    .droplevel(1)
    .copy()
)
# Remove correlations that could not be calculated due to fixed/blocked flux
df_corr = df_corr[~df_corr.isna().any(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


if sharey:
    plot_kwargs["ymax"] = max(
        df_corr_flux["pvalue"].max(), df_corr_abun["pvalue"].max()
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [c for c in df_all_correlations.columns if c[0] == f"Flux and {metadata_col}"],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [
            c
            for c in df_all_correlations.columns
            if c[0] == f"Abundance and {metadata_col}"
        ],
    ]
)
axes_flux[0].set_title(
    f"Flux and {metadata_col}",
    fontsize="x-large",
    loc="center",
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### BMI

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "BMI"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 10,
    "ytick_minor": 2,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Weight

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Weight"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 30,
    "ytick_minor": 10,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Height

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Height"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 10,
    "ytick_minor": 2,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.volume

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.volume"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.hct

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.hct"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 30,
    "ytick_minor": 10,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.storage_total_hb

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.storage_total_hb"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 30,
    "ytick_minor": 10,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.storage_pct_hemol

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.storage_pct_hemol"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 10,
    "ytick_minor": 2,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.pink_total_hb

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.pink_total_hb"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima, rounded
# to the nearest integer
if sharey:
    plot_kwargs["ymax"] = round(
        max(df["pvalue"].max() for df in (df_corr_flux, df_corr_abun))
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Hemolysis.pink_pct_hemol

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Hemolysis.pink_pct_hemol"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima
if sharey:
    # NOTE(review): most sibling cells wrap this value in round(); confirm that the
    # unrounded maximum is intended here.
    plot_kwargs["ymax"] = max(
        df["pvalue"].max() for df in (df_corr_flux, df_corr_abun)
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Recall.Transfer.Storage.Hemolysis

In [None]:
# Side-by-side "Flux" and "Abundance" correlation panels for one metadata column
metadata_col = "Recall.Transfer.Storage.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xmin, xmax = (-0.60, 0.60)
xpad = 0.05
sharey = True
nrows, ncols = (1, 2)

# Keyword arguments forwarded unchanged to both ``plot_correlations`` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    "xtick_minor": 0.1,  # Determines minor tick and thus bin size if not otherwise set
    "histx_ytick_major": 600,  # Major y-tick interval for histogram aligned with x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,  # Determines minor tick and thus bin size if not otherwise set
    "histy_xtick_major": 600,  # Major x-tick interval for histogram aligned with y-axis
}

# The histogram aligned with the x-axis (histx) extends a panel vertically and the
# one aligned with the y-axis (histy) extends it horizontally, so the width is keyed
# on histy and the height on histx.
# NOTE(review): placement is assumed from the tick comments above; confirm against
# ``plot_correlations``.
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(
        (scatter_inch + (hist_inch + hist_pad if histy else 0)) * ncols,
        (scatter_inch + (hist_inch + hist_pad if histx else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)

# Flux correlations; rows containing NaN could not be calculated due to
# fixed/blocked flux and are removed
df_corr = (
    df_all_correlations.loc[:, f"Flux and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations; rows containing NaN are removed in the same way
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"]
    .droplevel(1)
    .dropna(axis=0, how="any")
)
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Common ``ymax`` for both panels: the larger of the two "pvalue" maxima
if sharey:
    # NOTE(review): most sibling cells wrap this value in round(); confirm that the
    # unrounded maximum is intended here.
    plot_kwargs["ymax"] = max(
        df["pvalue"].max() for df in (df_corr_flux, df_corr_abun)
    )

# Draw the flux panel first, then the abundance panel, with identical settings
axes_flux, axes_abun = [
    plot_correlations(
        df_panel,
        ax=ax_panel,
        histx=histx,
        histy=histy,
        colorbar=True,
        vertical_lines={},
        **plot_kwargs,
    )
    for df_panel, ax_panel in ((df_corr_flux, ax_flux), (df_corr_abun, ax_abun))
]

# Print the first ``display_top_nhits`` rows of each prepared table, showing only
# the columns that belong to that data type; a blank line separates the two tables
print(
    *(
        df_all_correlations.loc[
            df_panel.head(display_top_nhits).index,
            [
                c
                for c in df_all_correlations.columns
                if c[0] == f"{label} and {metadata_col}"
            ],
        ]
        for label, df_panel in (("Flux", df_corr_flux), ("Abundance", df_corr_abun))
    ),
    sep="\n\n",
)

axes_flux[0].set_title(
    f"Flux and {metadata_col}", fontsize="x-large", loc="center"
)
axes_abun[0].set_title(
    f"Abundance and {metadata_col}", fontsize="x-large", loc="center"
)

# Tie the auxiliary axes of the two panels together so their scales match
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Recall.Transfer.Osmotic.Hemolysis

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "Recall.Transfer.Osmotic.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Recall.Transfer.Oxidative.Hemolysis

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "Recall.Transfer.Oxidative.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 2,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Adjusted.Storage.Hemolysis

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "Adjusted.Storage.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 2,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Adjusted.Osmotic.Hemolysis

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "Adjusted.Osmotic.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Adjusted.Oxidative.Hemolysis

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "Adjusted.Oxidative.Hemolysis"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.WBC

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.WBC"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.RBC

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.RBC"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 2,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.HGB

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.HGB"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 800,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 1,
    "histy_xtick_major": 800,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.HCT

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.HCT"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 2,
    "ytick_minor": 0.5,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.MCV

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.MCV"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 2,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.RDW

In [None]:
# Metadata variable whose flux/abundance correlations are plotted in this cell
metadata_col = "CBC.RDW"
group_name = "ALL"  # Use models in visualizations
xpad = 0.05
xmin, xmax = -0.60, 0.60
# Keyword arguments forwarded unchanged to both `plot_correlations` calls below
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    # Minor ticks also fix the histogram bin size unless an explicit
    # `xbinwidth` / `ybinwidth` is supplied (neither is set here)
    "xtick_major": 0.2,
    "xtick_minor": 0.1,
    "histx_ytick_major": 600,  # Major y-tick interval, histogram along x-axis
    "ytick_major": 10,
    "ytick_minor": 2,
    "histy_xtick_major": 600,  # Major x-tick interval, histogram along y-axis
}
sharey = True

# Top-level column labels in `df_all_correlations`; reused as panel titles
flux_label = f"Flux and {metadata_col}"
abun_label = f"Abundance and {metadata_col}"

# Single row, two panels: flux in the first axes, abundance in the second
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    sharex=True,
    sharey=sharey,
    figsize=(
        ncols * (scatter_inch + (hist_inch + hist_pad if histx else 0)),
        nrows * (scatter_inch + (hist_inch + hist_pad if histy else 0)),
    ),
)

# Flux correlations: keep only rows in which every value is present
# (missing values are attributed to fixed/blocked flux)
df_corr = df_all_correlations.loc[:, flux_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

# Abundance correlations: likewise keep only rows without missing values
# NOTE(review): the fixed/blocked-flux rationale may not apply here -- confirm
df_corr = df_all_correlations.loc[:, abun_label].droplevel(1).copy()
df_corr = df_corr[df_corr.notna().all(axis=1)]
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)

if sharey:
    # Both panels get the same `ymax`: the larger of the two "pvalue" maxima
    plot_kwargs["ymax"] = max(
        table["pvalue"].max() for table in (df_corr_flux, df_corr_abun)
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the original (unprepared) values for the first `display_top_nhits`
# rows of each prepared table
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == flux_label],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [col for col in df_all_correlations.columns if col[0] == abun_label],
    ]
)

axes_flux[0].set_title(flux_label, fontsize="x-large", loc="center")
axes_abun[0].set_title(abun_label, fontsize="x-large", loc="center")

# Tie the matching secondary axes of the two panels together
# (presumably the marginal histograms -- verify against `plot_correlations`)
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### CBC.PLT

In [None]:
# Side-by-side correlation panels (flux and abundance) for one metadata column.
metadata_col = "CBC.PLT"
group_name = "ALL"  # Use models in visualizations

# Values handed to both panels as xmin/xmax/xpad
xmin, xmax = -0.60, 0.60
xpad = 0.05

# Keyword arguments forwarded unchanged to both `plot_correlations` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    # Determines minor tick and thus bin size if `xbinwidth` is not set
    "xtick_minor": 0.1,
    # Major y-tick interval for histogram aligned with x-axis
    "histx_ytick_major": 600,
    "ytick_major": 10,
    # Determines minor tick and thus bin size if `ybinwidth` is not set
    "ytick_minor": 2,
    # Major x-tick interval for histogram aligned with y-axis
    "histy_xtick_major": 600,
}
sharey = True


# One row, two columns: first axes for flux, second for abundance.
# Each panel's size is the scatter size plus histogram size and padding
# whenever the matching histogram flag (`histx` / `histy`) is truthy.
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows,
    ncols,
    figsize=(
        (scatter_inch + ((hist_inch + hist_pad) if histx else 0)) * ncols,
        (scatter_inch + ((hist_inch + hist_pad) if histy else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)


# Flux correlations: drop level 1 of the row index, then discard rows holding
# any NaN (original note: correlations that could not be calculated due to
# fixed/blocked flux).
df_corr = df_all_correlations.loc[:, f"Flux and {metadata_col}"].droplevel(1).copy()
df_corr = df_corr.dropna(axis=0, how="any")
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


# Abundance correlations: same preparation, rows holding any NaN are discarded.
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"].droplevel(1).copy()
)
df_corr = df_corr.dropna(axis=0, how="any")
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


if sharey:
    # Both panels receive the same `ymax`: the larger of the two maxima found
    # in their "pvalue" columns.
    plot_kwargs.update(
        ymax=max(df_corr_flux["pvalue"].max(), df_corr_abun["pvalue"].max())
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the first `display_top_nhits` rows of each prepared table, using the
# matching column group of the full correlation table.
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [
            col
            for col in df_all_correlations.columns
            if col[0] == f"Flux and {metadata_col}"
        ],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [
            col
            for col in df_all_correlations.columns
            if col[0] == f"Abundance and {metadata_col}"
        ],
    ]
)

axes_flux[0].set_title(f"Flux and {metadata_col}", fontsize="x-large", loc="center")
axes_abun[0].set_title(
    f"Abundance and {metadata_col}",
    fontsize="x-large",
    loc="center",
)

# Tie the flux panel's axes [1] (y) and [2] (x) to the abundance panel's.
# NOTE(review): presumably these are the marginal histogram axes returned by
# `plot_correlations` -- confirm against its return value.
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

##### Ferritin

In [None]:
# Side-by-side correlation panels (flux and abundance) for one metadata column.
metadata_col = "Ferritin"
group_name = "ALL"  # Use models in visualizations

# Values handed to both panels as xmin/xmax/xpad
xmin, xmax = -0.60, 0.60
xpad = 0.05

# Keyword arguments forwarded unchanged to both `plot_correlations` calls
plot_kwargs = {
    "xmin": xmin,
    "xmax": xmax,
    "xpad": xpad,
    "edgecolor": edgecolor,
    "edgewidth": edgewidth,
    "cmap": cmap,
    "zorder": zorder,
    "scatter_inch": scatter_inch,
    "hist_inch": hist_inch,
    "hist_pad": hist_pad,
    "grid": grid,
    "xtick_major": 0.2,
    # Determines minor tick and thus bin size if `xbinwidth` is not set
    "xtick_minor": 0.1,
    # Major y-tick interval for histogram aligned with x-axis
    "histx_ytick_major": 600,
    "ytick_major": 10,
    # Determines minor tick and thus bin size if `ybinwidth` is not set
    "ytick_minor": 2,
    # Major x-tick interval for histogram aligned with y-axis
    "histy_xtick_major": 600,
}
sharey = True


# One row, two columns: first axes for flux, second for abundance.
# Each panel's size is the scatter size plus histogram size and padding
# whenever the matching histogram flag (`histx` / `histy`) is truthy.
nrows, ncols = 1, 2
fig, (ax_flux, ax_abun) = plt.subplots(
    nrows,
    ncols,
    figsize=(
        (scatter_inch + ((hist_inch + hist_pad) if histx else 0)) * ncols,
        (scatter_inch + ((hist_inch + hist_pad) if histy else 0)) * nrows,
    ),
    sharex=True,
    sharey=sharey,
)


# Flux correlations: drop level 1 of the row index, then discard rows holding
# any NaN (original note: correlations that could not be calculated due to
# fixed/blocked flux).
df_corr = df_all_correlations.loc[:, f"Flux and {metadata_col}"].droplevel(1).copy()
df_corr = df_corr.dropna(axis=0, how="any")
df_corr_flux = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


# Abundance correlations: same preparation, rows holding any NaN are discarded.
df_corr = (
    df_all_correlations.loc[:, f"Abundance and {metadata_col}"].droplevel(1).copy()
)
df_corr = df_corr.dropna(axis=0, how="any")
df_corr_abun = prepare_correlation_df(df_corr, pvalue_tol=pvalue_tol)


if sharey:
    # Both panels receive the same `ymax`: the larger of the two maxima found
    # in their "pvalue" columns.
    plot_kwargs.update(
        ymax=max(df_corr_flux["pvalue"].max(), df_corr_abun["pvalue"].max())
    )

axes_flux = plot_correlations(
    df_corr_flux,
    ax=ax_flux,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)
axes_abun = plot_correlations(
    df_corr_abun,
    ax=ax_abun,
    histx=histx,
    histy=histy,
    colorbar=True,
    vertical_lines={},
    **plot_kwargs,
)

# Show the first `display_top_nhits` rows of each prepared table, using the
# matching column group of the full correlation table.
print(
    df_all_correlations.loc[
        df_corr_flux.head(display_top_nhits).index,
        [
            col
            for col in df_all_correlations.columns
            if col[0] == f"Flux and {metadata_col}"
        ],
    ]
)
print()
print(
    df_all_correlations.loc[
        df_corr_abun.head(display_top_nhits).index,
        [
            col
            for col in df_all_correlations.columns
            if col[0] == f"Abundance and {metadata_col}"
        ],
    ]
)

axes_flux[0].set_title(f"Flux and {metadata_col}", fontsize="x-large", loc="center")
axes_abun[0].set_title(
    f"Abundance and {metadata_col}",
    fontsize="x-large",
    loc="center",
)

# Tie the flux panel's axes [1] (y) and [2] (x) to the abundance panel's.
# NOTE(review): presumably these are the marginal histogram axes returned by
# `plot_correlations` -- confirm against its return value.
axes_flux[1].sharey(axes_abun[1])
axes_flux[2].sharex(axes_abun[2])
fig;

In [None]:
# Build a table of rho values only: remove the "Flux and Abundance" column
# group, keep the columns whose level-1 label is "rho", drop level 1 from both
# axes, and discard rows containing any NaN.
df_rho = df_all_correlations.drop(columns="Flux and Abundance", level=0)
df_rho = df_rho.loc[:, df_rho.columns.get_level_values(1) == "rho"]
df_rho = df_rho.droplevel(1, axis=1).droplevel(1, axis=0).dropna()
df_rho

In [None]:
# NOTE(review): `asdasd` is not assigned anywhere in the visible part of this
# notebook; if it is undefined, evaluating it raises NameError and stops a
# "Run All" at this cell. Presumably a deliberate stop marker -- confirm, and
# consider replacing it with an explicit `raise` or removing the cell.
asdasd

In [None]:
# Clustered heatmap of the rho values in the "Flux..." columns of `df_rho`.
# After transposing, each row is one of those columns, relabeled with the text
# that follows the last " and " in its name.
df = df_rho.loc[:, df_rho.columns.str.startswith("Flux")].T
df = df.rename(index=lambda label: label.split(" and ")[-1])


# Color limits are symmetric around zero: [-rho_max, rho_max]
rho_max = 0.52

# Rows and columns are both clustered; every other `clustermap` option
# (linkage, z_score, standard_scale, row/col colors, mask, dendrogram ratio,
# colorbar position, ...) is left at its default.
fig = sns.clustermap(
    data=df,
    figsize=(20, 15),
    cmap="coolwarm",
    vmin=-rho_max,
    vmax=rho_max,
    row_cluster=True,
    col_cluster=True,
)
ax_heatmap = fig.ax_heatmap

# x-axis: set the tick label size, then blank out the tick labels
ax_heatmap.xaxis.set_tick_params(labelsize="x-large")
ax_heatmap.xaxis.set_ticklabels([])

# y-axis: tick labels are kept, only their size is changed
ax_heatmap.yaxis.set_tick_params(labelsize="x-large")
fig;

In [None]:
# df = df_rho.loc[:, [x for x in df_rho.columns if x.startswith("Flux")]].T
# # df = df_rho.T
# rho_max = 0.52
# row_linkage = linkage(df, method='average', metric='jaccard', optimal_ordering=True)
# col_linkage = linkage(df, method='average', metric='euclidean', optimal_ordering=True)
# fig = sns.clustermap(
#     data=df,
#     cmap="coolwarm",
#     vmin=-rho_max,
#     vmax= rho_max,
#     # z_score=None,
#     # standard_scale=None,
#     figsize=(20, 15),
#     # cbar_kws=None,
#     row_cluster=False,
#     col_cluster=False,
#     row_linkage=row_linkage,
#     col_linkage=col_linkage,
#     # row_colors=None,
#     # col_colors=None,
#     # mask=None,
#     # dendrogram_ratio=0.2,
#     # colors_ratio=0.03,
#     # cbar_pos=(0.02, 0.8, 0.05, 0.18),
#     # tree_kws=None,
#     # **kwargs,
# )
# ax_heatmap = fig.ax_heatmap
# ax_heatmap.xaxis.set_tick_params(labelsize="x-large")
# ax_heatmap.xaxis.set_ticklabels([])

# ax_heatmap.yaxis.set_tick_params(labelsize="xx-large")
# # ax_heatmap.yaxis.set_ticklabels([])

In [None]:
# df = df_rho.loc[:, [x for x in df_rho.columns if x.startswith("Abundance")]].T
# df = df_rho.T
# rho_max = 0.52
# row_linkage = linkage(df, method='single', metric='jaccard', optimal_ordering=True)
# col_linkage = linkage(df, method='single', metric='ward', optimal_ordering=True)
# fig = sns.clustermap(
#     data=df,
#     cmap="coolwarm",
#     vmin=-rho_max,
#     vmax= rho_max,
#     # z_score=None,
#     # standard_scale=None,
#     figsize=(20, 15),
#     # cbar_kws=None,
#     row_cluster=False,
#     col_cluster=False,
#     row_linkage=row_linkage,
#     col_linkage=col_linkage,
#     # row_colors=None,
#     # col_colors=None,
#     # mask=None,
#     # dendrogram_ratio=0.2,
#     # colors_ratio=0.03,
#     # cbar_pos=(0.02, 0.8, 0.05, 0.18),
#     # tree_kws=None,
#     # **kwargs,
# )
# ax_heatmap = fig.ax_heatmap
# ax_heatmap.xaxis.set_tick_params(labelsize="x-large")
# ax_heatmap.xaxis.set_ticklabels([])

# ax_heatmap.yaxis.set_tick_params(labelsize="xx-large")
# # ax_heatmap.yaxis.set_ticklabels([])

In [None]:
# df = df_rho.loc[:, [x for x in df_rho.columns if x.startswith("Flux")]].T

# fig_cluster = sns.clustermap(
#     data=df,
#     # *,
#     # pivot_kws=None,
#     method="average",
#     metric='euclidean',
#     # z_score=None,
#     # standard_scale=None,
#     # cbar_kws=None,
#     figsize=(20, 20),
#     row_cluster=True,
#     col_cluster=True,
#     # row_colors=None,
#     # col_colors=None,
#     # mask=None,
#     # dendrogram_ratio=0.2,
#     # colors_ratio=0.03,
#     # cbar_pos=(0.02, 0.8, 0.05, 0.18),
#     # tree_kws=None,
#     # **kwargs,
# )
# ax = fig_cluster.ax_heatmap
# # ax.xaxis.set_ticklabels([])
# ax.yaxis.set_tick_params(labelsize="x-large")

# fig_cluster;