# Compute statistically significant fluxes between groups - Mouse G6PD variants omics data
## Setup
### Import packages

In [None]:
import re
import textwrap
import warnings
from collections import defaultdict
from itertools import combinations

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    ensure_iterable,
    get_dirpath,
    read_cobra_model,
    show_versions,
)
from rbc_gem_utils.analysis.overlay import (
    DEFAULT_PREFIX_SUFFIX_VALUES,
    DEFAULT_PROTEOME_COMPARTMENT,
    EnzymeDilution,
    add_relaxation_budget,
    load_overlay_model,
    plot_correlations,
)
from rbc_gem_utils.visualization import cmap_map
from scipy.stats import kruskal, mannwhitneyu, spearmanr

# Render all figure text in Arial.
plt.rcParams.update({"font.family": "Arial"})

# Report package versions through the rbc_gem_utils helper.
show_versions()

### Define configuration
#### COBRA Configuration

In [None]:
# Select Gurobi as the COBRA solver.
COBRA_CONFIGURATION.solver = "gurobi"
# Set bound defaults much larger to prevent model loading issues.
# The lower default must be the large-magnitude value -1e8 (symmetric with the
# upper default); the former -1e-8 was a near-zero bound that contradicted the
# stated intent of widening the defaults.
COBRA_CONFIGURATION.bounds = (-1e8, 1e8)
COBRA_CONFIGURATION.tolerance = 1e-7
# Display the resulting configuration as the cell output.
COBRA_CONFIGURATION

### Define organism, model, and dataset

In [None]:
# Identifiers that are combined into directory and file names further below.
organism, model_id, dataset_name = "Mouse", "RBC_GEM", "G6PDvariants"

### Set variables for sample identification

In [None]:
# Vocabulary used to parse sample IDs
timepoints = ["Pre", "Post", "TD"]
phenotypes = ["HumCan", "A", "MED"]

# Alternation fragments shared by several of the patterns below
_phenotype_alternatives = "|".join(phenotypes)
_timepoint_alternatives = "|".join(timepoints)

# Donor = phenotype label immediately followed by a number (e.g. "HumCan12")
donor_re = re.compile(rf"(?P<donor>({_phenotype_alternatives})(?P<num>\d+))")
time_re = re.compile(rf"(?P<time>{_timepoint_alternatives})")
phenotype_re = re.compile(rf"(?P<phenotype>({_phenotype_alternatives}))")

# Capitalized names of aggregate operations ("Mean", "Median")
operations = "|".join(name.capitalize() for name in ("mean", "median"))

# "<Operation>_<group>" identifies an aggregate rather than an individual sample
operation_re = re.compile(rf"(?P<op>{operations})\_(?P<group>\w+)")
# "<donor>_<time>" that does not start with an operation name
sample_id_re = re.compile(rf"(?!{operations}){donor_re.pattern}\_{time_re.pattern}")

### Set computation options

In [None]:
# Computation flags and the objective used to locate pcFVA results
run_computations = True
verbose = True
objective_reactions = ["NaKt"]

# Naming conventions for enzyme variables, read from the overlay defaults
_enzyme_defaults = DEFAULT_PREFIX_SUFFIX_VALUES["enzymes"]
enzyme_rxn_prefix = _enzyme_defaults["prefix.dilution"]
enzyme_met_prefix = _enzyme_defaults["prefix.metabolite"]
enzyme_met_suffix_total = _enzyme_defaults["suffix.total"]
# Compartment suffix appended to enzyme identifiers
comp_suffix = "_{}".format(DEFAULT_PROTEOME_COMPARTMENT)

### Set figure options

In [None]:
# Figure export settings
imagetype = "svg"
transparent = False
save_figures = True

### Set paths

In [None]:
# Processed input data for this organism and dataset
processed_data_dirpath = get_dirpath(use_temp="processed").joinpath(
    organism, dataset_name
)

# Model files are read from the OVERLAY analysis directory
overlay_dirpath = get_dirpath("analysis").joinpath("OVERLAY", organism)
model_dirpath = overlay_dirpath.joinpath(model_id)
# Results for this model/organism/dataset combination
results_dirpath = get_dirpath(use_temp="processed").joinpath(
    model_id, "OVERLAY", organism, dataset_name
)

sample_pcmodels_dirpath = results_dirpath.joinpath("sample_pcmodels")
# pcFVA results are stored in a folder named after the objective reaction(s)
pcfva_results_dirpath = results_dirpath.joinpath(
    "pcFVA", "_".join(["OBJ", *objective_reactions])
)
# Objective reaction does not matter since correlations are computed
# based on min and max fluxes and abundance, which are obtained when optimum is 0.
corr_results_dirpath = results_dirpath.joinpath("correlations")
# Ensure directory exists
corr_results_dirpath.mkdir(parents=True, exist_ok=True)

## Load RBC-GEM model

In [None]:
# Files for the base model and its "_PC" counterpart
_model_filepath = model_dirpath / f"{model_id}.xml"
_pcmodel_filepath = model_dirpath / f"{model_id}_PC.xml"

model = read_cobra_model(filename=_model_filepath)
pcmodel = load_overlay_model(filename=_pcmodel_filepath)

# Add relaxation budget to initial PC model to get names of relaxation reactions
add_relaxation_budget(pcmodel, 0, verbose=False)
pcmodel

## Load pcFVA generated results

In [None]:
# Load the table of all pcFVA solutions; the file name is prefixed with the PC model ID
_fva_solutions_filepath = pcfva_results_dirpath / f"{pcmodel.id}_All_FVAsols.csv"
df_pcfva_all = pd.read_csv(_fva_solutions_filepath, index_col=None)

df_pcfva_all

## Create DataFrame for calculations and visualizations
### Get maximum reaction fluxes and associated abundance values
#### Get maximum reaction fluxes and ranges

In [None]:
# Restrict the pcFVA solutions to reactions that exist in the base model
rxns = model.reactions.list_attr("id")
_is_model_reaction = df_pcfva_all["reactions"].isin(rxns)
df_max_flux_per_model = (
    df_pcfva_all[_is_model_reaction]
    .copy()
    .groupby(["model", "reactions", "optimum"])[["min", "max"]]
    .agg(
        {
            "min": "min",  # Minimum reaction flux per model
            "max": "max",  # Maximum reaction flux per model
        }
    )
)
# Address issues possibly caused by floating point precision, ideally a value that prevents any negative ranges
_inverted_bounds = df_max_flux_per_model["max"] < df_max_flux_per_model["min"]
df_max_flux_per_model.loc[_inverted_bounds, ["max", "min"]] = [0, 0]
atol = COBRA_CONFIGURATION.tolerance


def _snap_flux_to_tolerance(value, atol=atol, ndigits=-int(np.log10(atol))):
    """Return 0 for values within `atol` of zero, otherwise round to the tolerance's decimals."""
    if np.isclose(value, 0, atol=atol):
        return 0
    return round(value, ndigits)


for _bound in ("max", "min"):
    df_max_flux_per_model[_bound] = df_max_flux_per_model[_bound].apply(
        _snap_flux_to_tolerance
    )
df_max_flux_per_model["range"] = df_max_flux_per_model["max"].sub(
    df_max_flux_per_model["min"]
)
# Ensure no negative values, if results appear then tolerance should be adjusted
df_max_flux_per_model[df_max_flux_per_model["range"] < 0]

#### Get maximum "enzyme" abundances

In [None]:
_total_suffix = f"{enzyme_met_suffix_total}{comp_suffix}"
_dilution_prefix = f"{enzyme_rxn_prefix}{enzyme_met_prefix}"


def _is_total_enzyme_dilution(reaction):
    """Return True for EnzymeDilution reactions whose ID ends with the total-enzyme suffix."""
    return isinstance(reaction, EnzymeDilution) and reaction.id.endswith(_total_suffix)


rxns = pcmodel.reactions.query(_is_total_enzyme_dilution).list_attr("id")
df_max_abundance_per_model = df_pcfva_all[df_pcfva_all["reactions"].isin(rxns)].copy()
# Rename dilution reactions to match
reaction_enzyme_map = {}
for enzyme_rid in df_max_abundance_per_model["reactions"]:
    reaction_enzyme_map[enzyme_rid] = enzyme_rid.replace(_dilution_prefix, "").replace(
        _total_suffix, ""
    )
df_max_abundance_per_model["reactions"] = df_max_abundance_per_model[
    "reactions"
].replace(reaction_enzyme_map)
df_max_abundance_per_model = df_max_abundance_per_model.groupby(
    ["model", "reactions", "optimum"]
)[["max"]].max()
atol = COBRA_CONFIGURATION.tolerance


def _clean_abundance(value, atol=atol, ndigits=-int(np.log10(atol))):
    """Map negative or near-zero values to 0; round everything else to the tolerance's decimals."""
    # Address issues possibly caused by floating point precision, atol is ideally a value that prevents any negative ranges
    if value < 0 or np.isclose(value, 0, atol=atol):
        return 0
    return round(value, ndigits)


df_max_abundance_per_model["max"] = df_max_abundance_per_model["max"].apply(
    _clean_abundance
)
df_max_abundance_per_model = df_max_abundance_per_model.rename(
    columns={"max": "abundance"}
)
# Ensure no negative values, if results appear then tolerance should be adjusted
_has_negative = (df_max_abundance_per_model < 0).any(axis=1)
df_max_abundance_per_model[_has_negative]

#### Merge DataFrames

In [None]:
# Left join on the shared index so every flux row is kept, then move the
# index levels back into ordinary columns.
df_data_all = df_max_flux_per_model.merge(
    df_max_abundance_per_model,
    how="left",
    left_index=True,
    right_index=True,
).reset_index(drop=False)
df_data_all

### Identify donor, timepoints, and phenotypes for results

In [None]:
metadata_columns = ["donor", "time", "phenotype"]


def _first_group_or_na(pattern, text):
    """Return the first capture group of `pattern` in `text`, or pd.NA when there is no match."""
    match = pattern.search(text)
    return match.group(1) if match else pd.NA


# One metadata column per pattern, parsed from the model name
for key, search_re in zip(metadata_columns, [donor_re, time_re, phenotype_re]):
    df_data_all[key] = df_data_all["model"].apply(
        lambda name, pattern=search_re: _first_group_or_na(pattern, name)
    )
df_data_all

## Compute statistically significant results between groups
### Remove models based on data operations

In [None]:
# Keep only rows whose model name does not match the "<Operation>_<group>" pattern
_is_sample_model = [
    operation_re.search(model_name) is None for model_name in df_data_all["model"]
]
df_data_samples = df_data_all.loc[_is_sample_model].copy()
df_data_samples

### Create groups of sample models

In [None]:
# The catch-all group contains every sample model
all_key = "ALL"
model_groups = {}
model_groups[all_key] = [*df_data_samples["model"].unique()]


def create_group_of_models(df, groupby, verbose=False):
    """Group the unique model IDs of ``df`` by one or more columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``"model"`` column and the column(s) named in ``groupby``.
    groupby : str or list of str
        Column(s) passed to ``DataFrame.groupby`` to form the groups.
    verbose : bool, default False
        If True, print the number of models in each group.

    Returns
    -------
    dict
        Maps each group name (the group key parts joined with ``"_"``) to the
        list of unique model IDs in that group. Empty when ``df`` yields no groups.
    """
    grouped = df.groupby(groupby)["model"].agg(lambda x: list(x.unique()))
    grouped = {"_".join(ensure_iterable(k)): v for k, v in grouped.to_dict().items()}
    if verbose:
        # Only needed for aligned printing; `default=0` prevents a ValueError
        # when there are no groups at all.
        max_name_len = max((len(group_name) for group_name in grouped), default=0)
        for group_name, model_list in grouped.items():
            spacepad = " " * (max_name_len - len(group_name))
            print(f"{group_name}:{spacepad}\t{len(model_list)} samples")
    return grouped

#### Based on timepoint

In [None]:
# Register one group per timepoint
_sample_metadata = df_data_samples[["model", *metadata_columns]]
grouped = create_group_of_models(_sample_metadata, groupby=["time"], verbose=verbose)
model_groups.update(grouped)

##### Based on timepoint and phenotype

In [None]:
# Register one group per (timepoint, phenotype) pair, named "<time>_<phenotype>"
_sample_metadata = df_data_samples[["model", *metadata_columns]]
grouped = create_group_of_models(
    _sample_metadata, groupby=["time", "phenotype"], verbose=verbose
)
model_groups.update(grouped)

#### Based on phenotype

In [None]:
# Register one group per phenotype
_sample_metadata = df_data_samples[["model", *metadata_columns]]
grouped = create_group_of_models(
    _sample_metadata, groupby=["phenotype"], verbose=verbose
)
model_groups.update(grouped)

##### Based on phenotype and timepoint

In [None]:
# Register one group per (phenotype, timepoint) pair, named "<phenotype>_<time>"
_sample_metadata = df_data_samples[["model", *metadata_columns]]
grouped = create_group_of_models(
    _sample_metadata, groupby=["phenotype", "time"], verbose=verbose
)
model_groups.update(grouped)

### View model groups

In [None]:
# List every registered group with its sample count, padding names for alignment
print("Possible groups for analyses\n============================")
max_name_len = max(map(len, model_groups))
for _name in model_groups:
    _padding = " " * (max_name_len - len(_name))
    print(f"{_name}:{_padding}\t{len(model_groups[_name])} samples")

# Index by (reaction, model) and drop the sample metadata columns
df_data_for_analyses = df_data_samples.set_index(["reactions", "model"]).drop(
    columns=metadata_columns
)
df_data_for_analyses

#### Ensure groups exist and setup directory structure

In [None]:
# New directories are created for main groups
groups_dict = defaultdict(dict)
group_items_list = [
    ["Pre", "Post", "TD"],
    ["HumCan", "A", "MED"],
]
# All main groups are created under the "All" directory, subgroups are created under each main group
for item_list in group_items_list:
    groups_dict[all_key].update({item: {} for item in item_list})
    groups_dict[all_key].update(
        {
            item: sorted(
                [
                    group_name
                    for group_name in model_groups
                    if group_name.split("_")[0] == item and group_name != item
                ]
            )
            for item in item_list
        }
    )
groups_dict[all_key].update(
    {k: groups_dict[all_key][k] for k in sorted(sorted(groups_dict[all_key]))}
)
invalid_groups = [
    group_name for group_name in groups_dict[all_key] if group_name not in model_groups
]
if any(invalid_groups):
    raise KeyError(
        f"No group(s) found for `{invalid_groups}`. Model groups must be created first before correlation computations"
    )
invalid_subgroups = [
    subgroup
    for group_values in groups_dict[all_key].values()
    for subgroup in group_values
    if subgroup not in model_groups
]
if any(invalid_subgroups):
    raise KeyError(
        f"No subgroup(s) found for `{invalid_subgroups}`. Model groups must be created first before correlation computations"
    )


header = "Expected directory structure"
print("\n".join((header, "=" * len(header), all_key)))
for idx, (group_name, subgroups) in enumerate(sorted(groups_dict[all_key].items())):
    print("\u2514\u2500\u2500" + f" {group_name}")
    vertical = "\u2502" if idx != len(groups_dict[all_key]) - 1 else " "
    for subgroup_name in sorted(subgroups):
        print(vertical + "   \u2514\u2500\u2500" + subgroup_name)

group_results_dirpath_dict = {all_key: corr_results_dirpath}
for group_name, subgroups in groups_dict[all_key].items():
    group_results_dirpath_dict[group_name] = (
        group_results_dirpath_dict[all_key] / group_name
    )
    group_results_dirpath_dict.update(
        {
            subgroup_name: group_results_dirpath_dict[group_name] / subgroup_name
            for subgroup_name in subgroups
        }
    )

#### Load subsystems and metabolic categories to enrich results

In [None]:
subsystems_to_exclude = {"Pseudoreactions"}
use_abbrevs = True
# Categories kept as-is, with their one-letter abbreviations
abbreviations = {
    "Amino acid metabolism": "A",
    "Carbohydrate metabolism": "C",
    "Lipid metabolism": "L",
    "Metabolism of cofactors and vitamins": "V",
    "Nucleotide metabolism": "N",
    "Reactive species": "R",
    "Transport reactions": "T",
    "Other": "O",
}
categories_to_keep = list(abbreviations)

_subsystems_filepath = get_dirpath("curation") / "subsystems.tsv"
df_pathways = (
    pd.read_csv(_subsystems_filepath, sep="\t", dtype=str)
    .fillna("")
    # Rename "name" to subsystem to match reaction attribute
    .rename(columns={"name": "subsystem"})
)


def _normalize_category(category):
    """Fold "Metabolism of other amino acids" into amino acids; map unkept categories to "Other"."""
    # Group "Metabolism of other amino acids" with amino acids rather than treat as "other"
    if category == "Metabolism of other amino acids":
        category = "Amino acid metabolism"
    return category if category in categories_to_keep else "Other"


df_pathways["category"] = df_pathways["category"].apply(_normalize_category)
_is_excluded = df_pathways["subsystem"].isin(subsystems_to_exclude)
df_pathways = df_pathways[~_is_excluded].copy()
# Lookup from subsystem name to its category
subsystem_to_category_dict = dict(
    zip(df_pathways["subsystem"], df_pathways["category"])
)
df_pathways

## Compute significant results between groups
#### Compare all subgroups at once

In [None]:
# Main group whose phenotype subgroups are compared, and what to compare
group_name = "Pre"
optimum = 0
value_to_compare = "range"
compare_pairwise = True
compare_all_groups = True
ordered_group_to_compare = [
    "_".join((group_name, phenotype)) for phenotype in phenotypes
]

# Flatten the member models of all compared groups, preserving group order
all_samples_for_comparison = []
for _subgroup in ordered_group_to_compare:
    all_samples_for_comparison.extend(np.array(model_groups[_subgroup]))

# Rows for the selected samples at the selected optimum
_sample_rows = pd.IndexSlice[:, all_samples_for_comparison]
df_data_for_correlations = df_data_for_analyses.loc[_sample_rows, :]
_at_optimum = df_data_for_correlations["optimum"] == optimum
df_data_for_correlations = df_data_for_correlations[_at_optimum].drop(
    columns="optimum"
)

if value_to_compare == "max":
    # Use the larger magnitude of the two bounds as the "max" value
    _largest_magnitude = df_data_for_correlations[["min", "max"]].abs().max(axis=1)
    df_data_for_correlations["max"] = _largest_magnitude
    df_data_for_correlations = df_data_for_correlations.drop(columns="min")

print("Groups to compare\n=================")
if compare_all_groups:
    print(tuple(ordered_group_to_compare))
if compare_pairwise:
    pairwise_group_combos = [*combinations(ordered_group_to_compare, 2)]
    for group in pairwise_group_combos:
        print(group)
df_data_for_correlations

In [None]:
results_dict = defaultdict(dict)
mannwhitneyu_for_pairs = True

_RESULT_FIELDS = ["statistic", "pvalue"]
_all_groups_key = tuple(ordered_group_to_compare)
# Pairwise comparisons use Mann-Whitney U when requested, otherwise Kruskal-Wallis
_pairwise_test = mannwhitneyu if mannwhitneyu_for_pairs else kruskal


def _run_rank_test(samples, test_func):
    """Run `test_func` on `samples`, or return pd.NA results when there is nothing to compare."""
    distinct_values = {v for sample in samples for v in sample if not np.isnan(v)}
    if len(distinct_values) <= 1:
        # Skip variables that do not have any differences
        return dict.fromkeys(_RESULT_FIELDS, pd.NA)
    outcome = test_func(*samples, nan_policy="omit")
    return {field: getattr(outcome, field) for field in _RESULT_FIELDS}


_reaction_ids = df_data_for_correlations.index.get_level_values("reactions").unique()
for rid in _reaction_ids:
    df_data_rxn = df_data_for_correlations.loc[rid]
    df_data_rxn_opt_value = df_data_rxn[value_to_compare].copy()
    # Values of each compared group for this reaction, in comparison order
    data_arrays = {
        _name: df_data_rxn_opt_value.loc[model_groups[_name]].values
        for _name in ordered_group_to_compare
    }
    if compare_all_groups:
        results_dict[_all_groups_key][rid] = _run_rank_test(
            list(data_arrays.values()), kruskal
        )
    if compare_pairwise:
        for combo in pairwise_group_combos:
            results_dict[combo][rid] = _run_rank_test(
                [data_arrays[_member] for _member in combo], _pairwise_test
            )
# One table (reactions x statistic/pvalue) per comparison
dataframes = {
    _comparison: pd.DataFrame.from_dict(_per_reaction, orient="index")
    for _comparison, _per_reaction in results_dict.items()
}
print(f"Number of different comparisons made: {len(dataframes)}")
print("Groups compared\n===============")
for key in list(dataframes):
    print(key)

### Determine significance using p-values

In [None]:
# Options for selecting and presenting significant reactions
pvalue_sig = 0.05
enzyme_reactions_only = False
include_boundary_reactions = False
remove_group_name_from_samples = True
sort_by_subsystem = True
standardize_by = "mean"
use_group_means = False

significant_dataframes = {}
# Tag extracellular metabolite names. The suffix is added only when it is not
# already present, so re-running this cell does not stack
# " (extracellular) (extracellular)" onto the names.
extracellular_suffix = " (extracellular)"
for met in model.metabolites.query(lambda x: x.compartment == "e"):
    if not met.name.endswith(extracellular_suffix):
        met.name += extracellular_suffix
# Descriptive columns that are separated from the per-sample data columns
metadata_columns = [
    "name",
    "stoichiometry",
    "proteins",
    "pvalue",
    "subsystem",
    "category",
]
# For every comparison, build a table of significant reactions containing the
# (optionally standardized) per-sample values plus descriptive metadata, and
# store it in `significant_dataframes` under the comparison key.
for key, df in dataframes.items():
    # Drop rows with missing values, then keep reactions at or below the p-value cutoff
    df = df.dropna()
    df = df[df["pvalue"] <= pvalue_sig].drop("statistic", axis=1)
    if enzyme_reactions_only:
        # Keep only rows that have an abundance value
        df_pivot = df_data_for_correlations.loc[
            df.index, ["abundance", value_to_compare]
        ].dropna(subset="abundance")
        df_pivot = df_pivot.drop("abundance", axis=1)
    else:
        df_pivot = df_data_for_correlations.loc[df.index, value_to_compare]
    if not include_boundary_reactions:
        # Exclude reactions flagged as boundary reactions in the model
        df_pivot = df_pivot[
            ~df_pivot.index.isin(
                model.reactions.query(lambda x: x.boundary).list_attr("id"),
                level="reactions",
            )
        ]
    # Reshape to one row per reaction and one column per sample model
    df_pivot = df_pivot.reset_index(drop=False)
    df_pivot = df_pivot.pivot(
        columns="model", index="reactions", values=value_to_compare
    )
    df = pd.merge(df, df_pivot, left_index=True, right_index=True).sort_values("pvalue")
    df.index.name = "reactions"
    # Transpose so samples become rows, which allows reordering them by group
    df = df.reset_index(drop=False).set_index(["reactions", "pvalue"]).T
    df = pd.concat(
        [
            # Sort index by donor number and subgroup while concatenating
            df.loc[model_groups[g]].sort_index(
                key=lambda x: [int(donor_re.search(v).group("num")) for v in x.values]
            )
            for g in key
        ],
        axis=0,
    )
    # Remove pcmodel ID from sample names
    df.index = [sample_id.replace(f"{pcmodel.id}_", "") for sample_id in df.index]
    if remove_group_name_from_samples:
        # NOTE: `group_name` is not set inside this loop; it is whatever value
        # the name holds when this cell runs.
        df.index = [
            "_".join([x for x in sample_id.split("_") if x != group_name])
            for sample_id in df.index
        ]

    # Back to one row per reaction, with "reactions" and "pvalue" as columns
    df = df.T.reset_index(drop=False)
    # Enrich results
    df["name"] = [
        r.name for r in model.reactions.get_by_any(list(df["reactions"].values))
    ]
    df["stoichiometry"] = [
        r.build_reaction_string(use_metabolite_names=True)
        for r in model.reactions.get_by_any(list(df["reactions"].values))
    ]
    df["subsystem"] = [
        r.subsystem for r in model.reactions.get_by_any(list(df["reactions"].values))
    ]
    # Subsystems missing from the lookup keep their subsystem name as the category
    df["category"] = df["subsystem"].replace(subsystem_to_category_dict)
    # Semicolon-separated, sorted IDs of the genes attached to each reaction
    df["proteins"] = [
        ";".join(sorted([g.id for g in r.genes]))
        for r in model.reactions.get_by_any(list(df["reactions"].values))
    ]
    # Replace commas to prevent issues with CSV export
    df["subsystem"] = df["subsystem"].apply(lambda x: x.replace(",", ""))
    df["category"] = df["category"].apply(lambda x: x.replace(",", ""))
    df["pvalue"] = df["pvalue"].apply(lambda x: round(x, 5))

    df = df.set_index("reactions")
    if sort_by_subsystem:
        df = df.sort_values(by=["category", "subsystem", "proteins"])

    # Split into descriptive columns and per-sample value columns
    df_meta = df.loc[:, metadata_columns].copy()
    df_data = df.loc[:, ~df.columns.isin(df_meta.columns)].copy()

    if use_group_means:
        # Replace per-sample columns with one mean column per phenotype,
        # taking the phenotype from the last "_"-separated part of each group name
        phenotypes_for_key = [k.split("_")[-1] for k in key]
        df_data = pd.concat(
            [
                df_data.loc[
                    :,
                    [
                        phenotype_re.search(sample_id).group("phenotype") == phenotype
                        for sample_id in df_data.columns
                    ],
                ].mean(axis=1)
                for phenotype in phenotypes_for_key
            ],
            axis=1,
        )
        df_data.columns = phenotypes_for_key
    if standardize_by == "mean":
        # Row-wise standardization: subtract the row mean and divide by the row
        # standard deviation; rows that become entirely NaN are dropped
        df_data = (
            df_data.sub(df_data.mean(axis=1), axis=0)
            .div(df_data.std(axis=1), axis=0)
            .dropna(how="all", axis=0)
        )
    elif standardize_by == "median":
        # Row-wise robust scaling: subtract the row median and divide by the
        # interquartile range (75th minus 25th percentile)
        df_data = (
            (df_data.T - df_data.median(axis=1))
            / (df_data.quantile(q=0.75, axis=1) - df_data.quantile(q=0.25, axis=1))
        ).T
    else:
        # Any other value leaves the data unscaled
        pass
    # Put dataframes back together for custom reordering
    significant_dataframes[key] = df_data.merge(
        df_meta, left_index=True, right_index=True
    )
    print(key)

# Display the table for the comparison across all groups
key = tuple(ordered_group_to_compare)
# key = ('Pre_HumCan', 'Pre_A')
significant_dataframes[key]

In [None]:
# key = tuple(ordered_group_to_compare)
# df = significant_dataframes[key]
# df = df.loc[:, ~df.columns.isin(["proteins", "pvalue", "subsystem", "category", "name", "stoichiometry"])].T
# df.index = pd.MultiIndex.from_tuples([(x, phenotype_re.search(x).group(1)) for x in df.index])
# df = df.groupby(level=1).mean().T
# df = df[["HumCan", "A", "MED"]]

# print(df.min().min(), df.max().max())
# df = df.merge(df_meta, left_index=True, right_index=True)
# ordered_subsystems = [
#     "Pentose phosphate pathway",
#     "Galactose metabolism",
#     "Glycolysis / Gluconeogenesis",
#     "Transport extracellular",
#     "Alanine aspartate and glutamate metabolism",
#     "Miscellaneous",
#     "Protein modification",
# ]
# df = pd.concat(
#     [
#         df[df["subsystem"] == subsystem].sort_values(by="pvalue")
#         for subsystem in ordered_subsystems
#     ],
#     axis=0,
# )
# significant_dataframes[key] = df

# df

In [None]:
# key = ("Pre_HumCan", "Pre_A")
# df_main = significant_dataframes[key]
# df_main.loc[["O2t", "O2St", "NOt", "H2Ot", "NH4t", "NH3t", "CO2t"], "subsystem"] = (
#     "Transport gas"
# )
# df_main.loc[
#     [
#         "FAt_hs_3_0",
#         "3DGt",
#         "HEMATINABCte",
#         "E217BGLCRABCte",
#         "5FLURAABCte",
#         "URATEABCte",
#         "Clt",
#         "NO3t",
#         "LDOPAt",
#         "5FLURAt",
#         "E217BGLCRte",
#     ],
#     "subsystem",
# ] = "Transport other"
# ordered_subsystems = [
#     "Pentose phosphate pathway",
#     "Galactose metabolism",
#     "Glycolysis / Gluconeogenesis",
#     "Pyruvate metabolism",
#     "Protein modification",
#     "Reactive species formation and detoxification",
#     "Miscellaneous",
#     "Cysteine and methionine metabolism",
#     "Glutathione metabolism",
#     "Phenylalanine tyrosine and tryptophan metabolism",
#     "Transport other",
#     "Transport gas",
#     "Transport extracellular",
# ]
# df_main = pd.concat(
#     [
#         (
#             df_main[df_main["subsystem"] == subsystem]
#             if subsystem != "Protein modification"
#             else df_main[df_main["subsystem"] == subsystem].iloc[::-1]
#         )
#         for subsystem in ordered_subsystems
#     ],
#     axis=0,
# )
# significant_dataframes[key] = df_main.copy()
# df_main

## Export results

In [None]:
ftype = "csv"
for key, df_main in significant_dataframes.items():
    df_meta = df_main.loc[:, metadata_columns].copy()
    df_data = df_main.loc[:, ~df_main.columns.isin(df_meta.columns)].copy()
    for df_type, df in zip(["data", "meta"], [df_data, df_meta]):
        filename = "_".join(
            ["MannWhiteney" if mannwhitneyu_for_pairs and len(key) == 2 else "Kruskal"]
            + [g.split("_")[-1] for g in key]
            + [df_type]
        )
        if use_group_means:
            filename += "_mean"
        filename = group_results_dirpath_dict[group_name] / filename
        df.to_csv(
            f"{filename}.{ftype}", sep="\t" if ftype == "tsv" else ",", index=True
        )