In [365]:
import pandas as pd
import numpy as np
import os
import plotly.express as px

In [366]:
def process_gt():
    file_path = "m_MTBLS2542_Metabo_LC-MS_positive_reverse-phase_v2_maf.tsv"
    gt = pd.read_csv(file_path, sep="\t")
    gt['retention_time'] = gt['retention_time']*60

    print("\nGround Truth - True Metabolite info from COVID dataset")
    print(f"No of rows: {len(gt)}")
    print(f"No of unique ChEBI IDs: {len(np.unique(gt['database_identifier']))}")
    gt.head()
    
    return gt

In [367]:
def funnel_plot_func(label_to_count, title, save_path=None):
    data = dict(count=list(label_to_count.values()),
                label=list(label_to_count.keys()))

    fig = px.funnel(data, x='count', y='label')
    fig.update_layout(
        title=title,
        title_font_size=30,  # Title font size
        font=dict(
            family="Arial, sans-serif",  # You can change the font family as needed
            size=25,  # General font size for labels, axis, etc.
            color="black"  # Optional: Set the font color
        )
    )
    if save_path:
        fig.write_image(save_path, scale=3, width=1200, height=800)
    fig.show(renderer="vscode")

# Map 1
![Sample Plot](Figures/map1.png)

Link to metabolanalyst converter

In [368]:
def map1_process_mummichog_output(run_name, cut_off_p):
    print("\nMummichog Output (mo) - User input to emperical compounds")

    file_path = f"../runs/{run_name}/tables/userInput_to_EmpiricalCompounds.tsv"
    mo_raw_df = pd.read_csv(file_path, sep="\t")
    if cut_off_p:
        sig_df = mo_raw_df[mo_raw_df['p_value'] < cut_off_p]
        sig_no_nan = sig_df[sig_df['compound_names'].notna()]
        unique_eids = sig_df["EID"].nunique()
        unique_feats = sig_df["CompoundID_from_user"].nunique()
        unique_cpd_eids = sig_no_nan["EID"].nunique()
        unique_cpd_feats = sig_no_nan["CompoundID_from_user"].nunique()
    else:
        no_nan = mo_raw_df[mo_raw_df['compound_names'].notna()]
        unique_eids = mo_raw_df["EID"].nunique()
        unique_feats = mo_raw_df["CompoundID_from_user"].nunique()
        unique_cpd_eids = no_nan["EID"].nunique()
        unique_cpd_feats = no_nan["CompoundID_from_user"].nunique()
    
    print(f"Unique EIDS: {unique_eids}")
    print(f"Unique features: {unique_feats}")

    mo_no_nan_df = mo_raw_df.dropna(subset=["compound_names"]).copy()

    print(f"Number of original ouput rows: {len(mo_raw_df)}")
    print(f"Dropped {len(mo_raw_df)-len(mo_no_nan_df)} nan compound_names rows, {len(mo_no_nan_df)} rows")

    # a metabolite feature can have multiple annotations separated by a "$"
    mo_no_nan_df['compound_names_split'] = mo_no_nan_df['compound_names'].str.split('$')
    mo_df = mo_no_nan_df.explode('compound_names_split')
    mo_df = mo_df.rename(columns={'compound_names_split': 'compound_name'})
    mo_df = mo_df.drop(columns=['compound_names'])
    mo_df = mo_df[mo_df['compound_name'].str.strip() != ""]
    print(f"Number of rows after $ split: {len(mo_df)}")


    mo_df['compound_name_no_colon'] = mo_df['compound_name'].str.split(';').str[0]
    mo_df.to_csv(f"../runs/{run_name}/tables/map1/userInput_to_EmpiricalCompounds_cpd_name_split.csv")
    mo_df = mo_df.reset_index(drop=True)
    mo_df.head()

    print("ONLY USING THE FIRST SYNONYM (;)! MIGHT WANT TO CHANGE THAT LATER")

    return mo_df, unique_eids, unique_feats, unique_cpd_eids, unique_cpd_feats



In [369]:
def map1_merge_mo_to_chebi(mo_df, name_map_file, cut_off_p):
    chebi_map = pd.read_csv(name_map_file)
    chebi_map['compound_name'] = chebi_map['Query']
    chebi_map = chebi_map.drop(columns=['Query'])

    mo_df = mo_df.merge(chebi_map[['compound_name', 'ChEBI']], on='compound_name', how='left')
    
    if cut_off_p:
        fil_ui_cpd = mo_df[mo_df['p_value'] < cut_off_p]
    else:
        fil_ui_cpd = mo_df
    
    has_chebi = fil_ui_cpd[fil_ui_cpd['ChEBI'].notna()]
    after_chebi_conv_count_eids = has_chebi['EID'].nunique()
    after_chebi_conv_count_feats = has_chebi['CompoundID_from_user'].nunique()
    print(f"\nNumber of unique EIDs and with chebi ids (p_value-{cut_off_p}): {has_chebi['EID'].nunique()}") 
    print(f"Number of unique input features and with chebi ids (p_value-{cut_off_p}): {has_chebi['CompoundID_from_user'].nunique()}") 

    return mo_df, after_chebi_conv_count_eids, after_chebi_conv_count_feats


In [370]:
# ChEBI matches
def run_chebi_matches(run_name, mo_df, gt, cut_off_p):
    matches = []

    for index, row in mo_df.iterrows():
        chebi_id = row['ChEBI']

        if pd.notna(chebi_id):     
            echebi_id = str(f"CHEBI:{int(chebi_id)}")
            match = gt[gt['database_identifier'].str.fullmatch(echebi_id, case=False, na=False)]
            
            if not match.empty:
                matches.append({
                    "compound_name": row['compound_name'] if 'compound_name' in mo_df.columns else row['compound_names'],
                    "gt_compound_name": match['metabolite_identification'].iloc[0],
                    "ui_retention_time": row['retention_time'],
                    "gt_retention_time": match['retention_time'].iloc[0],
                    "rt_difference": abs(row['retention_time'] - match['retention_time'].iloc[0]),
                    "p_value": row['p_value'],
                    "compoundID_from_user": row['CompoundID_from_user'],
                    "gt_chebi": match['database_identifier'].iloc[0],
                    "ui_chebi": str(int(chebi_id)),
                    "gt_index": match.index[0],
                    "ui_index": index,
                    "EID": row["EID"]
                })
            

    matches_df = pd.DataFrame(matches)
    matches_df.head()
    
    end_name = run_name.split(".")[-1]
    str_cut_off_p = str(cut_off_p).replace(".", "_") if cut_off_p else "no_cut_off"
    save_filename = f'matches_{end_name}_chebi_dups_{str_cut_off_p}.csv'

    if cut_off_p:
        sig_df = matches_df[matches_df["p_value"]<cut_off_p]
        unique_match_feats = sig_df["compoundID_from_user"].nunique()
        unique_match_eids = sig_df["EID"].nunique()
        sig_df.to_csv(f'../runs/{run_name}/tables/map1/sig_{save_filename}')
        print(f"\n{len(matches_df)} matches found of which {len(sig_df)} are significant features")
    else:
        unique_match_feats = matches_df["compoundID_from_user"].nunique()
        unique_match_eids = matches_df["EID"].nunique()
        print(f"\n{len(matches_df)} matches found. Some may be duplicates!")


    matches_df.to_csv(f'../runs/{run_name}/tables/map1/{save_filename}')
    
    return unique_match_feats, unique_match_eids

In [371]:
def map1_generate_funnel_plot(
    run_name,
    cut_off_p,
    track,
    mummichog_input_file, 
    unique_eids,
    unique_feats,
    unique_cpd_eids,
    unique_cpd_feats,
    after_chebi_conv_count_eids,
    after_chebi_conv_count_feats,
    unique_match_eids,
    unique_match_feats
):
    input_df = pd.read_csv(mummichog_input_file)
    if cut_off_p:
        input_df = input_df[input_df['p-value'] < cut_off_p]
        title_prefix = f"Significant ({str(cut_off_p)})"
        save_prefix = str(cut_off_p).replace(".", "_")
    else:
        title_prefix = f"All"
        save_prefix = "all"

    unique_feat_matrix = input_df[input_df["custom_id"].notna()]['custom_id'].nunique()

    if track=="feat":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_feats,
            "Cpd Name Present": unique_cpd_feats,
            "Successful Conv to Chebi": after_chebi_conv_count_feats,
            "Matches to Chebi and GT": unique_match_feats,
        }
    elif track=="eid":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_eids,
            "Cpd Name Present": unique_cpd_eids,
            "Successful Conv to Chebi": after_chebi_conv_count_eids,
            "Matches to Chebi and GT": unique_match_eids
        }
    
    title = f"{title_prefix} {track} - Map 1"
    save_path = f"../runs/{run_name}/tables/map1/funnel_{save_prefix}_{track}_map1.png"
    funnel_plot_func(label_to_count, title, save_path)
    

In [372]:
def map1(run_name, mummichog_input_file=False, name_map_file=False, cut_off_p=False, plot=True, track="feat"):
    assert track in ["feat", "eid"], f"If provided, track must be either feat or eid not {track}"
    
    if not os.path.exists(f"../runs/{run_name}/tables/map1"):
        os.mkdir(f"../runs/{run_name}/tables/map1")
    print(f"Saving to ../runs/{run_name}/tables/map1")

    processed_result = map1_process_mummichog_output(run_name, cut_off_p)
    mo_df, unique_eids, unique_feats, unique_cpd_eids, unique_cpd_feats = processed_result

    if not name_map_file:
        msg = (
            "\nTo proceed with mapping, please map compound_name to ChEBI IDs using MetaboAnalyst.\n"
            "Paste the compound_name_no_colon column into MetaboAnalyst and download the name map.\n"
            "Pass the file path to that name map to continue. No API :("
        )

        print(msg)
        return

    merged_results = map1_merge_mo_to_chebi(mo_df, name_map_file, cut_off_p)
    mo_df, after_chebi_conv_count_eids, after_chebi_conv_count_feats = merged_results
    
    gt = process_gt()
    unique_match_feats, unique_match_eids = run_chebi_matches(run_name, mo_df, gt, cut_off_p)

    if plot:
        map1_generate_funnel_plot(
            run_name,
            cut_off_p,
            track,
            mummichog_input_file, 
            unique_eids,
            unique_feats,
            unique_cpd_eids,
            unique_cpd_feats,
            after_chebi_conv_count_eids,
            after_chebi_conv_count_feats,
            unique_match_eids,
            unique_match_feats
        )

In [373]:
# run_name = "trans_omic_covid_data.run_1_default"
run_name = "trans_omic_covid_data.rsd_1_default_p"
mummichog_input_file="../input_data/mummichog_input_ttest_rsd_1.csv"
cut_off_p = 0.05
map1(
    run_name=run_name, 
    mummichog_input_file=mummichog_input_file,
    name_map_file=f"../runs/{run_name}/tables/name_map_on_mummichog_output_cpds.csv",
    cut_off_p=cut_off_p,
    plot=True,
    track="feat"
)

Saving to ../runs/trans_omic_covid_data.rsd_1_default_p/tables/map1

Mummichog Output (mo) - User input to emperical compounds
Unique EIDS: 261
Unique features: 315
Number of original ouput rows: 1559
Dropped 79 nan compound_names rows, 1480 rows
Number of rows after $ split: 2699
ONLY USING THE FIRST SYNONYM (;)! MIGHT WANT TO CHANGE THAT LATER

Number of unique EIDs and with chebi ids (p_value-0.05): 150
Number of unique input features and with chebi ids (p_value-0.05): 198

Ground Truth - True Metabolite info from COVID dataset
No of rows: 515
No of unique ChEBI IDs: 503

201 matches found of which 52 are significant features


# Map 2
![Sample Plot](Figures/map2.png)

Link to metabolanalyst converter

In [374]:
def map2_process_mummichog_output(run_name, cut_off_p):
    print("\nMummichog Output (mo) - User input to emperical compounds")

    file_path = f"../runs/{run_name}/tables/userInput_to_EmpiricalCompounds.tsv"
    mo_raw_df = pd.read_csv(file_path, sep="\t")
    if cut_off_p:
        sig_df = mo_raw_df[mo_raw_df['p_value'] < cut_off_p]
        unique_eids = sig_df["EID"].nunique()
        unique_feats = sig_df["CompoundID_from_user"].nunique()
    else:
        unique_eids = mo_raw_df["EID"].nunique()
        unique_feats = mo_raw_df["CompoundID_from_user"].nunique()
    
    print(f"Unique EIDS: {unique_eids}")
    print(f"Unique features: {unique_feats}")

    mo_no_nan_df = mo_raw_df.dropna(subset=["compounds"]).copy()

    print(f"Number of original ouput rows: {len(mo_raw_df)}")
    print(f"Dropped {len(mo_raw_df)-len(mo_no_nan_df)} nan compounds (KEGG IDs) rows, {len(mo_no_nan_df)} rows")

    # a metabolite feature can have multiple annotations separated by a ";" in compound and "$" in compound_names
    mo_no_nan_df['compound_split'] = mo_no_nan_df['compounds'].str.split(';')
    mo_df = mo_no_nan_df.explode('compound_split')
    mo_df = mo_df.rename(columns={'compound_split': 'compound'})
    mo_df = mo_df.drop(columns=['compounds'])
    mo_df = mo_df[mo_df['compound'].str.strip() != ""]
    print(f"Number of rows after ; split: {len(mo_df)}")


    mo_df.to_csv(f"../runs/{run_name}/tables/map2/userInput_to_EmpiricalCompounds_kegg_split.csv", index=False)
    mo_df = mo_df.reset_index(drop=True)
    mo_df.head()

    return mo_df, unique_eids, unique_feats



In [375]:
def map2_merge_gt_to_kegg(gt_df, name_map_file):
    KEGG_map = pd.read_csv(name_map_file)
    KEGG_map['Query'] = "CHEBI:" + KEGG_map['Query'].astype(str)
    KEGG_map['database_identifier'] = KEGG_map['Query']
    KEGG_map = KEGG_map.drop(columns=['Query'])

    gt_df = gt_df.merge(KEGG_map[['database_identifier', 'KEGG']], on='database_identifier', how='left')
    print(f"\nNo of unique KEGG IDs: {gt_df[gt_df['KEGG'].notna()]['KEGG'].nunique()}")

    return gt_df

In [376]:
def map2_run_KEGG_matches(run_name, mo_df, gt_df, cut_off_p):
    matches = []

    for index, row in mo_df.iterrows():
        KEGG_id = row['compound']

        if pd.notna(KEGG_id):     
            match = gt_df[gt_df['KEGG'].str.fullmatch(KEGG_id, case=False, na=False)]
            
            if not match.empty:
                matches.append({
                    "compound_names": row['compound_names'],
                    "gt_compound_name": match['metabolite_identification'].iloc[0],
                    "ui_retention_time": row['retention_time'],
                    "gt_retention_time": match['retention_time'].iloc[0],
                    "rt_difference": abs(row['retention_time'] - match['retention_time'].iloc[0]),
                    "p_value": row['p_value'],
                    "compoundID_from_user": row['CompoundID_from_user'],
                    "gt_KEGG": match['KEGG'].iloc[0],
                    "ui_KEGG": str(KEGG_id),
                    "gt_index": match.index[0],
                    "ui_index": index,
                    "EID": row["EID"]
                })
            

    matches_df = pd.DataFrame(matches)
    matches_df.head()

    end_name = run_name.split(".")[-1]
    str_cut_off_p = str(cut_off_p).replace(".", "_") if cut_off_p else "no_cut_off"
    save_filename = f'matches_{end_name}_chebi_dups_{str_cut_off_p}.csv'

    if cut_off_p:
        sig_df = matches_df[matches_df["p_value"]<cut_off_p]
        unique_match_feats = sig_df["compoundID_from_user"].nunique()
        unique_match_eids = sig_df["EID"].nunique()
        sig_df.to_csv(f'../runs/{run_name}/tables/map2/sig_{save_filename}')
        print(f"\n{len(matches_df)} matches found of which {len(sig_df)} are significant features")
    else:
        unique_match_feats = matches_df["compoundID_from_user"].nunique()
        unique_match_eids = matches_df["EID"].nunique()
        print(f"\n{len(matches_df)} matches found. Some may be duplicates!")


    matches_df.to_csv(f'../runs/{run_name}/tables/map2/{save_filename}')

    return unique_match_feats, unique_match_eids

In [377]:
def map2_generate_funnel_plot(
    run_name,
    cut_off_p,
    track,
    mummichog_input_file, 
    unique_eids,
    unique_feats,
    unique_match_eids,
    unique_match_feats

):
    input_df = pd.read_csv(mummichog_input_file)
    if cut_off_p:
        input_df = input_df[input_df['p-value'] < cut_off_p]
        title_prefix = f"Significant ({str(cut_off_p)})"
        save_prefix = str(cut_off_p).replace(".", "_")
    else:
        title_prefix = f"All"
        save_prefix = "all"

    unique_feat_matrix = input_df[input_df["custom_id"].notna()]['custom_id'].nunique()

    if track=="feat":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_feats,
            "Matches to KEGG and GT": unique_match_feats,
        }
    elif track=="eid":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_eids,
            "Matches to Chebi and GT": unique_match_eids,
        }
    
    title = f"{title_prefix} {track} - Map 2"
    save_path = f"../runs/{run_name}/tables/map2/funnel_{save_prefix}_{track}_map2.png"
    funnel_plot_func(label_to_count, title, save_path)

In [378]:
def map2(run_name, mummichog_input_file=False, name_map_file=False, cut_off_p=False, plot=True, track="feat"):
    
    assert track in ["feat", "eid"], f"If provided, track must be either feat or eid not {track}"
    
    if not os.path.exists(f"../runs/{run_name}/tables/map2"):
        os.mkdir(f"../runs/{run_name}/tables/map2")
    print(f"Saving to ../runs/{run_name}/tables/map2")

    mo_df, unique_eids, unique_feats = map2_process_mummichog_output(run_name, cut_off_p)

    if not name_map_file:
        msg = (
            "\nTo proceed with mapping, please map compound_name to ChEBI IDs using MetaboAnalyst.\n"
            "Paste the compound_name_no_colon column into MetaboAnalyst and download the name map.\n"
            "Pass the file path to that name map to continue. No API :("
        )

        print(msg)
        return

    gt_df = process_gt()
    gt_df = map2_merge_gt_to_kegg(gt_df, name_map_file)

    unique_match_feats, unique_match_eids = map2_run_KEGG_matches(run_name, mo_df, gt_df, cut_off_p)

    if plot:
        map2_generate_funnel_plot(
            run_name,
            cut_off_p,
            track,
            mummichog_input_file, 
            unique_eids,
            unique_feats,
            unique_match_eids,
            unique_match_feats
        )

In [379]:
# run_name = "trans_omic_covid_data.run_1_default"
run_name = "trans_omic_covid_data.rsd_1_default_p"
mummichog_input_file="../input_data/mummichog_input_ttest_rsd_1.csv"
cut_off_p = 0.05
map2(
    run_name=run_name, 
    mummichog_input_file=mummichog_input_file,
    name_map_file="name_map_gt_chebi_to_kegg.csv",
    cut_off_p=cut_off_p,
    plot=True,
    track="feat"
)

Saving to ../runs/trans_omic_covid_data.rsd_1_default_p/tables/map2

Mummichog Output (mo) - User input to emperical compounds
Unique EIDS: 261
Unique features: 315
Number of original ouput rows: 1559
Dropped 0 nan compounds (KEGG IDs) rows, 1559 rows
Number of rows after ; split: 3078

Ground Truth - True Metabolite info from COVID dataset
No of rows: 515
No of unique ChEBI IDs: 503

No of unique KEGG IDs: 252

374 matches found of which 96 are significant features


# Map 3
![Sample Plot](Figures/map3.png)

Link to metabolanalyst converter

In [380]:
def map3_process_mummichog_output(run_name, cut_off_p):
    print("\nMummichog Output (mo) - User input to emperical compounds")

    file_path = f"../runs/{run_name}/tables/userInput_to_EmpiricalCompounds.tsv"
    mo_raw_df = pd.read_csv(file_path, sep="\t")
    if cut_off_p:
        sig_df = mo_raw_df[mo_raw_df['p_value'] < cut_off_p]
        unique_eids = sig_df["EID"].nunique()
        unique_feats = sig_df["CompoundID_from_user"].nunique()
    else:
        unique_eids = mo_raw_df["EID"].nunique()
        unique_feats = mo_raw_df["CompoundID_from_user"].nunique()
    
    print(f"Unique EIDS: {unique_eids}")
    print(f"Unique features: {unique_feats}")

    mo_no_nan_df = mo_raw_df.dropna(subset=["compounds"]).copy()

    print(f"Number of original ouput rows: {len(mo_raw_df)}")
    print(f"Dropped {len(mo_raw_df)-len(mo_no_nan_df)} nan compounds (KEGG IDs) rows, {len(mo_no_nan_df)} rows")

    # a metabolite feature can have multiple annotations separated by a ";" in compound and "$" in compound_names
    mo_no_nan_df['compound_split'] = mo_no_nan_df['compounds'].str.split(';')
    mo_df = mo_no_nan_df.explode('compound_split')
    mo_df = mo_df.rename(columns={'compound_split': 'compound'})
    mo_df = mo_df.drop(columns=['compounds'])
    mo_df = mo_df[mo_df['compound'].str.strip() != ""]
    print(f"Number of rows after ; split: {len(mo_df)}")


    mo_df.to_csv(f"../runs/{run_name}/tables/map3/userInput_to_EmpiricalCompounds_kegg_split.csv", index=False)
    mo_df = mo_df.reset_index(drop=True)
    mo_df.head()

    return mo_df, unique_eids, unique_feats


In [381]:
def map3_merge_mo_to_chebi(mo_df, name_map_file, cut_off_p):
    chebi_map = pd.read_csv(name_map_file)
    chebi_map['compound'] = chebi_map['Query']
    chebi_map = chebi_map.drop(columns=['Query'])

    mo_df = mo_df.merge(chebi_map[['compound', 'ChEBI']], on='compound', how='left')
    
    if cut_off_p:
        fil_ui_cpd = mo_df[mo_df['p_value'] < cut_off_p]
    else:
        fil_ui_cpd = mo_df
    
    has_chebi = fil_ui_cpd[fil_ui_cpd['ChEBI'].notna()]
    after_chebi_conv_count_eids = has_chebi['EID'].nunique()
    after_chebi_conv_count_feats = has_chebi['CompoundID_from_user'].nunique()
    print(f"\nNumber of unique EIDs and with chebi ids (p_value-{cut_off_p}): {has_chebi['EID'].nunique()}") 
    print(f"Number of unique input features and with chebi ids (p_value-{cut_off_p}): {has_chebi['CompoundID_from_user'].nunique()}") 

    return mo_df, after_chebi_conv_count_eids, after_chebi_conv_count_feats


In [382]:
def map3_generate_funnel_plot(
    run_name,
    cut_off_p,
    track,
    mummichog_input_file, 
    unique_eids,
    unique_feats,
    after_chebi_conv_count_eids,
    after_chebi_conv_count_feats,
    unique_match_eids,
    unique_match_feats
):
    input_df = pd.read_csv(mummichog_input_file)
    if cut_off_p:
        input_df = input_df[input_df['p-value'] < cut_off_p]
        title_prefix = f"Significant ({str(cut_off_p)})"
        save_prefix = str(cut_off_p).replace(".", "_")
    else:
        title_prefix = f"All"
        save_prefix = "all"

    unique_feat_matrix = input_df[input_df["custom_id"].notna()]['custom_id'].nunique()

    if track=="feat":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_feats,
            "Successful Conv to Chebi": after_chebi_conv_count_feats,
            "Matches to Chebi and GT": unique_match_feats,
        }
    elif track=="eid":
        label_to_count = {
            "Feature Matrix": unique_feat_matrix,
            "Mummichog Output": unique_eids,
            "Successful Conv to Chebi": after_chebi_conv_count_eids,
            "Matches to Chebi and GT": unique_match_eids
        }
    
    title = f"{title_prefix} {track} - Map 3"
    save_path = f"../runs/{run_name}/tables/map3/funnel_{save_prefix}_{track}_map3.png"
    funnel_plot_func(label_to_count, title, save_path)
    

In [383]:
def map3(run_name, mummichog_input_file=False, name_map_file=False, cut_off_p=False, plot=True, track="feat"):
    
    assert track in ["feat", "eid"], f"If provided, track must be either feat or eid not {track}"
    
    if not os.path.exists(f"../runs/{run_name}/tables/map3"):
        os.mkdir(f"../runs/{run_name}/tables/map3")
    print(f"Saving to ../runs/{run_name}/tables/map3")

    mo_df, unique_eids, unique_feats = map2_process_mummichog_output(run_name, cut_off_p)

    if not name_map_file:
        msg = (
            "\nTo proceed with mapping, please map compound_name to ChEBI IDs using MetaboAnalyst.\n"
            "Paste the compound_name_no_colon column into MetaboAnalyst and download the name map.\n"
            "Pass the file path to that name map to continue. No API :("
        )

        print(msg)
        return

    mo_df, after_chebi_conv_count_eids, after_chebi_conv_count_feats = map3_merge_mo_to_chebi(mo_df, name_map_file, cut_off_p)
    
    gt_df = process_gt()
    unique_match_feats, unique_match_eids = run_chebi_matches(run_name, mo_df, gt_df, cut_off_p)

    if plot:
        map3_generate_funnel_plot(
            run_name,
            cut_off_p,
            track,
            mummichog_input_file, 
            unique_eids,
            unique_feats,
            after_chebi_conv_count_eids,
            after_chebi_conv_count_feats,
            unique_match_eids,
            unique_match_feats
        )

In [384]:
# run_name = "trans_omic_covid_data.run_1_default"
run_name = "trans_omic_covid_data.rsd_1_default_p"
mummichog_input_file="../input_data/mummichog_input_ttest_rsd_1.csv"
cut_off_p = 0.05
map3(
    run_name=run_name, 
    mummichog_input_file=mummichog_input_file,
    name_map_file=f"../runs/{run_name}/tables/name_map_mo_kegg_to_chebi.csv",
    cut_off_p=cut_off_p,
    plot=True,
    track="feat"
)

Saving to ../runs/trans_omic_covid_data.rsd_1_default_p/tables/map3

Mummichog Output (mo) - User input to emperical compounds
Unique EIDS: 261
Unique features: 315
Number of original ouput rows: 1559
Dropped 0 nan compounds (KEGG IDs) rows, 1559 rows
Number of rows after ; split: 3078

Number of unique EIDs and with chebi ids (p_value-0.05): 87
Number of unique input features and with chebi ids (p_value-0.05): 120

Ground Truth - True Metabolite info from COVID dataset
No of rows: 515
No of unique ChEBI IDs: 503

200 matches found of which 54 are significant features
