In [1]:
import mummichog as mg
import pandas as pd
import numpy as np
import plotly.express as px
import os
import re

![Map 1](Figures/map1.png)

![Map 2](Figures/map2.png)


![Map 3](Figures/map3.png)

In [2]:
# Mapping 1: MO CPDs to Chebi
def map1(mummichog_input_pt, mummichog_output_pt, sig_matches_pt, cut_off_p, funnel_plot=False, track="feat", conv_to_chebi=None, title=None, save_path=None):
    """Print stage-by-stage counts for mapping 1 and optionally plot them as a funnel.

    Three tables are read, each is filtered to rows whose p-value is strictly
    below ``cut_off_p``, and row counts plus unique-identifier counts are
    printed for every stage.

    Args:
        mummichog_input_pt: Path to the comma-separated Mummichog input table;
            it must contain a ``p-value`` column.
        mummichog_output_pt: Path to the tab-separated Mummichog output table;
            it must contain ``p_value``, ``CompoundID_from_user``, ``EID`` and
            ``compound_names`` columns.
        sig_matches_pt: Path to the comma-separated matches table; it must
            contain ``p_value``, ``compoundID_from_user`` and ``eid`` columns.
        cut_off_p: Exclusive upper bound a p-value must stay below to count.
        funnel_plot: When true, the per-stage counts are handed to
            ``funnel_plot_func``.
        track: Identifier counted in the funnel: ``"feat"`` counts unique
            user compound IDs, ``"eid"`` counts unique EIDs. Only consulted
            when ``funnel_plot`` is true.
        conv_to_chebi: Count shown for the "Successful Conv to Chebi" stage.
            It is not computed here; the value is used exactly as given.
        title: Forwarded to ``funnel_plot_func``.
        save_path: Forwarded to ``funnel_plot_func``.

    Returns:
        None. All results are printed (and optionally plotted).

    Raises:
        AssertionError: If filtering the matches table by ``cut_off_p`` drops
            any row, i.e. the file holds rows that are not below the cut-off.
    """
    print("Input to Mummichog")
    input_df = pd.read_csv(mummichog_input_pt)
    filtered_input = input_df[input_df['p-value'] < cut_off_p]
    print(f"{len(filtered_input)} significant features out of {len(input_df)} total features")

    print()

    print("Mummichog Output")
    df = pd.read_csv(mummichog_output_pt, sep="\t")
    filtered_df = df[df['p_value'] < cut_off_p]
    print("Sig rows:", len(filtered_df))
    print("Unique, sig, input features:", filtered_df['CompoundID_from_user'].nunique())
    print("Unique, sig, eids:", filtered_df['EID'].nunique())
    filtered_cpd_names_df = filtered_df[filtered_df['compound_names'].notna()]
    print("Sig rows and compound names present:", len(filtered_cpd_names_df))
    print("Unique, sig, compound names, input features:", filtered_cpd_names_df['CompoundID_from_user'].nunique())
    print("Unique, sig, compound names, eids:", filtered_cpd_names_df['EID'].nunique())

    print()

    print("Matches to Chebi and GT")
    df_matches = pd.read_csv(sig_matches_pt)
    filtered_matches = df_matches[df_matches['p_value'] < cut_off_p]
    # The matches file is expected to already be restricted to this cut-off.
    assert len(df_matches)==len(filtered_matches), f"Oh oh {len(df_matches)} not equal to {len(filtered_matches)}"
    print("Sig rows:", len(filtered_matches))
    print("Unique sig input features:", filtered_matches["compoundID_from_user"].nunique())
    print("Unique sig eids:", filtered_matches["eid"].nunique())

    if not funnel_plot:
        return

    # track -> (identifier column in the Mummichog output, identifier column in the matches table)
    track_columns = {
        "feat": ('CompoundID_from_user', "compoundID_from_user"),
        "eid": ('EID', "eid"),
    }
    if track not in track_columns:
        # Previously execution fell through to the plot call with no counts
        # defined and crashed; now the hint is printed and plotting is skipped.
        print("Please pick a track: either feat or eid")
        return

    output_col, matches_col = track_columns[track]
    label_to_count = {
        "Feature Matrix": len(filtered_input),
        "Mummichog Output": filtered_df[output_col].nunique(),
        "Cpd Name Present": filtered_cpd_names_df[output_col].nunique(),
        "Successful Conv to Chebi": conv_to_chebi,
        "Matches to Chebi and GT": filtered_matches[matches_col].nunique(),
    }
    funnel_plot_func(label_to_count, title, save_path)
        

In [3]:
# Mapping 2: GT Chebis to KEGG
def map2(mummichog_input_pt, mummichog_output_pt, sig_matches_pt, cut_off_p, funnel_plot=False, track="feat", title=None, save_path=None):
    """Print stage-by-stage counts for mapping 2 and optionally plot them as a funnel.

    Three tables are read, each is filtered to rows whose p-value is strictly
    below ``cut_off_p``, and row counts plus unique-identifier counts are
    printed for every stage.

    Args:
        mummichog_input_pt: Path to the comma-separated Mummichog input table;
            it must contain a ``p-value`` column.
        mummichog_output_pt: Path to the tab-separated Mummichog output table;
            it must contain ``p_value``, ``CompoundID_from_user`` and ``EID``
            columns.
        sig_matches_pt: Path to the comma-separated matches table; it must
            contain ``p_value``, ``compoundID_from_user`` and ``eid`` columns.
        cut_off_p: Exclusive upper bound a p-value must stay below to count.
        funnel_plot: When true, the per-stage counts are handed to
            ``funnel_plot_func``.
        track: Identifier counted in the funnel: ``"feat"`` counts unique
            user compound IDs, ``"eid"`` counts unique EIDs. Only consulted
            when ``funnel_plot`` is true.
        title: Forwarded to ``funnel_plot_func``.
        save_path: Forwarded to ``funnel_plot_func``.

    Returns:
        None. All results are printed (and optionally plotted).

    Raises:
        AssertionError: If filtering the matches table by ``cut_off_p`` drops
            any row, i.e. the file holds rows that are not below the cut-off.
    """
    print("Input to Mummichog")
    input_df = pd.read_csv(mummichog_input_pt)
    filtered_input = input_df[input_df['p-value'] < cut_off_p]
    print(f"{len(filtered_input)} significant features out of {len(input_df)} total features")

    print()

    print("Mummichog Output")
    df = pd.read_csv(mummichog_output_pt, sep="\t")
    # Only row and unique-value counts are taken below, so no sorting is needed.
    filtered_df = df[df['p_value'] < cut_off_p]
    print("Sig rows:", len(filtered_df))
    print("Unique sig input features:", filtered_df['CompoundID_from_user'].nunique())
    print("Unique sig eids:", filtered_df['EID'].nunique())

    print()

    print("Matches to KEGG and GT")
    df_matches = pd.read_csv(sig_matches_pt)
    filtered_matches = df_matches[df_matches['p_value'] < cut_off_p]
    # The matches file is expected to already be restricted to this cut-off.
    assert len(df_matches)==len(filtered_matches), f"Oh oh {len(df_matches)} not equal to {len(filtered_matches)}"
    print("Sig rows:", len(filtered_matches))
    print("Unique sig input features:", filtered_matches["compoundID_from_user"].nunique())
    print("Unique sig eids:", filtered_matches["eid"].nunique())

    if not funnel_plot:
        return

    # track -> (identifier column in the Mummichog output, identifier column in the matches table)
    track_columns = {
        "feat": ('CompoundID_from_user', "compoundID_from_user"),
        "eid": ('EID', "eid"),
    }
    if track not in track_columns:
        # Previously execution fell through to the plot call with no counts
        # defined and crashed; now the hint is printed and plotting is skipped.
        print("Please pick a track: either feat or eid")
        return

    output_col, matches_col = track_columns[track]
    label_to_count = {
        "Feature Matrix": len(filtered_input),
        "Mummichog Output": filtered_df[output_col].nunique(),
        # Both tracks use the KEGG label; the eid track used to say "Chebi".
        "Matches to KEGG and GT": filtered_matches[matches_col].nunique(),
    }
    funnel_plot_func(label_to_count, title, save_path)

In [4]:
# Mapping 3: MO KEGG to Chebi
def map3(mummichog_input_pt, mummichog_output_pt, sig_matches_pt, cut_off_p, funnel_plot=False, track="feat", conv_to_chebi=None, title=None, save_path=None):
    """Print stage-by-stage counts for mapping 3 and optionally plot them as a funnel.

    Three tables are read, each is filtered to rows whose p-value is strictly
    below ``cut_off_p``, and row counts plus unique-identifier counts are
    printed for every stage.

    Args:
        mummichog_input_pt: Path to the comma-separated Mummichog input table;
            it must contain a ``p-value`` column.
        mummichog_output_pt: Path to the tab-separated Mummichog output table;
            it must contain ``p_value``, ``CompoundID_from_user`` and ``EID``
            columns.
        sig_matches_pt: Path to the comma-separated matches table; it must
            contain ``p_value``, ``compoundID_from_user`` and ``eid`` columns.
        cut_off_p: Exclusive upper bound a p-value must stay below to count.
        funnel_plot: When true, the per-stage counts are handed to
            ``funnel_plot_func``.
        track: Identifier counted in the funnel: ``"feat"`` counts unique
            user compound IDs, ``"eid"`` counts unique EIDs. Only consulted
            when ``funnel_plot`` is true.
        conv_to_chebi: Count shown for the "Successful Conv to Chebi" stage.
            It is not computed here; the value is used exactly as given.
        title: Forwarded to ``funnel_plot_func``.
        save_path: Forwarded to ``funnel_plot_func``.

    Returns:
        None. All results are printed (and optionally plotted).

    Raises:
        AssertionError: If filtering the matches table by ``cut_off_p`` drops
            any row, i.e. the file holds rows that are not below the cut-off.
    """
    print("Input to Mummichog")
    input_df = pd.read_csv(mummichog_input_pt)
    filtered_input = input_df[input_df['p-value'] < cut_off_p]
    print(f"{len(filtered_input)} significant features out of {len(input_df)} total features")

    print()

    print("Mummichog Output")
    df = pd.read_csv(mummichog_output_pt, sep="\t")
    filtered_df = df[df['p_value'] < cut_off_p]
    print("Sig rows:", len(filtered_df))
    print("Unique, sig, input features:", filtered_df['CompoundID_from_user'].nunique())
    print("Unique, sig, eids:", filtered_df['EID'].nunique())

    print()

    print("Matches to Chebi and GT")
    df_matches = pd.read_csv(sig_matches_pt)
    filtered_matches = df_matches[df_matches['p_value'] < cut_off_p]
    # The matches file is expected to already be restricted to this cut-off.
    assert len(df_matches)==len(filtered_matches), f"Oh oh {len(df_matches)} not equal to {len(filtered_matches)}"
    print("Sig rows:", len(filtered_matches))
    print("Unique sig input features:", filtered_matches["compoundID_from_user"].nunique())
    print("Unique sig eids:", filtered_matches["eid"].nunique())

    if not funnel_plot:
        return

    # track -> (identifier column in the Mummichog output, identifier column in the matches table)
    track_columns = {
        "feat": ('CompoundID_from_user', "compoundID_from_user"),
        "eid": ('EID', "eid"),
    }
    if track not in track_columns:
        # Previously execution fell through to the plot call with no counts
        # defined and crashed; now the hint is printed and plotting is skipped.
        print("Please pick a track: either feat or eid")
        return

    output_col, matches_col = track_columns[track]
    label_to_count = {
        "Feature Matrix": len(filtered_input),
        "Mummichog Output": filtered_df[output_col].nunique(),
        "Successful Conv to Chebi": conv_to_chebi,
        "Matches to Chebi and GT": filtered_matches[matches_col].nunique(),
    }
    funnel_plot_func(label_to_count, title, save_path)
        

In [5]:
def funnel_plot_func(label_to_count, title, save_path=None):
    """Draw a funnel chart from a label -> count mapping and display it.

    Args:
        label_to_count: Mapping whose keys become the funnel stage labels and
            whose values become the stage counts, in iteration order.
        title: Chart title passed to the figure layout.
        save_path: When truthy, the figure is also written to this path as an
            image before being shown.
    """
    stage_labels = []
    stage_counts = []
    for stage, stage_count in label_to_count.items():
        stage_labels.append(stage)
        stage_counts.append(stage_count)
    data = {"count": stage_counts, "label": stage_labels}

    # One font setting shared by labels, axes and other text on the figure.
    text_font = {
        "family": "Arial, sans-serif",
        "size": 25,
        "color": "black",
    }

    fig = px.funnel(data, x='count', y='label')
    fig.update_layout(title=title, title_font_size=30, font=text_font)

    if save_path:
        fig.write_image(save_path, scale=3, width=1200, height=800)
    fig.show(renderer="vscode")

In [13]:
# Run mapping 1 on the rsd_1_default_p run and save its funnel plot.
# cut_off_p is also embedded in the matches file name and the figure name
# (with "." replaced by "_"), so changing it selects a different matches file.
# NOTE(review): every path below is absolute to one user's machine; adjust
# them before running elsewhere.
cut_off_p = 0.05
track = "feat" # "feat" or "eid"
map1(
    mummichog_input_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/input_data/mummichog_input_ttest_rsd_1.csv",
    mummichog_output_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/tables/userInput_to_EmpiricalCompounds.tsv",
    sig_matches_pt=f"/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/analysis/sig_matches_rsd_1_default_p_chebi3_dups_{str(cut_off_p).replace('.', '_')}.csv",
    cut_off_p=cut_off_p,
    funnel_plot=True,
    track=track,
    # Hand-entered count for the "Successful Conv to Chebi" funnel stage; it
    # is not computed in this notebook. The commented-out call below records
    # different values per track, so presumably this must be updated whenever
    # track or cut_off_p changes — TODO confirm where the number comes from.
    conv_to_chebi=198,
    title=f"Significant ({str(cut_off_p)}) {track} - Map 1",
    save_path=f"/Users/pranathipoojary/Projects/mummichog_proj/Figures/report/funnel_{str(cut_off_p).replace('.', '_')}_{track}_map1.png"
    )
# Alternative invocation kept for reference: the same mapping on the
# run_1_default run (different input file, run directory and figure name).
# map1(
#     mummichog_input_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/input_data/mummichog_input_ttest.csv",
#     mummichog_output_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.run_1_default/tables/userInput_to_EmpiricalCompounds.tsv",
#     sig_matches_pt=f"/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.run_1_default/analysis/sig_matches_run_1_default_chebi3_dups_{str(cut_off_p).replace('.', '_')}.csv",
#     cut_off_p=cut_off_p,
#     funnel_plot=True,
#     track=track,
#     conv_to_chebi=24, #feat=25, eid=24
#     title=f"Significant ({str(cut_off_p)}) {track} - Map 1",
#     save_path=f"/Users/pranathipoojary/Projects/mummichog_proj/Figures/report/funnel_run_1_default_{str(cut_off_p).replace('.', '_')}_{track}_map1.png"
#     )

Input to Mummichog
2634 significant features out of 13175 total features

Mummichog Output
Sig rows: 359
Unique, sig, input features: 315
Unique, sig, eids: 261
Sig rows and compound names present: 345
Unique, sig, compound names, input features: 305
Unique, sig, compound names, eids: 250

Matches to Chebi and GT
Sig rows: 52
Unique sig input features: 50
Unique sig eids: 32


In [7]:
# Run mapping 2 on the rsd_1_default_p run and save its funnel plot.
# This cell rebinds the notebook-level cut_off_p to 0.001 (the other run
# cells use 0.05). The value is also embedded in the matches file name and
# the figure name (with "." replaced by "_").
# NOTE(review): every path below is absolute to one user's machine; adjust
# them before running elsewhere.
cut_off_p=0.001
track = "feat" #"feat" or "eid"
map2(
    mummichog_input_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/input_data/mummichog_input_ttest_rsd_1.csv",
    mummichog_output_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/tables/userInput_to_EmpiricalCompounds.tsv",
    sig_matches_pt=f"/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/analysis/sig_matches_rsd_1_default_p_KEGG_v2_dups_{str(cut_off_p).replace('.', '_')}.csv",
    cut_off_p=cut_off_p,
    funnel_plot=True,
    track=track,
    title=f"Significant ({str(cut_off_p)}) {track} - Map 2",
    save_path=f"/Users/pranathipoojary/Projects/mummichog_proj/Figures/report/funnel_{str(cut_off_p).replace('.', '_')}_{track}_map2.png"
    
    )

Input to Mummichog
601 significant features out of 13175 total features

Mummichog Output
Sig rows: 129
Unique sig input features: 111
Unique sig eids: 93

Matches to KEGG and GT
Sig rows: 43
Unique sig input features: 38
Unique sig eids: 21


In [8]:
# Run mapping 3 on the rsd_1_default_p run and save its funnel plot.
# cut_off_p is also embedded in the matches file name and the figure name
# (with "." replaced by "_"), so changing it selects a different matches file.
# NOTE(review): every path below is absolute to one user's machine; adjust
# them before running elsewhere.
cut_off_p = 0.05
track = "feat" # "feat" or "eid"
map3(
    mummichog_input_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/input_data/mummichog_input_ttest_rsd_1.csv",
    mummichog_output_pt="/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/tables/userInput_to_EmpiricalCompounds.tsv",
    sig_matches_pt=f"/Users/pranathipoojary/Projects/mummichog_proj/untargeted_pa/mummichog/runs/trans_omic_covid_data.rsd_1_default_p/analysis/sig_matches_rsd_1_default_p_chebi_v3_map3_dups_{str(cut_off_p).replace('.', '_')}.csv",
    cut_off_p=cut_off_p,
    funnel_plot=True,
    track=track,
    # Hand-entered count for the "Successful Conv to Chebi" funnel stage; it
    # is not computed in this notebook. Presumably it must be updated whenever
    # track or cut_off_p changes — TODO confirm where the number comes from.
    conv_to_chebi=120,
    title=f"Significant ({str(cut_off_p)}) {track} - Map 3",
    save_path=f"/Users/pranathipoojary/Projects/mummichog_proj/Figures/report/funnel_{str(cut_off_p).replace('.', '_')}_{track}_map3.png"
    )

Input to Mummichog
2634 significant features out of 13175 total features

Mummichog Output
Sig rows: 359
Unique, sig, input features: 315
Unique, sig, eids: 261

Matches to Chebi and GT
Sig rows: 54
Unique sig input features: 51
Unique sig eids: 33
