In [2]:
import pandas as pd

crosstalk = pd.read_csv("KEGG_crosstalk.csv")
print("KEGG_crosstalk columns:", crosstalk.columns.tolist())

path_counts = pd.read_csv("pathway_gene_counts.csv")
print("pathway_gene_counts columns:", path_counts.columns.tolist())


KEGG_crosstalk columns: ['PATHWAY_ID1', 'PATHWAY_NAME1', 'PATHWAY_ID2', 'PATHWAY_NAME2', 'OVERLAP_COUNT', 'OVERLAP_GENES']
pathway_gene_counts columns: ['PATHWAY_ID', 'PATHWAY_NAME', 'NUMBER_OF_GENES']


In [3]:
import pandas as pd

crosstalk = pd.read_csv("KEGG_crosstalk.csv")
path_counts = pd.read_csv("pathway_gene_counts.csv")

OVERLAP_COL = "OVERLAP_COUNT"
GENE_COUNT_COL = "NUMBER_OF_GENES"

# merge pathway_ID1
crosstalk = crosstalk.merge(
    path_counts.rename(
        columns={"PATHWAY_ID": "PATHWAY_ID1", GENE_COUNT_COL: "N_GENES1"}
    ),
    on="PATHWAY_ID1",
    how="left"
)

# merge PATHWAY_ID2 
crosstalk = crosstalk.merge(
    path_counts.rename(
        columns={"PATHWAY_ID": "PATHWAY_ID2", GENE_COUNT_COL: "N_GENES2"}
    ),
    on="PATHWAY_ID2",
    how="left"
)

# calculate min-normalized overlap
crosstalk["NORMALIZED_OVERLAP_MIN"] = (
    crosstalk[OVERLAP_COL] /
    crosstalk[["N_GENES1", "N_GENES2"]].min(axis=1)
)

# calculate Jaccard index
crosstalk["JACCARD_INDEX"] = (
    crosstalk[OVERLAP_COL] /
    (crosstalk["N_GENES1"] + crosstalk["N_GENES2"] - crosstalk[OVERLAP_COL])
)

# check results
crosstalk_sorted = crosstalk.sort_values("JACCARD_INDEX", ascending=False)
crosstalk_sorted.head(10)


Unnamed: 0,PATHWAY_ID1,PATHWAY_NAME1,PATHWAY_ID2,PATHWAY_NAME2,OVERLAP_COUNT,OVERLAP_GENES,PATHWAY_NAME_x,N_GENES1,PATHWAY_NAME_y,N_GENES2,NORMALIZED_OVERLAP_MIN,JACCARD_INDEX
129,hsa05410,Hypertrophic cardiomyopathy - Homo sapiens (hu...,hsa05414,Dilated cardiomyopathy - Homo sapiens (human),89,ACTB;ACTC1;ACTG1;AGT;ATP2A1;ATP2A2;ATP2A3;CACN...,Hypertrophic cardiomyopathy - Homo sapiens (hu...,99,Dilated cardiomyopathy - Homo sapiens (human),105,0.89899,0.773913
365,hsa00980,Metabolism of xenobiotics by cytochrome P450 -...,hsa00982,Drug metabolism - cytochrome P450 - Homo sapie...,62,ADH1A;ADH1B;ADH1C;ADH4;ADH5;ADH6;ADH7;ALDH3A1;...,Metabolism of xenobiotics by cytochrome P450 -...,78,Drug metabolism - cytochrome P450 - Homo sapie...,72,0.861111,0.704545
3054,hsa00520,Amino sugar and nucleotide sugar metabolism - ...,hsa01250,Biosynthesis of nucleotide sugars - Homo sapie...,30,FCSK;FPGT;GALE;GALK1;GALK2;GALT;GCK;GFPT1;GFPT...,Amino sugar and nucleotide sugar metabolism - ...,38,Biosynthesis of nucleotide sugars - Homo sapie...,37,0.810811,0.666667
2510,hsa04940,Type I diabetes mellitus - Homo sapiens (human),hsa05330,Allograft rejection - Homo sapiens (human),33,CD28;CD80;CD86;FAS;FASLG;GZMB;HLA-A;HLA-B;HLA-...,Type I diabetes mellitus - Homo sapiens (human),44,Allograft rejection - Homo sapiens (human),39,0.846154,0.66
5457,hsa00591,Linoleic acid metabolism - Homo sapiens (human),hsa00592,alpha-Linolenic acid metabolism - Homo sapiens...,22,JMJD7-PLA2G4B;PLA2G10;PLA2G12A;PLA2G12B;PLA2G1...,Linoleic acid metabolism - Homo sapiens (human),30,alpha-Linolenic acid metabolism - Homo sapiens...,26,0.846154,0.647059
0,hsa05010,Alzheimer disease - Homo sapiens (human),hsa05022,Pathways of neurodegeneration - multiple disea...,337,ADRM1;AGER;AMBRA1;APAF1;APC;APC2;APP;ARAF;ATF4...,Alzheimer disease - Homo sapiens (human),389,Pathways of neurodegeneration - multiple disea...,481,0.866324,0.63227
46,hsa05224,Breast cancer - Homo sapiens (human),hsa05226,Gastric cancer - Homo sapiens (human),114,AKT1;AKT2;AKT3;APC;APC2;ARAF;AXIN1;AXIN2;BAK1;...,Breast cancer - Homo sapiens (human),148,Gastric cancer - Homo sapiens (human),150,0.77027,0.619565
2047,hsa05320,Autoimmune thyroid disease - Homo sapiens (human),hsa05330,Allograft rejection - Homo sapiens (human),35,CD28;CD40;CD40LG;CD80;CD86;FAS;FASLG;GZMB;HLA-...,Autoimmune thyroid disease - Homo sapiens (human),54,Allograft rejection - Homo sapiens (human),39,0.897436,0.603448
534,hsa00980,Metabolism of xenobiotics by cytochrome P450 -...,hsa05204,Chemical carcinogenesis - DNA adducts - Homo s...,56,CBR1;CYP1A1;CYP1A2;CYP1B1;CYP2A13;CYP2A6;CYP2A...,Metabolism of xenobiotics by cytochrome P450 -...,78,Chemical carcinogenesis - DNA adducts - Homo s...,71,0.788732,0.602151
292,hsa00562,Inositol phosphate metabolism - Homo sapiens (...,hsa04070,Phosphatidylinositol signaling system - Homo s...,66,CDIPT;IMPA1;IMPA2;INPP1;INPP4A;INPP4B;INPP5A;I...,Inositol phosphate metabolism - Homo sapiens (...,78,Phosphatidylinositol signaling system - Homo s...,98,0.846154,0.6
