In [1]:
import pandas as pd
import json
import pickle
from gseapy.parser import gsea_gmt_parser

In [2]:
# KEGG gene sets read from the GMT file; indexed by KEGG pathway ID further down.
kegg_pathway = gsea_gmt_parser("../kegg.gmt")

# Lookup from KEGG pathway ID to its display name, used when printing results.
with open("../kegg.json", "r") as names_file:
    pathway_names = json.loads(names_file.read())

### Co-expression network ORA

In [3]:
# Load the pickled collections for the two co-expression networks.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling, so
# these files must only ever come from a trusted source (this project itself).
# NOTE(review): both objects are used via .intersection() below, so they are
# presumably sets; the printed results suggest gene symbols -- confirm.
with open("../scz_bpd_co-exp_edges.pkl", "rb") as f:
    scz_bpd_edges = pickle.load(f)

with open("../t2dm_co-exp_edges.pkl", "rb") as f:
    dm_edges = pickle.load(f)

# Members shared by the SCZ/BPD network and the T2DM network.
all_edge_intersection = scz_bpd_edges.intersection(dm_edges)

# ORA result tables for each co-expression network.
scz_bpd_df = pd.read_table("../../results/Supplementary Tables/STable 4. ORA of SCZ and BPD co-expression network.tsv", sep="\t")
dm_df = pd.read_table("../../results/Supplementary Tables/STable 5. ORA of T2DM co-expression network.tsv", sep="\t")

# KEGG pathway IDs that appear in both ORA tables.
shared_pathway_ids = set(scz_bpd_df["KEGG Pathway ID"]).intersection(set(dm_df["KEGG Pathway ID"]))

# Iterate in sorted order (and sort the genes) so the printed report is
# identical on every run; plain set iteration order varies between kernels.
for path in sorted(shared_pathway_ids):
    overlapping_genes = all_edge_intersection.intersection(set(kegg_pathway[path]))
    if overlapping_genes:
        # Drop the organism suffix from the pathway display name.
        pathway_label = pathway_names[path].replace(" - Homo sapiens (human)", "")
        print(f'{pathway_label}: {", ".join(sorted(overlapping_genes))}')

Human T-cell leukemia virus 1 infection: DLG1
Proteasome: PSMD8
Spinocerebellar ataxia: PSMD8
Alzheimer disease: PSMD8
RNA transport: EIF3A
Epstein-Barr virus infection: PSMD8
PI3K-Akt signaling pathway: YWHAQ
Influenza A: KPNA1
Endocytosis: CHMP5
Huntington disease: PSMD8
Parkinson disease: PSMD8


### Knowledge graph ORA

In [4]:
def _edge_pairs(kg_df):
    """Return one (source, target) tuple per row of a knowledge-graph table.

    Reads the "source" and "target" columns of ``kg_df`` in row order. Zipping
    the two columns avoids a pair of scalar ``.at`` label lookups per row.
    """
    return list(zip(kg_df["source"], kg_df["target"]))


# Knowledge-graph tables for schizophrenia, bipolar disorder and T2DM.
sc_bel_df = pd.read_table("../../Knowledge Graphs/schizophrenia_kg.tsv")
bp_bel_df = pd.read_table("../../Knowledge Graphs/bipolar_disorder_kg.tsv")
dm_bel_df = pd.read_table("../../Knowledge Graphs/t2dm_kg.tsv")

# Every (source, target) pair of each graph, one entry per table row.
sc_common_edges = _edge_pairs(sc_bel_df)
bp_common_edges = _edge_pairs(bp_bel_df)
dm_common_edges = _edge_pairs(dm_bel_df)

# Distinct nodes of each graph: anything that occurs as a source or a target.
sc_set = {node for edge in sc_common_edges for node in edge}
bp_set = {node for edge in bp_common_edges for node in edge}
dm_set = {node for edge in dm_common_edges for node in edge}

In [5]:
# Nodes present in all three knowledge graphs.
all_gene_intersection = sc_set.intersection(bp_set, dm_set)

# Pathway enrichment tables for each knowledge graph.
# NOTE: dm_df is rebound here to STable 9; the co-expression cell above bound
# the same name to STable 5, so the value depends on which cell ran last.
scz_df = pd.read_table("../../results/Supplementary Tables/STable 7. Pathway enrichment of schizophrenia KG.tsv", sep="\t")
bpd_df = pd.read_table("../../results/Supplementary Tables/STable 8. Pathway enrichment of bipolar KG.tsv", sep="\t")
dm_df = pd.read_table("../../results/Supplementary Tables/STable 9. Pathway enrichment of T2DM KG.tsv", sep="\t")

# KEGG pathway IDs that appear in all three enrichment tables.
shared_pathway_ids = set(scz_df["KEGG Pathway ID"]).intersection(
    set(bpd_df["KEGG Pathway ID"]),
    set(dm_df["KEGG Pathway ID"]),
)

# Iterate in sorted order (and sort the genes) so the printed report is
# identical on every run; plain set iteration order varies between kernels.
for path in sorted(shared_pathway_ids):
    overlapping_genes = all_gene_intersection.intersection(set(kegg_pathway[path]))
    if overlapping_genes:
        # Drop the organism suffix from the pathway display name.
        pathway_label = pathway_names[path].replace(" - Homo sapiens (human)", "")
        print(f'{pathway_label}: {", ".join(sorted(overlapping_genes))}')

Tuberculosis: AKT1, IL6, CASP3, TNF, IL1A, NFKB1
Acute myeloid leukemia: AKT1, STAT3, NFKB1
C-type lectin receptor signaling pathway: AKT1, IL6, TNF, NFKB1, PTGS2
Cocaine addiction: BDNF, NFKB1
Hepatitis B: AKT1, IL6, CASP3, TNF, CXCL8, STAT3, NFKB1
Longevity regulating pathway - multiple species: AKT1, IRS1, SOD2
Yersinia infection: AKT1, IL6, CCL2, TNF, CXCL8, NFKB1
Fc gamma R-mediated phagocytosis: AKT1
Central carbon metabolism in cancer: AKT1
Platinum drug resistance: CASP3, AKT1
Insulin secretion: GLP1R
Fluid shear stress and atherosclerosis: AKT1, CCL2, ICAM1, TNF, IL1A, NFKB1
Prion diseases: IL1A, IL6, C5
Viral protein interaction with cytokine and cytokine receptor: TNF, IL6, CCL2, CXCL8
Alzheimer disease: AKT1, IL6, IRS1, MAPT, CASP3, TNF, IL1A, NFKB1, PTGS2
Renal cell carcinoma: AKT1
Sphingolipid signaling pathway: AKT1, NFKB1, TNF
Amoebiasis: IL6, CASP3, TNF, CXCL8, NFKB1
Rheumatoid arthritis: IL6, CCL2, ICAM1, TNF, CXCL8, IL1A
Toxoplasmosis: AKT1, CASP3, TNF, STAT3, NFKB1
