In [None]:
import networkx as nx
import pandas as pd
from ora import run_ora, gmt_parser
import numpy as np
import gseapy
import pickle
import igraph
import json
import networkx as nx
import pandas as pd
import json

In [None]:
# Load the three disease knowledge graphs from tab-separated files.
# Later cells read the "source" and "target" columns of each frame.
sc_bel_df = pd.read_csv("../bel_graphs/schizophrenia_kg.tsv", sep="\t")
bp_bel_df = pd.read_csv("../bel_graphs/bipolar_disorder_kg.tsv", sep="\t")
dm_bel_df = pd.read_csv("../bel_graphs/t2dm_kg.tsv", sep="\t")

In [None]:
# Represent every KG row as a (source, target) tuple, in row order.
sc_common_edges = list(zip(sc_bel_df["source"], sc_bel_df["target"]))
bp_common_edges = list(zip(bp_bel_df["source"], bp_bel_df["target"]))
dm_common_edges = list(zip(dm_bel_df["source"], dm_bel_df["target"]))

In [None]:
# Collect the distinct node names of each KG: every edge tuple contributes
# both its source and its target to the union.
sc_set = set().union(*sc_common_edges)
bp_set = set().union(*bp_common_edges)
dm_set = set().union(*dm_common_edges)

In [None]:
# Number of distinct nodes per KG, in the order (SCZ, BPD, T2DM).
tuple(len(node_set) for node_set in (sc_set, bp_set, dm_set))

In [None]:
# Settings shared by all three over-representation runs: the same gene-set
# file and the same gene-set size bounds are used for every KG.
ora_kwargs = {
    "gmt_path": "kegg.gmt",
    "min_size": 15,
    "max_size": 500,
}

# One ORA run per KG, each using that KG's node set as the query gene set.
sc_ora_df = run_ora(set_gene_symbols=sc_set, **ora_kwargs)
bp_ora_df = run_ora(set_gene_symbols=bp_set, **ora_kwargs)
dm_ora_df = run_ora(set_gene_symbols=dm_set, **ora_kwargs)

In [None]:
# Lookup table used to translate each pathway_id into a pathway name.
with open("kegg.json", "r") as f:
    pathway_names = json.load(f)


def _significant_with_names(ora_df):
    """Return a copy of the rows with q_value <= 0.05, plus a ``pathway_name`` column.

    A pathway_id that is missing from ``pathway_names`` raises ``KeyError``.
    """
    significant = ora_df.loc[ora_df["q_value"] <= 0.05].copy()
    significant["pathway_name"] = significant["pathway_id"].apply(pathway_names.__getitem__)
    return significant


enriched_sc_ora_df = _significant_with_names(sc_ora_df)
enriched_bp_ora_df = _significant_with_names(bp_ora_df)
enriched_dm_ora_df = _significant_with_names(dm_ora_df)

In [None]:
# Write each table of enriched pathways to its own tab-separated file.
for enriched_df, out_path in (
    (enriched_sc_ora_df, "sc_enrich_ora.tsv"),
    (enriched_bp_ora_df, "bp_enrich_ora.tsv"),
    (enriched_dm_ora_df, "dm_enrich_ora.tsv"),
):
    enriched_df.to_csv(out_path, sep="\t")

In [None]:
# Build ``gmt_data``: pathway name -> list of gene entries from "kegg.gmt".
# The identifier -> name lookup comes from "kegg.json"; the file is opened in a
# ``with`` block so the handle is closed right after loading.
with open("kegg.json", "r") as kegg_json:
    kegg_map = json.load(kegg_json)

gmt_data = {}
with open("kegg.gmt") as genesets:
    for line in genesets:
        # Tab-separated record: column 0 is the identifier looked up in
        # kegg_map, column 1 is not used here, columns 2+ are the genes.
        fields = line.strip().split("\t")
        if not fields[0]:
            # Blank line (e.g. a trailing newline at end of file): skip it
            # instead of looking up an empty identifier in kegg_map.
            continue
        gmt_data[kegg_map[fields[0]]] = fields[2:]

In [None]:
# Report, for each pathway enriched in the schizophrenia KG, the share of the
# pathway's genes that occur as KG nodes.
#
# Genes are matched exactly against ``sc_set`` (the node set that was passed to
# run_ora). The earlier ``Series.str.contains(gene)`` test treated each gene as
# a regex and matched substrings, so a short symbol such as "INS" also counted
# for a node named "INSR". It also produced NaN for missing nodes, which the
# builtin ``any()`` treats as truthy, and it rescanned both columns per gene.
for pathway in enriched_sc_ora_df["pathway_name"]:
    pathway_data = gmt_data[pathway]
    if not pathway_data:
        # No genes listed: coverage is undefined, so skip rather than
        # divide by zero.
        continue

    count = sum(gene in sc_set for gene in pathway_data)
    coverage = count / len(pathway_data)

    # Only pathways with more than 10% of their genes in the KG are printed.
    if coverage > 0.10:
        print(f"{pathway}: {coverage * 100:.2f} ({count}/{len(pathway_data)})")

In [None]:
# Report, for each pathway enriched in the bipolar disorder KG, the share of
# the pathway's genes that occur as KG nodes.
#
# Genes are matched exactly against ``bp_set`` (the node set that was passed to
# run_ora). The earlier ``Series.str.contains(gene)`` test treated each gene as
# a regex and matched substrings, so a short symbol such as "INS" also counted
# for a node named "INSR". It also produced NaN for missing nodes, which the
# builtin ``any()`` treats as truthy, and it rescanned both columns per gene.
for pathway in enriched_bp_ora_df["pathway_name"]:
    pathway_data = gmt_data[pathway]
    if not pathway_data:
        # No genes listed: coverage is undefined, so skip rather than
        # divide by zero.
        continue

    count = sum(gene in bp_set for gene in pathway_data)
    coverage = count / len(pathway_data)

    # Only pathways with more than 10% of their genes in the KG are printed.
    if coverage > 0.10:
        print(f"{pathway}: {coverage * 100:.2f} ({count}/{len(pathway_data)})")

In [None]:
# Report, for each pathway enriched in the T2DM KG, the share of the pathway's
# genes that occur as KG nodes.
#
# Genes are matched exactly against ``dm_set`` (the node set that was passed to
# run_ora). The earlier ``Series.str.contains(gene)`` test treated each gene as
# a regex and matched substrings, so a short symbol such as "INS" also counted
# for a node named "INSR". It also produced NaN for missing nodes, which the
# builtin ``any()`` treats as truthy, and it rescanned both columns per gene.
for pathway in enriched_dm_ora_df["pathway_name"]:
    pathway_data = gmt_data[pathway]
    if not pathway_data:
        # No genes listed: coverage is undefined, so skip rather than
        # divide by zero.
        continue

    count = sum(gene in dm_set for gene in pathway_data)
    coverage = count / len(pathway_data)

    # Only pathways with more than 10% of their genes in the KG are printed.
    if coverage > 0.10:
        print(f"{pathway}: {coverage * 100:.2f} ({count}/{len(pathway_data)})")

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn3

# Three-way Venn diagram of the enriched pathway ids of the three KGs.
plt.figure(figsize=(7,7))

set1 = set(enriched_sc_ora_df["pathway_id"])
set2 = set(enriched_bp_ora_df["pathway_id"])
set3 = set(enriched_dm_ora_df["pathway_id"])

# Pathways enriched in all three KGs.
intersection_all = len(set1 & set2 & set3)

set1_label = "SCZ KG"
set2_label = "BPD KG"
set3_label = "T2DM KG"

# Draw with equal placeholder sizes so every region exists and has the same
# area; the real counts are written into the region labels below.
v = venn3(subsets=(1, 1, 1, 1, 1, 1, 1), set_labels=(set1_label, set2_label, set3_label))

# Region ids are three-character flags ordered (set1, set2, set3).
# Pairwise regions exclude the pathways shared by all three sets.
region_counts = {
    '001': len(set3 - set2 - set1),
    '010': len(set2 - set1 - set3),
    '100': len(set1 - set2 - set3),
    '011': len(set2 & set3) - intersection_all,
    '101': len(set1 & set3) - intersection_all,
    '110': len(set2 & set1) - intersection_all,
    '111': intersection_all,
}
for region_id, region_count in region_counts.items():
    v.get_label_by_id(region_id).set_text(region_count)

# Same font family and size for every region count and for the set labels.
for label_id in (*region_counts, 'A', 'B', 'C'):
    venn_label = v.get_label_by_id(label_id)
    venn_label.set_family("sans-serif")
    venn_label.set_fontsize(18)

# Explicit region colours.
# NOTE(review): region '101' is given no explicit colour here and keeps
# whatever venn3 assigned; confirm whether that is intentional.
region_colors = {
    '001': "#ff2b95",
    '010': "#35b44a",
    '100': "#e51e25",
    '011': "#1cade4",
    '110': "#ffaa4f",
    '111': "#0008ff",
}
for region_id, region_color in region_colors.items():
    v.get_patch_by_id(region_id).set_color(region_color)

# Region transparency; '101' uses a slightly lower alpha than the rest.
region_alphas = {
    '001': 0.45,
    '010': 0.45,
    '100': 0.45,
    '011': 0.45,
    '101': 0.35,
    '110': 0.45,
    '111': 0.45,
}
for region_id, region_alpha in region_alphas.items():
    v.get_patch_by_id(region_id).set_alpha(region_alpha)

plt.title("Overlap of enriched pathways between T2DM and psychiatric disorder KGs", fontsize=22, y=1.1)

plt.savefig("venn3.png", bbox_inches="tight")