# Parse the results of the new CliXO run and write out the files needed for Cytoscape analysis

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import pandas as pd
import random


# latex rendering of text in graphs
import matplotlib as mpl

from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']

sns.set_style('white')

import sys
from IPython.display import display
% matplotlib inline

In [2]:
from ddot import Ontology

# Load the network genes

In [4]:
# Read sheet 'Table S4' of the supplemental workbook (first row skipped),
# report the row count, then index the table by gene while keeping the
# 'gene' column itself.
ASD_CHD_df = pd.read_excel(
    '../data/supplemental_tables_cell_systems_210416.xlsx',
    sheet_name='Table S4',
    skiprows=1,
)

print(len(ASD_CHD_df))
ASD_CHD_df = ASD_CHD_df.set_index('gene', drop=False)
ASD_CHD_df.head()

844


Unnamed: 0_level_0,gene,ASD_seed,CHD_seed,z_ASD,z_CHD,z_both,gtex_heart_percentile_exp,gtex_brain_percentile_exp,PCGC/PHN dual phenotype dDNV,"sum_dual_pheno_damaging_variants (DECIPHER 11.1, PCGC/PHN)",ifKnown_ASD,ifKnown_CHD,both known,pVal_both,corrected pvalue for both (bonferroni),lowest Z score,ASD-CHD risk gene,Potentially Novel ASD-CHD gene,Potentially Novel ASD gene,Potentially Novel CHD gene
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AC008575.1,AC008575.1,0,0,2.187602,2.844305,6.222208,,,0.0,0,No,No,no,2.451037e-10,9.068837e-06,2.187602,yes,yes,yes,Yes
AC067968.1,AC067968.1,0,0,2.055874,1.656669,3.405901,,,0.0,0,No,No,no,0.00032973,1.0,1.656669,no (corrected p > 0.1),,,
ACHE,ACHE,1,0,10.589728,0.644829,6.828567,0.506493,0.832242,0.0,0,"Sanders,2015; SFARI Gene: Tier 2",No,no,4.288347e-12,1.586689e-07,0.644829,no (min Z < 1.5),,,
ACTB,ACTB,0,1,0.492473,20.31131,10.002762,0.997572,0.998839,1.0,1,SFARI Gene: Tier S,"Jin 2017 (recurrent), H-CHD",yes,0.0,0.0,0.492473,no (min Z < 1.5),,,
ACTN2,ACTN2,0,0,2.982866,1.662443,4.958843,0.997941,0.854783,0.0,0,No,No,no,3.545705e-07,0.01311911,1.662443,yes,yes,yes,Yes


# Load PCnet

Download PCnet from https://ndexbio.org/#/network/f93f402c-86d4-11e7-a10d-0ac135e8bacf (UUID: f93f402c-86d4-11e7-a10d-0ac135e8bacf)

In [5]:
import pickle

# NOTE(review): machine-specific absolute path; point this at your local copy
# of the PCnet gpickle (download link is given in the markdown above).
pcnet_gpickle_path = '/Users/brinrosenthal/Documents/CCBB_tickets_data/PCnet/G_PCnet.gpickle'

# nx.read_gpickle was deprecated in networkx 2.6 and removed in 3.0; a gpickle
# is an ordinary pickle, so load it with the standard library instead.
# Unpickling executes code stored in the file -- only load files you trust.
with open(pcnet_gpickle_path, 'rb') as pcnet_handle:
    G_pcnet = pickle.load(pcnet_handle)

# sanity check: node and edge counts of the loaded network
print(len(G_pcnet.nodes()))
print(len(G_pcnet.edges()))

19781
2724724


In [6]:
# Restrict PCnet to the genes listed in ASD_CHD_df and report the size of
# the induced subgraph (nodes, then edges).
asd_chd_genes = ASD_CHD_df.index.tolist()
G_ASD_CHD = nx.subgraph(G_pcnet, asd_chd_genes)
for graph_part in (G_ASD_CHD.nodes(), G_ASD_CHD.edges()):
    print(len(graph_part))

844
28559


# Load similarity matrix 

In [7]:
# Pairwise similarity edge list: a header-less, tab-separated file whose three
# columns are labelled node1 / node2 / sim on load.
sim_rank_EL = pd.read_csv(
    'new_clixo_files/sim_rank_cosine_EL.tsv',
    sep='\t',
    names=['node1', 'node2', 'sim'],
)
sim_rank_EL.head()

Unnamed: 0,node1,node2,sim
0,SMARCC2,HIST1H4B,0.981613
1,SMARCC2,ZCCHC11,0.760676
2,SMARCC2,SMARCC1,1.0
3,SMARCC2,ZDHHC12,0.21293
4,SMARCC2,HIST1H4J,0.983393


# Read in the ontology

In [8]:
# Parameter tags (kept as strings) that are spliced into the CliXO file names
# used throughout this notebook.
alpha, beta = '01', '45'
# Path of the raw .ont file for this alpha/beta combination.
filepath = ''.join(['new_clixo_files/ASDCHD.cosine..', alpha, '..', beta, '.ont'])

In [9]:
# Build the ontology from the parsed CliXO table for this alpha/beta pair;
# column 0 holds the parent and column 1 the child of each relation.
parsed_ont_table = 'new_clixo_files/ASDCHD_parsed_alpha.cosine.'+alpha+'_beta_.'+beta+'.txt'
ont = Ontology.from_table(parsed_ont_table, parent=0, child=1)

  table = pd.read_table(table, comment='#', header=header)


In [10]:
# show the ontology summary (numbers of genes, terms and relations)
ont

844 genes, 120 terms, 2270 gene-term relations, 133 term-term relations
node_attributes: []
edge_attributes: ['Gene-Term']

# Add the fraction of validated genes to the ontology terms and write out for input to Cytoscape

In [11]:
# Size of every ontology term, labelled with the term identifiers and ordered
# from the largest term to the smallest.
term_labels = pd.Series(ont.terms_index).index.tolist()
term_sizes = pd.Series(ont.term_sizes)
term_sizes.index = term_labels
term_sizes = term_sizes.sort_values(ascending=False)
term_sizes.head()

963    844
962    344
961    324
960    302
959    240
dtype: int64

In [12]:
# ont.connected() is needed to find the genes that actually fall under each
# term once the ontology is traced, rather than only direct annotations.
# Rows and columns are labelled with all gene names followed by all term ids.
ont_connected = pd.DataFrame(ont.connected())
gene_labels = pd.Series(ont.genes_index).index.tolist()
term_id_labels = pd.Series(ont.terms_index).index.tolist()
node_labels = gene_labels + term_id_labels
ont_connected.index = node_labels
ont_connected.columns = list(node_labels)
ont_connected.head()

Unnamed: 0,AC008575.1,AC067968.1,ACHE,ACTB,ACTN2,ACTN4,ADAM17,ADGRL2,ADNP,AFDN,...,954,955,956,957,958,959,960,961,962,963
AC008575.1,True,False,False,False,False,False,False,False,False,False,...,False,True,False,False,True,False,True,True,True,True
AC067968.1,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
ACHE,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,True,False,True
ACTB,False,False,False,True,False,False,False,False,False,False,...,True,False,False,True,False,False,True,False,True,True
ACTN2,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,True,False,True,True,False,True


In [13]:
# Keep only the gene rows and the term columns of the connectivity matrix:
# the result is a boolean gene x term membership table.
num_genes = len(ont.genes_index)
gene2term_full = ont_connected.iloc[:num_genes, num_genes:]
gene2term_full.head()

Unnamed: 0,844,845,846,847,848,849,850,851,852,853,...,954,955,956,957,958,959,960,961,962,963
AC008575.1,False,False,False,True,False,False,False,False,False,False,...,False,True,False,False,True,False,True,True,True,True
AC067968.1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
ACHE,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,True,False,True
ACTB,False,False,False,False,False,False,False,False,False,False,...,True,False,False,True,False,False,True,False,True,True
ACTN2,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,True,False,True,True,False,True


In [14]:
# Term membership restricted to the genes validated in xenopus; summing the
# columns of this table counts validated genes per term.
xenopus_validated_genes = [
    'SCN2A', 'KMT2A', 'CDK13', 'PTPN11',
    'KMT2D', 'KANSL1', 'KAT6A', 'MAPT',
]
xenopus_pathways = gene2term_full.loc[xenopus_validated_genes, :]
# Optional export of the per-term counts (disabled):
# xenopus_pathways.sum().sort_values(ascending=False).to_csv('new_clixo_files/xenopus_validated_pathways_210419.tsv',sep='\t')

In [15]:
# number of xenopus-validated genes in each term (column sums); first five terms shown
xenopus_pathways.sum().head()

844    1
845    0
846    0
847    0
848    0
dtype: int64

In [16]:
# write out the gene2term matrix
# gene2term_full.to_csv('new_clixo_files/gene2term_full_'+alpha+'_'+beta+'.csv')

# Run enrichment on each cluster, after discussion with Trey

Enrich each term separately, using gprofiler

Also run GO alignment, but this happens in a separate notebook (clixo_GO_alignment/ASD_CHD_new_clixo_cosine_GO_alignment_200428.ipynb)

In [15]:
from gprofiler import GProfiler
# g:Profiler client used for the per-cluster enrichment queries later in the notebook.
# NOTE(review): "MyToolName/0.1" looks like a placeholder identifier -- confirm
# whether it should name this project instead.
gp = GProfiler("MyToolName/0.1")

In [16]:
# preview gene2term_full: each column is one term, and the enrichment loop iterates over these columns
gene2term_full.head()

Unnamed: 0,844,845,846,847,848,849,850,851,852,853,...,954,955,956,957,958,959,960,961,962,963
AC008575.1,False,False,False,True,False,False,False,False,False,False,...,False,True,False,False,True,False,True,True,True,True
AC067968.1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
ACHE,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,True,False,True
ACTB,False,False,False,False,False,False,False,False,False,False,...,True,False,False,True,False,False,True,False,True,True
ACTN2,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,True,False,True,True,False,True


In [17]:
import os

# Directory that receives one spreadsheet of g:Profiler results per cluster.
savedir_profile = 'gprofiler_clixo_ASD_CHD_cosine_alpha_'+alpha+'_beta_'+beta
if not os.path.isdir(savedir_profile):
    os.mkdir(savedir_profile)

# Columns of the per-cluster summary table built below.
top_pathway_columns = ['cluster','pathway','term_ID',
                       'term_p-value','term_recall','term_precision','num_genes_in_cluster']

# Labels applied to the raw g:Profiler result table.
gprofiler_columns = ["query.number", "significant", "p.value", "term.size",
                     "query.size", "overlap.size", "recall", "precision",
                     "term.id", "domain", "subgraph.number", "term.name",
                     "relative.depth", "intersection"]

# Enrichment background: every gene in PCnet (built once, outside the loop).
# To use the 844 ASD-CHD genes as background instead, set
#     background_genes = gene2term_full.index.tolist()
# To use g:Profiler's default background, drop the custom_bg argument.
background_genes = list(G_pcnet.nodes())

# One summary record per cluster; collected in a list and converted to a
# DataFrame once at the end (DataFrame.append in a loop is quadratic and no
# longer exists in pandas >= 2.0).
top_pathway_records = []

for focal_cluster in gene2term_full.columns.tolist():
    print(focal_cluster)
    focal_genes = gene2term_full[gene2term_full[focal_cluster]].index.tolist()
    print(len(focal_genes))

    # Placeholder record, kept as-is when the cluster is too small to test or
    # when no significant term is returned. Both cases now use the same
    # 'term_p-value' key (the small-cluster case previously wrote 'p-value',
    # which produced a stray column and a missing term_p-value).
    record = {'cluster': focal_cluster,
              'pathway': 'None',
              'term_ID': 'None',
              'term_p-value': 1.0,
              'term_recall': 'None',
              'term_precision': 'None',
              'num_genes_in_cluster': len(focal_genes)}

    if len(focal_genes)>5: # only run enrichment if > 5 genes in cluster
        gp_results = pd.DataFrame(gp.gprofile(focal_genes,custom_bg = background_genes,correction_method=gp.THR_FDR))

        if len(gp_results)>0: # make sure there are some significant enrichments
            gp_results.columns = gprofiler_columns
            print(gp_results[['p.value','precision','recall','term.size','overlap.size','term.id','term.name']].head())

            # save the full enrichment table for this cluster
            gp_results.to_excel(savedir_profile+'/cluster_'+str(focal_cluster)+'.xlsx')

            # Row 0 is taken as the top term for the cluster.
            # NOTE(review): this relies on g:Profiler returning rows ordered by
            # p-value, as they appear in the recorded output -- confirm.
            top_hit = gp_results.loc[0]
            record.update({'pathway': top_hit['term.name'],
                           'term_ID': top_hit['term.id'],
                           'term_p-value': top_hit['p.value'],
                           'term_recall': top_hit['recall'],
                           'term_precision': top_hit['precision']})

    top_pathway_records.append(record)

cluster2top_pathway = pd.DataFrame(top_pathway_records, columns=top_pathway_columns)

844
6
        p.value  precision  recall  term.size  overlap.size  \
0  1.820000e-13      0.357   0.833         14             5   
1  8.960000e-13      0.172   1.000         29             5   
2  9.260000e-13      0.238   0.833         21             5   
3  1.020000e-12      0.217   0.833         23             5   
4  4.100000e-12      0.114   1.000         44             5   

              term.id                              term.name  
0          GO:0001518   voltage-gated sodium channel complex  
1   REAC:R-HSA-445095    Interaction between L1 and Ankyrins  
2          GO:0034706                 sodium channel complex  
3          GO:0005248  voltage-gated sodium channel activity  
4  REAC:R-HSA-5576892         Phase 0 - rapid depolarisation  
845
7
   p.value  precision  recall  term.size  overlap.size             term.id  \
0   0.0271        0.2   0.333          5             1  REAC:R-HSA-5624138   

                                           term.name  
0  Trafficking of m

        p.value  precision  recall  term.size  overlap.size     term.id  \
0  4.030000e-11      0.185   0.625         27             5  GO:0032452   
1  4.030000e-11      0.161   0.625         31             5  GO:0006482   
2  4.030000e-11      0.161   0.625         31             5  GO:0008214   
3  4.030000e-11      0.185   0.625         27             5  GO:0070076   
4  4.030000e-11      0.172   0.625         29             5  GO:0016577   

                      term.name  
0  histone demethylase activity  
1         protein demethylation  
2          protein dealkylation  
3  histone lysine demethylation  
4         histone demethylation  
857
10
   p.value  precision  recall  term.size  overlap.size             term.id  \
0  0.00173      0.039   0.667         51             2  REAC:R-HSA-6802949   
1  0.00183      0.027   0.667         74             2  REAC:R-HSA-6802957   
2  0.01000      0.008   0.667        239             2  REAC:R-HSA-5673001   
3  0.01000      0.008   0.

   p.value  precision  recall  term.size  overlap.size     term.id  \
0  0.00119      0.002   0.923       5740            12  GO:0006139   
1  0.00119      0.002   0.923       5126            12  GO:0090304   
2  0.00119      0.333   0.154          6             2  GO:0070087   
3  0.00119      0.002   0.846       4633            11  GO:0016070   
4  0.00119      0.006   0.538       1152             7  GO:0051276   

                                          term.name  
0  nucleobase-containing compound metabolic process  
1                    nucleic acid metabolic process  
2                      chromo shadow domain binding  
3                             RNA metabolic process  
4                           chromosome organization  
869
13
   p.value  precision  recall  term.size  overlap.size     term.id  \
0  0.00562      0.333   0.333          3             1  CORUM:2767   
1  0.00562      0.500   0.333          2             1   CORUM:263   
2  0.00562      0.333   0.333         

    p.value  precision  recall  term.size  overlap.size             term.id  \
0  0.000012      0.025   0.429        239             6  REAC:R-HSA-5673001   
1  0.000012      0.027   0.429        221             6   REAC:R-HSA-112315   
2  0.000012      0.024   0.429        245             6  REAC:R-HSA-5684996   
3  0.000015      0.200   0.214         15             3   REAC:R-HSA-442729   
4  0.000015      0.033   0.357        151             5   REAC:R-HSA-112314   

                                           term.name  
0                             RAF/MAP kinase cascade  
1              Transmission across Chemical Synapses  
2                              MAPK1/MAPK3 signaling  
3  CREB phosphorylation through the activation of...  
4  Neurotransmitter receptors and postsynaptic si...  
880
15
        p.value  precision  recall  term.size  overlap.size  \
0  7.300000e-16      0.018   0.933        768            14   
1  1.090000e-13      0.012   0.933       1152            14   

   p.value  precision  recall  term.size  overlap.size     term.id  \
0   0.0147      0.009   0.333        549             5  GO:0048667   
1   0.0147      0.008   0.333        624             5  GO:0048858   
2   0.0147      0.008   0.333        643             5  GO:0032990   
3   0.0147      0.008   0.333        622             5  GO:0120039   
4   0.0147      0.008   0.333        608             5  GO:0048812   

                                           term.name  
0  cell morphogenesis involved in neuron differen...  
1                      cell projection morphogenesis  
2                            cell part morphogenesis  
3  plasma membrane bounded cell projection morpho...  
4                    neuron projection morphogenesis  
892
18
        p.value  precision  recall  term.size  overlap.size     term.id  \
0  8.560000e-08      0.103   0.429         58             6  KEGG:05213   
1  2.800000e-07      0.048   0.500        147             7  KEGG:05224   
2  3.150000e-07  

        p.value  precision  recall  term.size  overlap.size     term.id  \
0  1.090000e-17      0.055   0.706        218            12  GO:0016741   
1  1.090000e-17      0.058   0.706        208            12  GO:0008168   
2  5.070000e-17      0.065   0.647        168            11  GO:0006479   
3  5.070000e-17      0.065   0.647        168            11  GO:0008213   
4  7.760000e-17      0.148   0.529         61             9  GO:0016279   

                                           term.name  
0  transferase activity, transferring one-carbon ...  
1                         methyltransferase activity  
2                                protein methylation  
3                                 protein alkylation  
4        protein-lysine N-methyltransferase activity  
905
24
   p.value  precision  recall  term.size  overlap.size     term.id  \
0  0.00421      0.333   0.333          3             1  CORUM:3634   
1  0.00421      0.333   0.333          3             1  CORUM:6502   
2 

915
33
        p.value  precision  recall  term.size  overlap.size     term.id  \
0  8.200000e-21      0.370     0.4         27            10  GO:0032452   
1  8.200000e-21      0.370     0.4         27            10  GO:0070076   
2  1.300000e-20      0.345     0.4         29            10  GO:0016577   
3  1.720000e-20      0.323     0.4         31            10  GO:0006482   
4  1.720000e-20      0.323     0.4         31            10  GO:0008214   

                      term.name  
0  histone demethylase activity  
1  histone lysine demethylation  
2         histone demethylation  
3         protein demethylation  
4          protein dealkylation  
916
33
        p.value  precision  recall  term.size  overlap.size  \
0  1.400000e-15      0.057   0.609        245            14   
1  5.660000e-15      0.049   0.609        284            14   
2  2.090000e-14      0.054   0.565        239            13   
3  1.800000e-10      0.018   0.500        869            16   
4  1.800000e-10 

        p.value  precision  recall  term.size  overlap.size  \
0  7.320000e-12      0.258   0.235         31             8   
1  1.310000e-11      0.188   0.300         48             9   
2  2.160000e-11      0.318   0.206         22             7   
3  2.160000e-11      0.545   0.176         11             6   
4  3.240000e-11      0.500   0.176         12             6   

              term.id                                          term.name  
0  REAC:R-HSA-2122948   Activated NOTCH1 Transmits Signal to the Nucleus  
1          KEGG:04330                            Notch signaling pathway  
2  REAC:R-HSA-2979096  NOTCH2 Activation and Transmission of Signal t...  
3   REAC:R-HSA-193692                    Regulated proteolysis of p75NTR  
4  REAC:R-HSA-1980150                                Signaling by NOTCH4  
928
48
        p.value  precision  recall  term.size  overlap.size  \
0  8.190000e-13      0.017   0.558       1441            24   
1  1.710000e-12      0.016   0.558    

        p.value  precision  recall  term.size  overlap.size  \
0  2.470000e-10      0.012   0.606       3435            40   
1  9.000000e-10      0.010   0.636       4091            42   
2  9.000000e-10      0.012   0.545       2925            36   
3  1.000000e-09      0.033   0.308        603            20   
4  1.560000e-09      0.008   0.742       5816            49   

               term.id                                          term.name  
0           GO:0005654                                        nucleoplasm  
1           GO:0019219  regulation of nucleobase-containing compound m...  
2           GO:0051173  positive regulation of nitrogen compound metab...  
3  MIRNA:hsa-miR-21-5p                                      hsa-miR-21-5p  
4           GO:0051171  regulation of nitrogen compound metabolic process  
940
69
        p.value  precision  recall  term.size  overlap.size  \
0  1.240000e-53      0.066   0.761        768            51   
1  1.340000e-46      0.135   0.7

951
110
        p.value  precision  recall  term.size  overlap.size     term.id  \
0  4.610000e-25      0.052   0.392        768            40  GO:0006325   
1  7.920000e-22      0.037   0.422       1152            43  GO:0051276   
2  3.800000e-18      0.013   0.784       5973            80  GO:0016043   
3  1.990000e-17      0.017   0.618       3634            63  GO:1903506   
4  1.990000e-17      0.013   0.784       6146            80  GO:0071840   

                                           term.name  
0                             chromatin organization  
1                            chromosome organization  
2                    cellular component organization  
3  regulation of nucleic acid-templated transcrip...  
4      cellular component organization or biogenesis  
952
119
        p.value  precision  recall  term.size  overlap.size     term.id  \
0  1.280000e-22      0.022   0.632       3435            74  GO:0005654   
1  4.880000e-21      0.018   0.684       4374        

963
844
        p.value  precision  recall  term.size  overlap.size  \
0  7.350000e-77      0.228   0.225        768           175   
1  6.470000e-65      0.358   0.226        274            98   
2  6.470000e-65      0.358   0.226        274            98   
3  3.050000e-60      0.268   0.157        456           122   
4  3.830000e-59      0.164   0.243       1152           189   

              term.id                        term.name  
0          GO:0006325           chromatin organization  
1  REAC:R-HSA-3247509      Chromatin modifying enzymes  
2  REAC:R-HSA-4839726           Chromatin organization  
3          GO:0016569  covalent chromatin modification  
4          GO:0051276          chromosome organization  


In [18]:
# Index the per-cluster summary by cluster id (the 'cluster' column is kept as
# well) and preview the first rows.
cluster2top_pathway = cluster2top_pathway.set_index('cluster', drop=False)
cluster2top_pathway.head()

Unnamed: 0_level_0,cluster,num_genes_in_cluster,pathway,term_ID,term_p-value,term_precision,term_recall
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
844,844,6,voltage-gated sodium channel complex,GO:0001518,1.82e-13,0.357,0.833
845,845,7,Trafficking of myristoylated proteins to the c...,REAC:R-HSA-5624138,0.0271,0.2,0.333
846,846,7,histone-lysine N-methyltransferase activity,GO:0018024,2.15e-16,0.149,1.0
847,847,8,hsa-miR-6789-3p,MIRNA:hsa-miR-6789-3p,0.027,0.03,0.4
848,848,8,Intra-Golgi and retrograde Golgi-to-ER traffic,REAC:R-HSA-6811442,0.0314,0.005,1.0


# Add the top g:Profiler term per cluster for further analysis/visualization in Cytoscape

In [19]:
# add top pathway to ont2numVal
# NOTE(review): `ont2numVal` is not created in any cell visible in this
# notebook; on a fresh kernel it has to come from elsewhere (e.g. the
# precomputed file written by the to_csv cell) -- confirm.
top_pathway_cols = ['num_genes_in_cluster','term_p-value',
                    'term_precision','term_recall','pathway','term_ID']

# Drop any copy of these columns left by an earlier run of this cell (or by a
# reloaded precomputed table) so the join does not fail on overlapping column
# names when the cell is executed again.
ont2numVal = (ont2numVal
              .drop(columns=top_pathway_cols, errors='ignore')
              .join(cluster2top_pathway[top_pathway_cols]))

In [20]:
# Persist the annotated term table as a tab-separated file.
ont2numVal_path = 'new_clixo_files/ont2numVal_cosine_alpha'+alpha+'_beta'+beta+'.txt'
ont2numVal.to_csv(ont2numVal_path, sep='\t')
# To skip the enrichment step, the saved table can be reloaded instead:
# ont2numVal = pd.read_csv('new_clixo_files/ont2numVal_cosine_alpha'+alpha+'_beta'+beta+'.txt',sep='\t',index_col='term')
# ont2numVal.head()

In [21]:
# preview the term table after the top-pathway columns have been joined on
ont2numVal.head()

Unnamed: 0_level_0,all_val_genes,cluster_size,num_DECIPHER_CNV,num_DECIPHER_SNV,num_shared_DNV,term,fractionVal,num_genes_in_cluster,term_p-value,term_precision,term_recall,pathway,term_ID
term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
886,9,17,4,4,3,886,0.529412,17,8.5999999999999995e-19,0.04,0.824,nuclear chromatin,GO:0000790
920,19,37,5,13,9,920,0.513514,37,5.48e-13,0.172,0.357,Endometrial cancer,KEGG:05213
892,9,18,3,7,3,892,0.5,18,8.56e-08,0.103,0.429,Endometrial cancer,KEGG:05213
853,4,9,2,4,2,853,0.444444,9,3.45e-10,0.012,1.0,chromatin organization,GO:0006325
852,4,9,2,2,2,852,0.444444,9,1.12e-14,0.353,0.667,Ras activation upon Ca2+ influx through NMDA r...,REAC:R-HSA-442982


# write out ontology to cytoscape compatible format

In [26]:
# used to use the full ontology, but now we use the tree form for plotting
# ont.to_table('../clixo_hierarchy/new_clixo_no_alignment/ASD_CHD_cosine_'+alpha+'_'+beta+'.txt')

In [24]:
# Export the tree form of the ontology (the version used for the figures) as a
# two-column child/parent table.
tree = ont.get_tree()
children_then_parents = zip(*tree)
tree_df = pd.DataFrame(children_then_parents).T
tree_df.columns = ['child', 'parent']
tree_csv_path = 'new_clixo_files/tree_ASDCHD_cosine_'+alpha+'_'+beta+'.csv'
tree_df.to_csv(tree_csv_path)
tree_df.head()

Unnamed: 0,child,parent
0,PLEKHA7,861
1,FAM126A,963
2,ISX,874
3,SNTG2,953
4,PALM2,850
