In [1]:
import numpy as np
import pandas as pd
from functools import partial

from collections import defaultdict
from sklearn import metrics

import src.load_data as ld
import src.set_analysis_func as func



In [2]:
# load embedding
# Embedding matrix read from CSV, and the node names read one per line
# (surrounding whitespace stripped).
# NOTE(review): presumably row i of node_vectors belongs to node_list[i] — confirm.
node_vectors = np.loadtxt(
    './data/embedding/node2vec_consensus.csv', delimiter=',')

with open('./data/embedding/consensus_node.txt', 'r') as node_file:
    node_list = [raw_line.strip() for raw_line in node_file]

# All-vs-all cosine similarity between the embedding vectors.
S = metrics.pairwise.cosine_similarity(node_vectors, node_vectors)

In [3]:
# create gene to embedding id mapping
# position in node_list -> gene name
g_index2node = dict(enumerate(node_list))
# gene name -> position in node_list; a gene missing from the embedding
# resolves to -1 (and, being a defaultdict, is stored with -1 on first lookup)
g_node2index = defaultdict(
    lambda: -1,
    ((gene, idx) for idx, gene in enumerate(node_list)),
)

In [4]:
# load gene set data
GO_data = ld.load_gmt('./data/gene_sets/hsa_experimental_eval_BP_propagated.gmt')

# Translate each term's genes through g_node2index.
# NOTE(review): lower/upper presumably bound the gene-set size that is kept
# (10 to 300) — confirm in src.load_data.term2indexes.
GO2indices = ld.term2indexes(GO_data, g_node2index, lower=10, upper=300)

In [5]:
# generate background gene list
# Gather every gene annotated to any term. update() grows a single set in
# place; rebuilding the set with union() on every iteration copies it once
# per term.
GO_all_genes = set()
for term in GO_data:
    GO_all_genes.update(GO_data[term])

# Keep only the genes that are present in the embedding.
GO_all_genes.intersection_update(node_list)

# Sorted so the background has the same order in every kernel session:
# iteration order of a set of strings changes between interpreter runs
# because of string hash randomization.
GO_all_indices = sorted(g_node2index[gene] for gene in GO_all_genes)

In [6]:
# Pre-bind the similarity matrix, term-to-index map and background to
# func.andes; the same GO collection and background are used on both sides,
# so f is called with just a pair of term IDs.
f = partial(
    func.andes,
    matrix=S,
    g1_term2index=GO2indices,
    g2_term2index=GO2indices,
    g1_population=GO_all_indices,
    g2_population=GO_all_indices,
)

In [7]:
# Score one pair of GO terms with the ANDES partial bound to `f`;
# returns the ANDES raw score and the corrected score as a tuple.
f(('GO:0043648', 'GO:0006805'))

(0.3675551337770534, -0.046133571753024294)

## ANDES as GSEA

In [8]:
# load ranked list generated from gene expression data
# The gene identifiers are the index (first column) of the headerless,
# tab-separated table.
# NOTE(review): the file's row order is presumably the ranking — confirm.
de_table = pd.read_csv('./data/expression/GSE3467_de.txt',
                       sep='\t', index_col=0, header=None)

# Membership is tested against a set built once from node_list: scanning the
# list for every ranked gene is quadratic. g_node2index must not be used for
# this test, because a defaultdict stores any key that was ever looked up.
embedded_genes = set(node_list)

# Embedding indices of the ranked genes, in file order, skipping genes that
# have no embedding.
ranked_list = [g_node2index[gene]
               for gene in map(str, de_table.index)
               if gene in embedded_genes]

In [9]:
# Rebinds `f` (until here the pairwise func.andes partial) to the GSEA-style
# scorer, so f is now called with a single term ID.
f = partial(
    func.gsea_andes,
    ranked_list=ranked_list,
    matrix=S,
    term2indices=GO2indices,
    annotated_indices=GO_all_indices,
)

In [10]:
# Evaluate a single GO term against the ranked list with the func.gsea_andes partial.
# NOTE(review): the call yields a 2-tuple of floats; what each element means
# is defined in src.set_analysis_func.gsea_andes — confirm there.
f('GO:0043648')

(1.451297977761098, 0.6105019183325968)