In [None]:
import operator
import csv
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
def load_cancer_gene_name_from_file(filename = "cg-2022-06.csv"):
    """Read a comma-separated file and return the first column of its data rows.

    The first row of the file is treated as a header and discarded.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        list[str]: The first field of every non-empty data row, in file
        order. A file with no rows at all yields an empty list.
    """
    # The csv module asks for newline='' so that line breaks inside quoted
    # fields are handed to the parser untouched.
    with open(filename, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)

        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(csvreader, None)

        # csv.reader yields [] for blank lines, which would make row[0]
        # raise IndexError, so those rows are skipped.
        return [row[0] for row in csvreader if row]

In [None]:
def load_gene_mapping_from_file(filename = "identifier_mappings.txt"):
    """Build two lookup tables from a tab-separated mapping file.

    The first row of the file is treated as a header and discarded. Only
    rows whose third column equals 'Gene Name' are used; for those rows the
    first column is mapped to the second column and vice versa.

    Args:
        filename: Path of the tab-separated file to read.

    Returns:
        tuple[dict, dict]: ``(gene_names_dict, gene_ids_dict)`` where
        ``gene_names_dict`` maps column 0 -> column 1 and ``gene_ids_dict``
        maps column 1 -> column 0. When a key occurs more than once the
        last matching row wins.
    """
    gene_names_dict = {}
    gene_ids_dict = {}

    # The csv module asks for newline='' so that line breaks inside quoted
    # fields are handed to the parser untouched.
    with open(filename, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter = "\t")

        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(csvreader, None)

        for row in csvreader:
            # Blank or truncated lines have fewer than three fields and
            # would make row[2] raise IndexError.
            if len(row) < 3:
                continue
            if row[2] == 'Gene Name':
                gene_names_dict[row[0]] = row[1]
                gene_ids_dict[row[1]] = row[0]

    return gene_names_dict, gene_ids_dict

In [None]:
def load_weighted_G_from_file(filename):
    """Load a weighted graph from an edge-list file.

    Args:
        filename: Path (or file handle) passed straight to
            ``nx.read_weighted_edgelist``.

    Returns:
        The graph object produced by ``nx.read_weighted_edgelist``.
    """
    # The graph must be returned: without a return statement the function
    # hands None to its caller and the parsed graph is lost.
    return nx.read_weighted_edgelist(filename)

In [None]:
def cal_weight_deg_norm(G):
    """Compute each node's weighted degree, scaled by the largest one.

    A node's weighted degree is the sum of the 'weight' attribute over the
    entries of its adjacency mapping in ``G.adj``.

    Args:
        G: Object exposing ``adj`` as a mapping
            ``node -> {neighbour -> edge-attribute dict}``; every edge
            attribute dict must contain a 'weight' key.

    Returns:
        dict: ``node -> weighted degree / maximum weighted degree``.

    Raises:
        ValueError: If ``G`` has no nodes (``max`` of an empty sequence).
        ZeroDivisionError: If the largest weighted degree is zero.
    """
    weighted_degree = {
        node: sum(edge_data['weight'] for edge_data in neighbours.values())
        for node, neighbours in G.adj.items()
    }
    largest = max(weighted_degree.values())
    return {node: total / largest for node, total in weighted_degree.items()}

In [None]:
def cal_weight_eigen_cen_norm(G):
    """Compute weighted centrality per node, scaled by the largest score.

    Note that, despite "eigen" in the function name, the scores come from
    ``nx.katz_centrality`` called with ``weight='weight'`` and the library
    defaults for every other option.

    Args:
        G: Graph passed to ``nx.katz_centrality``.

    Returns:
        dict: ``node -> centrality / maximum centrality``.
    """
    centrality = nx.katz_centrality(G, weight='weight')
    peak = max(centrality.values())
    return {node: score / peak for node, score in centrality.items()}

In [None]:
def cal_comb_weight_cen_sortedlist(G):
    """Rank nodes by the sum of their two normalised scores.

    For every node the result of ``cal_weight_deg_norm`` is added to the
    result of ``cal_weight_eigen_cen_norm``.

    Args:
        G: Graph forwarded to both scoring functions.

    Returns:
        list[tuple]: ``(node, combined score)`` pairs ordered from the
        highest combined score to the lowest.
    """
    degree_part = cal_weight_deg_norm(G)
    centrality_part = cal_weight_eigen_cen_norm(G)
    combined = [
        (node, degree_score + centrality_part[node])
        for node, degree_score in degree_part.items()
    ]
    # Sort on the score (second tuple element), largest first.
    combined.sort(key=lambda pair: pair[1], reverse=True)
    return combined

In [None]:
def is_tp(gene_id, cancer_gene_names_list, gene_names_dict):
    """Tell whether a gene id maps to a name listed as a cancer gene.

    Args:
        gene_id: Key to look up in ``gene_names_dict``.
        cancer_gene_names_list: Container of names tested for membership.
        gene_names_dict: Mapping from gene id to gene name.

    Returns:
        bool: True when the looked-up name is in ``cancer_gene_names_list``.

    Raises:
        KeyError: If ``gene_id`` is not a key of ``gene_names_dict``.
    """
    gene_name = gene_names_dict[gene_id]
    return gene_name in cancer_gene_names_list

In [None]:
def fpr(pred_p, pred_n, cancer_genes=None, gene_names=None):
    """Compute the false-positive rate, FP / (FP + TN).

    A gene counts as an actual negative when the name it maps to is not
    among the cancer gene names.

    Args:
        pred_p: Sequence of entries predicted positive; element ``[0]`` of
            each entry is the gene id.
        pred_n: Sequence of entries predicted negative, same layout.
        cancer_genes: Iterable of cancer gene names. When omitted, the
            notebook-level ``cancer_gene_names_list`` is used, which keeps
            existing two-argument calls working.
        gene_names: Mapping gene id -> gene name. When omitted, the
            notebook-level ``gene_names_dict`` is used.

    Returns:
        float: The false-positive rate, or 0.0 when there are no actual
        negatives at all (the rate is undefined there; 0.0 avoids a
        ZeroDivisionError).

    Raises:
        KeyError: If a gene id is missing from the id -> name mapping.
    """
    # Fall back to the notebook globals only when nothing was passed in.
    if cancer_genes is None:
        cancer_genes = cancer_gene_names_list
    if gene_names is None:
        gene_names = gene_names_dict

    # A set makes each membership test O(1) instead of scanning the list.
    known_cancer = set(cancer_genes)

    def is_negative(entry):
        return gene_names[entry[0]] not in known_cancer

    fp_num = sum(1 for entry in pred_p if is_negative(entry))
    tn_num = sum(1 for entry in pred_n if is_negative(entry))

    negatives = fp_num + tn_num
    if negatives == 0:
        return 0.0
    return fp_num / negatives

In [None]:
def tpr(pred_p, pred_n, cancer_genes=None, gene_names=None):
    """Compute the true-positive rate, TP / (TP + FN).

    A gene counts as an actual positive when the name it maps to is among
    the cancer gene names.

    Args:
        pred_p: Sequence of entries predicted positive; element ``[0]`` of
            each entry is the gene id.
        pred_n: Sequence of entries predicted negative, same layout.
        cancer_genes: Iterable of cancer gene names. When omitted, the
            notebook-level ``cancer_gene_names_list`` is used, which keeps
            existing two-argument calls working.
        gene_names: Mapping gene id -> gene name. When omitted, the
            notebook-level ``gene_names_dict`` is used.

    Returns:
        float: The true-positive rate, or 0.0 when there are no actual
        positives at all (the rate is undefined there; 0.0 avoids a
        ZeroDivisionError).

    Raises:
        KeyError: If a gene id is missing from the id -> name mapping.
    """
    # Fall back to the notebook globals only when nothing was passed in.
    if cancer_genes is None:
        cancer_genes = cancer_gene_names_list
    if gene_names is None:
        gene_names = gene_names_dict

    # A set makes each membership test O(1) instead of scanning the list.
    known_cancer = set(cancer_genes)

    def is_positive(entry):
        return gene_names[entry[0]] in known_cancer

    tp_num = sum(1 for entry in pred_p if is_positive(entry))
    fn_num = sum(1 for entry in pred_n if is_positive(entry))

    positives = tp_num + fn_num
    if positives == 0:
        return 0.0
    return tp_num / positives

In [None]:
def cal_roc_xy(sort_gene_weights, interval = 100):
    """Compute ROC curve points by sweeping a cutoff down a ranked list.

    For ``i = 1 .. interval - 1`` the first ``i / interval`` share of
    ``sort_gene_weights`` is treated as predicted positive and the rest as
    predicted negative; ``fpr`` and ``tpr`` are evaluated for each split.

    Args:
        sort_gene_weights: Ranked sequence of entries, best candidates
            first. It is sliced and the slices are handed to ``fpr`` and
            ``tpr``.
        interval: Number of equal shares the list is divided into; the
            function produces ``interval - 1`` points.

    Returns:
        tuple[list, list]: ``(x_fpr, y_tpr)`` with one value per cutoff.
    """
    # Use the argument that was passed in rather than a notebook global,
    # so the result does not depend on what another cell happened to leave
    # in `sortlist`.
    total = len(sort_gene_weights)

    x_fpr = []
    y_tpr = []
    for i in range(1, interval):
        # Exact integer arithmetic: the float form int(n * (1/interval) * i)
        # can land just below a whole number and truncate one too low.
        cutoff = (total * i) // interval
        pred_p = sort_gene_weights[:cutoff]
        pred_n = sort_gene_weights[cutoff:]
        x_fpr.append(fpr(pred_p, pred_n))
        y_tpr.append(tpr(pred_p, pred_n))
    return x_fpr, y_tpr

In [None]:
def auc(y_tpr):
    """Approximate the area under a curve from equally spaced heights.

    Each value in ``y_tpr`` is treated as the height of a rectangle of
    width ``1 / (len(y_tpr) + 1)``. For the 99 points produced with 100
    intervals this width is 0.01, which matches the previously hard-coded
    behaviour; other lengths are now handled instead of raising IndexError
    (fewer than 99 points) or silently ignoring the tail (more than 99).

    Note that the x positions of the points are not taken into account;
    the heights are assumed to be evenly spaced along the x axis.

    Args:
        y_tpr: Sequence of curve heights.

    Returns:
        float: Sum of ``height * width`` over all points; 0.0 for an empty
        sequence.
    """
    if not y_tpr:
        return 0.0

    # N points come from N + 1 intervals, so each strip is 1 / (N + 1) wide.
    step = 1 / (len(y_tpr) + 1)

    area_sum = 0
    for height in y_tpr:
        area_sum += height * step
    return area_sum