In [46]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
import pandas as pd
import SRW_v041 as SRW
import pickle

In [48]:
# Load the BRCA network file: the loader returns the edges, the edge features,
# and the node names (node_names is reused by the sample loaders and the solver).
edges, features, node_names = SRW.load_network('data/BRCA_edge2features.txt')

* Loading network...


In [49]:
# Load the training samples, passing the network's node_names to the loader;
# returns the initial sample states and the training sample names.
P_init_train, sample_names_train = SRW.load_samples('data/BRCA_training_data.txt', node_names)

	- Nodes in adjacency matrix: 17557


In [50]:
# Load the validation samples the same way as the training samples
# (same loader, same node_names), from the validation data file.
P_init_val, sample_names_val = SRW.load_samples('data/BRCA_validation_data.txt', node_names)

	- Nodes in adjacency matrix: 17557


In [51]:
# Group labels for the training samples.
# NOTE(review): the path is spelled 'lables'; presumably this matches the file
# name on disk -- verify before correcting the spelling.
group_labels_train = SRW.load_grouplabels('data/BRCA_training_lables.txt')

In [52]:
# Group labels for the validation samples.
# NOTE(review): the path is spelled 'lables'; presumably this matches the file
# name on disk -- verify before correcting the spelling.
group_labels_val = SRW.load_grouplabels('data/BRCA_validation_lables.txt')

In [53]:
# Solver hyperparameters.
# Number of nodes in the network.
nnodes = len(node_names)
# Reset probability of the random walk.
rst_prob = 0.3
# Regularization parameter.
lam = 1e-8
# Standard deviation for weight initialization.
w_init_sd = 0.01

In [54]:
# One feature name per line of the file; trailing whitespace at the end of the
# file is stripped first so a final newline does not yield an empty name.
with open('data/BRCA_feature_names.txt') as names_file:
    feature_names = names_file.read().rstrip().splitlines()
# Two extra names are appended after the ones read from the file.
feature_names += ['selfloop', 'intercept']

In [55]:
# Build the solver from the loaded network, the training samples and labels,
# and the hyperparameters defined earlier; the validation samples and labels
# are passed in as well, with silhouette evaluation enabled (eval_sil=True).
# w_init_sd is taken from the hyperparameter cell instead of repeating the
# literal 0.01, so changing it there actually affects the solver.
SRW_obj = SRW.SRW_solver(
    edges, features, nnodes, P_init_train, rst_prob, group_labels_train, lam,
    w_init_sd=w_init_sd, w=None,
    feature_names=feature_names, sample_names=sample_names_train, node_names=node_names,
    loss='centroid', norm_type='L2', maximize_diff=False,
    learning_rate=1., update_w_func='Adam',
    P_init_val=P_init_val, group_labels_val=group_labels_val,
    eval_sil=True, ncpus=1,
)

### Arguments of SRW_solver objects  
**edges** (e by 2, int, ndarray): Edges in the network  
**features** (e by w, float, csc_matrix): Edge features  
**nnodes** (int): Number of nodes in the network  
**P_init** (m by n, float, csr_matrix): Initial state of samples (training set)  
**rst_prob** (float): Reset probability of random walk  
**group_labels** (m by 1, str/int, list): Group labels of samples (training set)   
**lam** (float): Regularization parameter, controlling the amount of L1/L2 norm  
**w_init_sd** (float): Standard deviation for weight initialization (default 0.01)  
**w** (w by 1, float, list): Initial weights (default None)  
**feature_names** (w by 1, str, list): Feature names (default [])  
**sample_names** (m by 1, str, list): Sample names (default [])  
**node_names** (n by 1, str, list): Node names (default [])  
**loss** ('squared', 'absolute', 'centroid', 'silhouette'): Type of the loss function (default 'squared')  
**norm_type** ('L1', 'L2'): Type of the norm (default 'L2')  
**maximize_diff** (bool): Whether to maximize the difference between groups (default False)  
**learning_rate** (float): Learning rate (default 0.1)  
**update_w_func** ('momentum', 'Nesterov', 'Adam', 'Nadam'): Function for updating parameters (default 'Adam')  
**P_init_val** (m by n, float, csr_matrix): Initial state of samples (validation set) (default None)  
**group_labels_val** (m by 1, str/int, list): Group labels of samples (validation set) (default None)  
**eval_sil** (bool): Whether to evaluate silhouette width (default: False)  
**ncpus** (int): Number of CPUs to use for multiprocess.Pool (default: -1, use all cpus)  

In [None]:
# Train the solver built above.
# NOTE(review): this cell has no execution count, so it was not run in the
# saved session; presumably gradient-descent training -- confirm in SRW_v041.
SRW_obj.train_SRW_GD()

# Scratch

In [56]:
# Scratch check: initialize the weights, then display w_map, which renders as
# a table with one Weight value per feature name.
# NOTE(review): map_w presumably pairs the weights with feature_names --
# confirm in SRW_v041.
SRW_obj.init_w()
SRW_obj.map_w()
SRW_obj.w_map

Unnamed: 0,Weight
Adherens junction,0.004322
Apoptosis,0.010622
B cell receptor signaling pathway,0.008194
BIND,-0.006660
Base excision repair,-0.013622
BioGRID,-0.010996
CORUM,-0.002976
CTD,0.007315
Cell cycle,0.002697
Cytokine-cytokine receptor interaction,-0.006396


In [34]:
# Scratch: re-initialize the weights, then call generate_Q_and_P_fin.
# NOTE(review): presumably builds Q and the final sample distributions P_fin
# from the current weights -- confirm in SRW_v041.
SRW_obj.init_w()
SRW_obj.generate_Q_and_P_fin()

In [60]:
# Replace SRW_obj with an object loaded from a pickle file on disk
# (presumably a previously saved solver -- confirm the file's provenance).
# Pickle data is binary, so the file is opened in 'rb' mode: text mode fails
# on Python 3 and can corrupt the stream where newlines are translated.
# SECURITY: pickle.load can execute arbitrary code; only load files that you
# created yourself or otherwise trust.
with open('data/SRW_obj_3', 'rb') as f:
    SRW_obj = pickle.load(f)

In [67]:
# List the feature weights of the loaded object in ascending order
# (most negative first).
SRW_obj.w_map.Weight.sort_values()

controls-expression-of_directed             -9.308106
selfloop                                    -3.489996
intercept                                   -3.150205
mutrate_target                              -0.500697
Epithelial-mesenchymal transition           -0.156851
Natural killer cell mediated cytotoxicity   -0.151249
B cell receptor signaling pathway           -0.108847
ESC proliferation                           -0.088785
KEGG                                        -0.055768
Apoptosis                                   -0.053781
mutual_exclusive                            -0.028445
telomere maintenance                        -0.027874
Cytokine-cytokine receptor interaction      -0.025206
Mismatch repair                             -0.023935
controls-phosphorylation-of_directed        -0.010669
PPAR signaling pathway                      -0.006356
Focal adhesion                              -0.004227
mTOR signaling pathway                      -0.003195
VEGF signaling pathway      

In [68]:
# Call generate_Q_and_P_fin on the loaded object before P_fin_df is displayed.
# NOTE(review): presumably this is what populates P_fin_df -- confirm in SRW_v041.
SRW_obj.generate_Q_and_P_fin()

In [78]:
# Display P_fin_df: one row per sample (TCGA IDs) and one column per node
# (gene names), as shown in the output.
# NOTE(review): this displays the whole frame; consider .head() for a lighter
# saved output if it is a pandas DataFrame.
SRW_obj.P_fin_df

Unnamed: 0,A1BG,A1CF,A2M,A2ML1,A4GALT,A4GNT,AAAS,AACS,AADAC,AADAT,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1,ZZZ3
TCGA-A1-A0SD,0.000033,0.000021,0.000086,0.000002,0.000011,0.000019,0.000025,0.000032,0.000006,0.000007,...,0.000052,0.000022,0.000049,1.126389e-06,0.000024,1.872107e-07,0.000003,0.000191,0.000012,0.000025
TCGA-A1-A0SH,0.000031,0.000014,0.000091,0.000001,0.000024,0.000081,0.000031,0.000057,0.000003,0.000011,...,0.000106,0.000043,0.000069,6.695249e-07,0.000010,1.847108e-07,0.000005,0.000038,0.000010,0.000017
TCGA-A1-A0SJ,0.000042,0.000020,0.000089,0.000003,0.000035,0.000010,0.000038,0.000049,0.000003,0.000009,...,0.000043,0.000017,0.000044,1.418609e-06,0.000035,1.210918e-07,0.000004,0.000041,0.000016,0.000024
TCGA-A1-A0SM,0.000032,0.000016,0.000078,0.000002,0.000012,0.000005,0.000028,0.000028,0.000002,0.000007,...,0.000067,0.000048,0.000076,8.085955e-07,0.000014,7.229341e-08,0.000004,0.000045,0.000008,0.000021
TCGA-A1-A0SN,0.000034,0.000016,0.000089,0.000001,0.000023,0.000077,0.000045,0.000042,0.000003,0.000009,...,0.000044,0.000025,0.000058,6.288759e-07,0.000009,1.964273e-07,0.000005,0.000045,0.000009,0.000017
TCGA-A1-A0SQ,0.000027,0.000022,0.000092,0.000002,0.000028,0.000024,0.000025,0.000065,0.000002,0.000010,...,0.000074,0.000054,0.000037,5.526172e-07,0.000009,7.687999e-08,0.000003,0.000034,0.000010,0.000018
TCGA-A2-A04N,0.000038,0.000027,0.000127,0.000002,0.000029,0.000028,0.000029,0.000101,0.000005,0.000019,...,0.000053,0.000024,0.000056,8.111314e-07,0.000012,1.263387e-07,0.000007,0.000044,0.000022,0.000028
TCGA-A2-A04P,0.000038,0.000017,0.000136,0.000002,0.000013,0.000006,0.000025,0.000076,0.000003,0.000014,...,0.000066,0.000039,0.000059,1.655668e-06,0.000011,9.410530e-08,0.000004,0.000043,0.000013,0.000022
TCGA-A2-A04R,0.000038,0.000104,0.000097,0.000002,0.000012,0.000006,0.000022,0.000035,0.000003,0.000009,...,0.000111,0.000016,0.000051,5.913089e-07,0.000008,9.475785e-08,0.000025,0.000059,0.000013,0.000050
TCGA-A2-A04U,0.000041,0.000034,0.000180,0.000001,0.000020,0.000008,0.000048,0.000105,0.000002,0.000011,...,0.000211,0.000046,0.000053,6.024080e-07,0.000008,7.445288e-08,0.000005,0.000087,0.000011,0.000023
