In [127]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported modules
# (e.g. the local SRW module) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [128]:
import numpy as np
import SRW_v042 as SRW
from scipy.sparse import csr_matrix, csc_matrix, issparse
import functools

# Toy example 1: checking the partial derivatives against finite differences

In [129]:
# --- Toy problem 1: 3 nodes, 6 edges, 3 features per edge, 5 samples ---
nnodes = 3
rst_prob = 0.5
lam = 0.001
group_labels = [1, 0, 1, 0, 0]

# One row per edge, given as a pair of node indices.
edges = np.array([
    [0, 1], [0, 2], [1, 2],
    [1, 0], [2, 0], [2, 1],
])

# One feature row per edge; the rows for edges 3-5 repeat those of edges 0-2.
features = csc_matrix([[0.9, 0.1, 1.], [0.1, 0.9, 1.], [0.5, 0.5, 1.]] * 2)

# w1 equals w0 except for the first weight, which is shifted by 1e-6
# (the step used by the finite-difference checks below).
w0 = np.array([0.7, 0.3, 0.1])
w1 = np.array([0.700001, 0.3, 0.1])

# Sample-by-node 0/1 matrix; P_init and mutation_profile are the same object.
mutation_profile = csr_matrix([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 1],
    [1, 1, 0],
    [1, 1, 1],
])
P_init = mutation_profile

In [130]:
# Evaluate SRW.strength_Q_and_gradient with the 'logistic' strength function,
# first at the base weights w0 and then at the perturbed weights w1.
(Q0,
 M_strength0,
 M_strength_rowSum0,
 strength_grad0) = SRW.strength_Q_and_gradient(edges, nnodes, features, w0, 'logistic')

(Q1,
 M_strength1,
 M_strength_rowSum1,
 strength_grad1) = SRW.strength_Q_and_gradient(edges, nnodes, features, w1, 'logistic')

In [131]:
# Column 0 of the dense strength gradient, i.e. the column matching the first weight
# (the one perturbed between w0 and w1).
strength_grad_col0 = strength_grad0.toarray()[:, 0]
Q_grad = SRW.Q_gradient_1feature(
    edges, nnodes, M_strength0, M_strength_rowSum0, strength_grad_col0
)
Q_grad.toarray()

array([[ 0.        ,  0.06170302, -0.06170302],
       [ 0.02738262,  0.        , -0.02738262],
       [-0.03446875,  0.03446875,  0.        ]])

In [132]:
# Finite-difference check of Q_grad: w1 differs from w0 only in the first weight,
# by fd_step, so (Q1 - Q0) / fd_step approximates dQ/dw[0].
fd_step = 0.000001
Q_grad_fd = (Q1 - Q0).toarray() / fd_step
# Fail loudly if the analytic and numerical gradients disagree, instead of relying on
# a visual comparison of two printed arrays.
np.testing.assert_allclose(Q_grad.toarray(), Q_grad_fd, rtol=0, atol=1e-6)
Q_grad_fd  # check

array([[ 0.        ,  0.061703  , -0.061703  ],
       [ 0.0273826 ,  0.        , -0.0273826 ],
       [-0.03446874,  0.03446874,  0.        ]])

In [133]:
# Run SRW.iterative_PPR once with Q0 and once with Q1 (same restart probability),
# feeding each call its own SRW.renorm(mutation_profile) result, and keep the
# outputs as CSR matrices.
P0_dense = SRW.iterative_PPR(Q0.toarray(), SRW.renorm(mutation_profile).toarray(), rst_prob)
P0 = csr_matrix(P0_dense)

P1_dense = SRW.iterative_PPR(Q1.toarray(), SRW.renorm(mutation_profile).toarray(), rst_prob)
P1 = csr_matrix(P1_dense)

In [134]:
# Gradient of the propagation scores, computed from the w0 quantities (Q0, P0, strength_grad0).
P_grad = SRW.calc_P_grad_pool(edges, nnodes, M_strength0, M_strength_rowSum0, Q0, P0, rst_prob, strength_grad0)
# Show the slice at index 0 of the first axis; it is compared with (P1 - P0) / 1e-6 in the
# next cell, so it presumably holds the derivative w.r.t. the first weight — confirm in SRW.
P_grad[0,:,:]

array([[-0.00017484,  0.01735193, -0.01717709],
       [ 0.00403931,  0.00764498, -0.01168429],
       [-0.00091024,  0.01031905, -0.0094088 ],
       [ 0.00193224,  0.01249845, -0.01443069],
       [-0.00066511,  0.01266334, -0.01199823]])

In [135]:
# Finite-difference check of P_grad[0]: w1 differs from w0 only in the first weight,
# by fd_step, so (P1 - P0) / fd_step approximates dP/dw[0].
fd_step = 0.000001
P_grad_fd = (P1 - P0).toarray() / fd_step
# Automated comparison instead of eyeballing two printed arrays. The tolerance is looser
# than for Q: the recorded outputs of the two cells differ by up to ~6e-7.
np.testing.assert_allclose(P_grad[0, :, :], P_grad_fd, rtol=0, atol=1e-5)
P_grad_fd  # check

array([[-0.00017466,  0.01735142, -0.01717676],
       [ 0.00403884,  0.00764556, -0.0116844 ],
       [-0.00091031,  0.01031926, -0.00940895],
       [ 0.00193209,  0.01249849, -0.01443058],
       [-0.00066509,  0.01266331, -0.01199822]])

In [136]:
# Two solvers that share every input except the weight vector (w0 vs. the perturbed w1).
SRW_obj_0 = SRW.SRW_solver(
    edges, features, nnodes, P_init, rst_prob, group_labels, lam,
    w=w0
)
SRW_obj_1 = SRW.SRW_solver(
    edges, features, nnodes, P_init, rst_prob, group_labels, lam,
    w=w1
)

In [137]:
# Objective value and gradient at w0 (using the solver's default loss / norm settings).
J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
# Objective value at w1. NOTE(review): P_grad was computed from the w0 quantities, so
# J_grad1 is not a gradient at w1; only J1 is used by the check cells that follow.
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

In [138]:
# Gradient returned by SRW_obj_0.obj_func_and_grad at w0; its first entry is what the
# finite-difference value in the next cell should reproduce.
J_grad

array([ 0.00047087,  0.00195789,  0.00042876])

In [139]:
# Finite-difference check of J_grad[0]: w1 differs from w0 only in the first weight,
# by fd_step, so (J1 - J0) / fd_step approximates dJ/dw[0].
fd_step = 0.000001
J_grad_fd = (J1 - J0) / fd_step
# Automated comparison instead of reading the two numbers off the screen.
np.testing.assert_allclose(J_grad[0], J_grad_fd, rtol=0, atol=1e-5)
J_grad_fd  # check

0.00047142684200807139

In [140]:
# Switch both solvers to the silhouette loss with L1 regularisation type, then
# re-evaluate the objective (and, for SRW_obj_0, its gradient) at the same P / w.
SRW_obj_0.loss = 'silhouette'
SRW_obj_1.loss = 'silhouette'
SRW_obj_0.norm_type = 'L1'
SRW_obj_1.norm_type = 'L1'

J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  1.]
 [ 1.  0.]]
[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  1.]
 [ 1.  0.]]


In [141]:
# Gradient returned by SRW_obj_0.obj_func_and_grad after switching to loss='silhouette',
# norm_type='L1'; its first entry is checked against a finite difference in the next cell.
J_grad

array([-0.01530258,  0.023035  ,  0.00573243])

In [142]:
# Finite-difference check of J_grad[0] for the silhouette / L1 objective: w1 differs from
# w0 only in the first weight, by fd_step.
fd_step = 0.000001
J_grad_fd = (J1 - J0) / fd_step
# Automated comparison; the recorded outputs of the two cells differ by ~1.3e-6.
np.testing.assert_allclose(J_grad[0], J_grad_fd, rtol=0, atol=1e-5)
J_grad_fd  # check

-0.015301236500064874

# Toy example 2: checking the gradient descent functions

In [151]:
# --- Toy problem 2: complete 6-node graph with self-loops, 6 samples in two groups ---
nnodes = 6
rst_prob = 0.15
lam = 5e-2
group_labels = [0, 0, 0, 1, 1, 1]

# Edge list: the 15 pairs (i, j) with i < j, then the same pairs reversed in the same
# order, then the 6 self-loops (i, i).
upper_pairs = list([i, j] for i in range(nnodes) for j in range(i + 1, nnodes))
lower_pairs = list([j, i] for i, j in upper_pairs)
self_loops = list([i, i] for i in range(nnodes))
edges = np.array(upper_pairs + lower_pairs + self_loops)

# Feature columns in the order of feature_names: f1, f2, selfloop, intercept.
# One row per (i, j) pair with i < j, grouped here by i.
pair_features = [
    [.9, .4, 0., 1.], [.9, .6, 0., 1.], [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
    [.9, .5, 0., 1.], [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
    [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
    [.9, .4, 0., 1.], [.9, .6, 0., 1.],
    [.9, .5, 0., 1.],
]
loop_features = [[.0, .0, 1., 1.]] * nnodes
# Reversed edges reuse the feature rows of their forward counterparts.
features = csc_matrix(pair_features * 2 + loop_features)

# 6x6 identity as a float CSR matrix; P_init and mutation_profile are the same object.
mutation_profile = csr_matrix(np.eye(nnodes, dtype=int)).astype(float)
P_init = mutation_profile

feature_names = ['f1', 'f2', 'selfloop', 'intercept']
node_names = list('n%d' % (k + 1) for k in range(nnodes))
sample_names = list('p%d' % (k + 1) for k in range(len(group_labels)))

In [152]:
# Keyword options for the toy-2 solver, gathered in one place. The training inputs are
# reused as the validation inputs (P_init_val / group_labels_val).
solver_options = dict(
    w_init_sd=0.01,
    w=None,
    feature_names=feature_names,
    sample_names=sample_names,
    node_names=node_names,
    loss='silhouette',
    norm_type='L1',
    maximize_diff=False,
    learning_rate=1.,
    update_w_func='Adam',
    P_init_val=P_init,
    group_labels_val=group_labels,
    eval_sil=True,
    ncpus=-1,
    maxit=1000,
    early_stop=10,
    strength_func='logistic',
)
SRW_obj_2 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam,
                           **solver_options)

In [153]:
# Train SRW_obj_2 by gradient descent. Per the captured output, each iteration logs J,
# cost_val, accuracy, silhouette and the current weight vector.
# NOTE(review): the solver was built with w=None and w_init_sd=0.01, which suggests a random
# initial w; no seed is set in this notebook — confirm whether the run is reproducible.
SRW_obj_2.train_SRW_GD()

finished calculating strength_grad: 23:44:49
finished network propagation: 23:44:49
finished calculating P_grad using pool: 23:44:49
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 23:44:49
*** 0 iteration: J is -3.00414978015 cost_val is -3.00491565041
*** accuracy is 1.0 accuracy_val is 1.0
*** silhouette is 0.500819275068 silhouette_val is 0.500819275068
[ 0.00959334  0.00561852 -0.00010554 -0.0150855 ] 

finished calculating strength_grad: 23:44:49
finished network propagation: 23:44:49
finished calculating P_grad using pool: 23:44:49
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 23:44:49
*** 1 iteration: J is -3.65347874059 cost_val is -3.80367218344
*** accuracy is 1.0 accuracy_val is 1.0
*** silhouette is 0.633945363907 silhouette_val is 0.633945363907
[ 1.00959332 -0.99438128  0.99989426 -1.01508142] 

finished calculating strength_grad: 23:44:49
finished network propagation: 23:44:49
finished calculating P_gra

finished calculating P_grad using pool: 23:44:56
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 23:44:56
*** 19 iteration: J is -5.4678551112 cost_val is -5.93182260168
*** accuracy is 1.0 accuracy_val is 1.0
*** silhouette is 0.98863710028 silhouette_val is 0.98863710028
[ 8.83012019  0.17931796 -0.26991166 -9.50668524] 

finished calculating strength_grad: 23:44:56
finished network propagation: 23:44:56
finished calculating P_grad using pool: 23:44:56
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 23:44:56
*** 20 iteration: J is -5.46308965855 cost_val is -5.93194917394
*** accuracy is 1.0 accuracy_val is 1.0
*** silhouette is 0.988658195657 silhouette_val is 0.988658195657
[ 8.81943668 -0.26161483 -0.2961388  -9.68098694] 

finished calculating strength_grad: 23:44:56
finished network propagation: 23:44:56
finished calculating P_grad using pool: 23:44:57
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calcula

In [154]:
# Called before Q_fin_df / P_fin_df are inspected further down; presumably it computes the
# final Q and P from the solver's current weights — confirm in SRW.
SRW_obj_2.generate_Q_and_P_fin()

In [155]:
# Weight vector held by the solver after train_SRW_GD().
SRW_obj_2.w

array([ 8.78630453, -0.53407803, -0.22035396, -9.84006592])

In [None]:
# NOTE(review): this cell has no execution count in the saved notebook, and the weights
# displayed further down (w_map) equal those shown right after train_SRW_GD(), so the
# results below appear to come from the gradient-descent run, not from BFGS — confirm.
SRW_obj_2.train_SRW_BFGS()

In [156]:
# Same call as the cell following train_SRW_GD(); run again before the result tables
# (w_map, Q_fin_df, P_fin_df) are displayed.
SRW_obj_2.generate_Q_and_P_fin()

In [157]:
# Table of learned weights, one row per feature name (see the rendered output).
SRW_obj_2.w_map

Unnamed: 0,Weight
f1,8.786305
f2,-0.534078
selfloop,-0.220354
intercept,-9.840066


### Check the transition matrix Q and propagation scores P

In [158]:
# Final Q as a table labelled by node name on both axes (see the rendered output).
SRW_obj_2.Q_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
n1,0.000214,0.523136,0.475178,0.000518,0.000491,0.000465
n2,0.511141,0.000209,0.487211,0.000506,0.000479,0.000454
n3,0.487108,0.511163,0.000219,0.000531,0.000503,0.000477
n4,0.000517,0.000517,0.000517,0.000214,0.523094,0.47514
n5,0.000479,0.000479,0.000479,0.511142,0.000209,0.487212
n6,0.000477,0.000477,0.000477,0.487147,0.511204,0.000219


In [159]:
# Final P as a table with one row per sample and one column per node (see the rendered output).
SRW_obj_2.P_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,0.400917,0.303284,0.287586,0.002754,0.002796,0.002663
p2,0.29633,0.407128,0.288349,0.002748,0.002789,0.002657
p3,0.294807,0.302524,0.394434,0.002762,0.002803,0.00267
p4,0.002754,0.002812,0.002694,0.40091,0.303267,0.287564
p5,0.002732,0.002789,0.002672,0.296337,0.407128,0.288342
p6,0.00273,0.002787,0.00267,0.29483,0.302541,0.394441


In [160]:
# Summary metrics for SRW_obj_2 on the training and validation inputs.
# Written as single-argument print(...) calls with str.format so the cell runs on both
# Python 2 and Python 3 (the bare `print` statement is a SyntaxError on Python 3); the
# emitted text, including the trailing ' \n' on the last line, matches the original
# Python 2 output.
print('*** cost is {} cost_val is {}'.format(SRW_obj_2.cost, SRW_obj_2.cost_val))
print('*** accuracy is {} accuracy_val is {}'.format(SRW_obj_2.accuracy, SRW_obj_2.accuracy_val))
print('*** silhouette is {} silhouette_val is {} \n'.format(SRW_obj_2.sil, SRW_obj_2.sil_val))

*** cost is -5.92766914608 cost_val is -5.92766914608
*** accuracy is 1.0 accuracy_val is 1.0
*** silhouette is 0.987944857681 silhouette_val is 0.987944857681 



### Compare with unweighted random walk

In [161]:
# Baseline solver with all four weights fixed at zero and no training step; the Q_fin
# displayed further down is uniform (1/6 everywhere) for these weights.
SRW_obj_3 = SRW.SRW_solver(
    edges, features, nnodes, P_init, rst_prob, group_labels, lam,
    w_init_sd=0.01,
    w=[0., 0., 0., 0.],
    feature_names=feature_names,
    loss='squared',
    norm_type='L2',
    maximize_diff=False,
    learning_rate=1,
    update_w_func='Adam',
    P_init_val=P_init,
    group_labels_val=group_labels,
    eval_sil=True
)
SRW_obj_3.generate_Q_and_P_fin()

In [162]:
# Dense view of the baseline Q_fin (zero weights).
SRW_obj_3.Q_fin.toarray()

array([[ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667]])

In [163]:
# Baseline P_fin (zero weights); displayed as a dense array in the output below.
SRW_obj_3.P_fin

array([[ 0.29166667,  0.14166667,  0.14166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.29166667,  0.14166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.29166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.29166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.14166667,  0.29166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.14166667,  0.14166667,
         0.29166667]])

In [165]:
# Evaluate the validation cost on the baseline P_fin; the result is read back from the
# cost_val attribute rather than from the call's return value.
SRW_obj_3.calc_cost_val(SRW_obj_3.P_fin)
SRW_obj_3.cost_val

0.08999999999999993

# Scratch

In [154]:
import pandas as pd

In [158]:
# Show SRW_obj_2's initial P matrix as a labelled table. Reuses the sample_names / node_names
# lists defined with the toy-2 inputs instead of repeating the same literals, so the labels
# cannot drift from the ones passed to the solver.
pd.DataFrame(SRW_obj_2.P_init.toarray(), index=sample_names, columns=node_names)

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,1.0,0.0,0.0,0.0,0.0,0.0
p2,0.0,1.0,0.0,0.0,0.0,0.0
p3,0.0,0.0,1.0,0.0,0.0,0.0
p4,0.0,0.0,0.0,1.0,0.0,0.0
p5,0.0,0.0,0.0,0.0,1.0,0.0
p6,0.0,0.0,0.0,0.0,0.0,1.0
