In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import SRW_v044 as SRW
from scipy.sparse import csr_matrix, csc_matrix, issparse
import functools

# Toy example 1: checking the analytical partial derivatives against finite differences

In [66]:
# --- Graph: 3 nodes, one directed edge for every ordered pair (i, j) with i != j ---
nnodes = 3
forward_edges = [[0, 1], [0, 2], [1, 2]]
reverse_edges = [[1, 0], [2, 0], [2, 1]]  # same pairs, opposite direction, same order
edges = np.array(forward_edges + reverse_edges)

# --- Edge features: one row per edge, columns = two features + a constant 1. column ---
# An edge and its reverse share the same feature row, so the block is simply repeated.
one_direction_features = [
    [0.9, 0.1, 1.],
    [0.1, 0.9, 1.],
    [0.5, 0.5, 1.],
]
features = csc_matrix(one_direction_features + one_direction_features)

# --- Two weight vectors that differ only in w[0], by 2e-6 (finite-difference pair) ---
w0 = np.array([0.699999, 0.3, 0.1])
w1 = np.array([0.700001, 0.3, 0.1])

# --- Propagation inputs: 5 samples x 3 nodes binary profile; both names are the same object ---
mutation_profile = csr_matrix([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 1],
    [1, 1, 0],
    [1, 1, 1],
])
P_init = mutation_profile
rst_prob = 0.5

# --- Solver inputs: one group label per sample, and the regularisation strength ---
group_labels = [1, 0, 1, 0, 0]
lam = 0.001

In [67]:
# Both evaluations share the graph and the edge features; only the weight vector differs,
# so bind the common arguments once.
strength_Q_at = functools.partial(SRW.strength_Q_and_gradient, edges, nnodes, features)

# Four results per weight vector; going by the names they are the transition matrix Q,
# the edge-strength matrix, its row sums and the strength gradient (confirm in SRW_v044).
Q0, M_strength0, M_strength_rowSum0, strength_grad0 = strength_Q_at(w0)
# Same quantities at w1, which differs from w0 only in w[0]; used for the finite-difference checks.
Q1, M_strength1, M_strength_rowSum1, strength_grad1 = strength_Q_at(w1)

In [68]:
# Analytical derivative of the transition matrix Q with respect to one feature weight,
# evaluated at w0. Column 0 of strength_grad0 is passed in, i.e. the derivative is taken
# with respect to w[0] -- the only weight that differs between w0 and w1.
Q_grad = SRW.Q_gradient_1feature(edges, nnodes, M_strength0, M_strength_rowSum0, strength_grad0.toarray()[:,0]) 
# Densify for display; the next cell compares this against a finite-difference estimate.
Q_grad.toarray()

array([[ 0.        ,  0.06170307, -0.06170307],
       [ 0.02738265,  0.        , -0.02738265],
       [-0.03446876,  0.03446876,  0.        ]])

In [69]:
# Forward finite-difference estimate of dQ/dw[0]: w0 and w1 differ only in w[0],
# by 0.700001 - 0.699999 = 2e-6.
Q_grad_fd = (Q1 - Q0).toarray() / 0.000002
# Fail loudly if the analytical gradient drifts from the numerical one, instead of relying
# on a visual comparison of two printed arrays. The tolerance allows for the O(step)
# truncation error of a forward difference.
np.testing.assert_allclose(Q_grad.toarray(), Q_grad_fd, rtol=1e-4, atol=1e-8)
Q_grad_fd

array([[ 0.        ,  0.06170302, -0.06170302],
       [ 0.02738262,  0.        , -0.02738262],
       [-0.03446875,  0.03446875,  0.        ]])

In [70]:
def _propagate(Q):
    # Run SRW.iterative_PPR over the densified transition matrix Q, starting from the
    # densified SRW.renorm(mutation_profile), with rst_prob; wrap the result as CSR.
    dense_Q = Q.toarray()
    dense_start = SRW.renorm(mutation_profile).toarray()
    return csr_matrix(SRW.iterative_PPR(dense_Q, dense_start, rst_prob))

# Propagation scores under w0 and under w1 (the finite-difference pair).
P0 = _propagate(Q0)
P1 = _propagate(Q1)

In [71]:
# Analytical gradient of the propagation scores P with respect to the feature weights, at w0.
# NOTE(review): the "_pool" suffix suggests the work is spread over a worker pool -- confirm in SRW_v044.
P_grad = SRW.calc_P_grad_pool(edges, nnodes, M_strength0, M_strength_rowSum0, Q0, P0, rst_prob, strength_grad0)
# Slice 0 along the first axis is a 5 x 3 array (same shape as mutation_profile); it is the
# slice compared with the finite difference in w[0] in the next cell.
P_grad[0,:,:]

array([[-0.00017484,  0.01735194, -0.0171771 ],
       [ 0.00403931,  0.00764498, -0.0116843 ],
       [-0.00091024,  0.01031905, -0.00940881],
       [ 0.00193224,  0.01249846, -0.0144307 ],
       [-0.0006651 ,  0.01266335, -0.01199824]])

In [72]:
# Forward finite-difference estimate of dP/dw[0]: w0 and w1 differ only in w[0], by 2e-6.
P_grad_fd = (P1 - P0).toarray() / 0.000002
# Automated comparison with the analytical gradient for w[0]. The absolute tolerance is
# looser than for Q because the recorded analytical and numerical values differ by up to ~6e-7.
np.testing.assert_allclose(P_grad[0,:,:], P_grad_fd, rtol=1e-3, atol=5e-6)
P_grad_fd

array([[-0.00017466,  0.01735143, -0.01717677],
       [ 0.00403884,  0.00764556, -0.0116844 ],
       [-0.00091031,  0.01031926, -0.00940895],
       [ 0.00193209,  0.01249849, -0.01443059],
       [-0.00066509,  0.01266332, -0.01199823]])

In [73]:
# The two solvers are configured identically except for the weight vector they start from.
shared_solver_args = (edges, features, nnodes, P_init, rst_prob, group_labels, lam)

SRW_obj_0 = SRW.SRW_solver(*shared_solver_args, w=w0)
SRW_obj_1 = SRW.SRW_solver(*shared_solver_args, w=w1)

In [74]:
# Objective value J and its gradient with respect to w, evaluated at w0.
J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
# Objective value at w1, needed for the finite-difference check of J_grad[0].
# NOTE(review): P_grad was computed at w0 but is passed here together with P1 and w1, so
# J_grad1 is presumably not the true gradient at w1. It is not read anywhere in this notebook.
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 2.  0.]]
[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 2.  0.]]


In [75]:
# Analytical gradient of J at w0 (one entry per weight); entry 0 is checked in the next cell.
J_grad

array([  1.00042987e-03,   9.99538425e-04,  -3.17005098e-08])

In [76]:
# Forward finite-difference estimate of dJ/dw[0]: w0 and w1 differ only in w[0], by 2e-6.
J_grad_fd = (J1 - J0) / 0.000002
# Automated comparison with the first entry of the analytical gradient.
np.testing.assert_allclose(J_grad[0], J_grad_fd, rtol=1e-4)
J_grad_fd

0.0010004299610955059

In [77]:
# Repeat the objective/gradient evaluation with the loss and norm type set explicitly on both solvers.
# NOTE(review): the J_grad and finite-difference values recorded below are identical to the
# earlier run made without these assignments. Confirm whether 'WMW' / 'L1' are already the
# defaults, or whether assigning the attributes after construction has no effect.
SRW_obj_0.loss = SRW_obj_1.loss = 'WMW'
SRW_obj_0.norm_type = SRW_obj_1.norm_type = 'L1'
J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
# NOTE(review): P_grad was computed at w0, so J_grad1 is presumably not the gradient at w1;
# only J1 is used afterwards.
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 2.  0.]]
[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 2.  0.]]


In [78]:
# Analytical gradient of J at w0 after the loss / norm_type assignments above.
J_grad

array([  1.00042987e-03,   9.99538425e-04,  -3.17005098e-08])

In [80]:
# Forward finite-difference estimate of dJ/dw[0]: w0 and w1 differ only in w[0], by 2e-6.
J_grad_fd = (J1 - J0) / 0.000002
# Automated comparison with the first entry of the analytical gradient.
np.testing.assert_allclose(J_grad[0], J_grad_fd, rtol=1e-4)
J_grad_fd

0.0010004299610955059

# Toy example 2: checking the gradient-descent training functions

In [49]:
# --- Graph: 6 nodes, fully connected in both directions, plus one self-loop per node ---
nnodes = 6
# Generator expressions (not list comprehensions) so no loop variable leaks into the
# notebook namespace on Python 2.
upper_edges = list([i, j] for i in range(nnodes) for j in range(i + 1, nnodes))  # i < j
lower_edges = list([j, i] for i, j in upper_edges)                               # reversed, same order
self_loops = list([i, i] for i in range(nnodes))
edges = np.array(upper_edges + lower_edges + self_loops)

# --- Edge features, one row per edge: [f1, f2, selfloop, intercept] ---
# f1 is 0.9 for edges inside {0,1,2} or inside {3,4,5} and 0.1 for edges between the two sets.
upper_features = [
    [.9,.4,0.,1.], [.9,.6,0.,1.], [.1,.4,0.,1.], [.1,.5,0.,1.], [.1,.6,0.,1.],
    [.9,.5,0.,1.], [.1,.4,0.,1.], [.1,.5,0.,1.], [.1,.6,0.,1.],
    [.1,.4,0.,1.], [.1,.5,0.,1.], [.1,.6,0.,1.],
    [.9,.4,0.,1.], [.9,.6,0.,1.],
    [.9,.5,0.,1.],
]
selfloop_features = [[.0,.0,1.,1.]] * nnodes
# A reversed edge carries the same features as its forward edge, hence the repeated block.
features = csc_matrix(upper_features + upper_features + selfloop_features)

# --- Propagation inputs: sample k starts with all its mass on node k (identity profile) ---
mutation_profile = csr_matrix(np.eye(nnodes, dtype=int)).astype(float)
P_init = mutation_profile  # both names refer to the same matrix
rst_prob = 0.3

# --- Solver inputs ---
group_labels = [0] * 3 + [1] * 3
lam = 2e-7

# --- Labels for features, nodes and samples ---
feature_names = ['f1', 'f2', 'selfloop', 'intercept']
node_names = list('n%d' % (k + 1) for k in range(nnodes))
sample_names = list('p%d' % (k + 1) for k in range(nnodes))

In [52]:
# With w=None and w_init_sd=0.01 the solver appears to start from small random weights (the
# first weight vector in the training log below is of that size), so seed NumPy's global RNG
# to make the training trace reproducible under Restart & Run All.
# NOTE(review): assumes SRW_v044 draws the initial w from np.random -- confirm; if it uses a
# different RNG this call is harmless but has no effect.
np.random.seed(0)

# Solver for the 6-node toy problem. The validation inputs are the training inputs themselves
# (P_init_val=P_init, group_labels_val=group_labels), so cost_val / accuracy_val in the log
# are computed on the same samples.
SRW_obj_2 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam, w_init_sd=0.01,
                           w=None, feature_names=feature_names, sample_names=sample_names,
                           node_names=node_names, loss='WMW', norm_type='L1', maximize_diff=False,
                           learning_rate=1., update_w_func='Adam', P_init_val=P_init,
                           group_labels_val=group_labels, ncpus=-1, maxit=1000, early_stop=10,
                           WMW_b=0.005)

In [53]:
# Fit the feature weights. The log printed below reports, per iteration, J, cost_val,
# accuracy / accuracy_val and the current weight vector.
SRW_obj_2.train_SRW_GD()

finished calculating strength_grad: 17:48:44
finished network propagation: 17:48:44
finished calculating P_grad using pool: 17:48:45
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  3.]
 [ 3.  0.]]
finished calculating J and J_grad: 17:48:45
*** 0 iteration: J is 5.73526346331 cost_val is 3.9466695207e-05
*** accuracy is 0.0 accuracy_val is 1.0
[-0.02195841  0.0043074  -0.01338501 -0.01008228] 

finished calculating strength_grad: 17:48:45
finished network propagation: 17:48:45
finished calculating P_grad using pool: 17:48:45
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  3.]
 [ 3.  0.]]
finished calculating J and J_grad: 17:48:45
*** 1 iteration: J is 5.20846598278 cost_val is 6.49447907489e-05
*** accuracy is 0.0 accuracy_val is 1.0
[ 0.97804158  1.0043072  -1.0133849   0.98991666] 

finished calculating strength_grad: 17:48:45
finished network propagation: 17:48:45
finished calculating P_grad using pool: 17:48:45
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 17:48:45
*** 2 iteration:

finished calculating P_grad using pool: 17:48:50
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 17:48:50
*** 22 iteration: J is 4.40305791001e-06 cost_val is 7.40773865691e-56
*** accuracy is 1.0 accuracy_val is 1.0
[ 8.79935595 -4.93596102 -8.27997258 -5.29911949] 

finished calculating strength_grad: 17:48:50
finished network propagation: 17:48:50
finished calculating P_grad using pool: 17:48:50
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 17:48:50
*** 23 iteration: J is 4.45469382111e-06 cost_val is 5.61125647016e-56
*** accuracy is 1.0 accuracy_val is 1.0
[ 8.89571719 -5.0109484  -8.36680351 -5.37691941] 

finished calculating strength_grad: 17:48:50
finished network propagation: 17:48:50
finished calculating P_grad using pool: 17:48:50
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 17:48:50
*** 24 iteration: J is 4.50172007655e-06 cost_val is 4.46013400528e-56
*** accuracy is 1.

In [54]:
# Presumably computes the final transition matrix Q and propagation scores P from the learned
# weights; the cells below read Q_fin_df / P_fin_df after this call -- confirm in SRW_v044.
SRW_obj_2.generate_Q_and_P_fin()

In [55]:
# Weight vector held by the solver after training (one entry per feature).
SRW_obj_2.w

array([ 9.69742122, -5.63482682, -9.08913663, -6.02420502])

In [56]:
# The same weights as a table labelled with feature_names.
SRW_obj_2.w_map

Unnamed: 0,Weight
f1,9.697421
f2,-5.634827
selfloop,-9.089137
intercept,-6.024205


### Check the transition matrix Q and propagation scores P

In [57]:
# Final transition matrix as a node-by-node table. In the output below almost all of each
# row's probability stays inside the node's own group (n1-n3 or n4-n6).
SRW_obj_2.Q_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
n1,2.879119e-07,0.6435883,0.3550752,0.0007056913,0.0004018117,0.0002287581
n2,0.563561,2.521114e-07,0.4352687,0.0006179418,0.0003518483,0.0002003131
n3,0.4160274,0.5824066,3.373349e-07,0.0008268304,0.0004707868,0.0002680268
n4,0.0007051407,0.0007051407,0.0007051407,2.876873e-07,0.6430861,0.3547982
n5,0.0003518886,0.0003518886,0.0003518886,0.5636255,2.521402e-07,0.4353186
n6,0.000268231,0.000268231,0.000268231,0.4163445,0.5828505,3.37592e-07


In [58]:
# Final propagation scores as a sample-by-node table; each sample's score is concentrated
# on the nodes of its own group in the output below.
SRW_obj_2.P_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,0.481443,0.303366,0.212097,0.001169,0.00113,0.000794
p2,0.265643,0.509938,0.221399,0.001141,0.001103,0.000775
p3,0.248506,0.296241,0.452029,0.001219,0.001178,0.000828
p4,0.001168,0.001302,0.001039,0.481382,0.30317,0.211939
p5,0.00099,0.001103,0.000881,0.26571,0.509935,0.221381
p6,0.000931,0.001038,0.000828,0.248704,0.296408,0.452091


In [59]:
# Report the solver's cost / accuracy attributes for the training and validation inputs.
# A single pre-formatted string is passed to print so the cell parses on both Python 2 and
# Python 3 (the bare print statement is a SyntaxError on Python 3). %s formats each value
# with str(), exactly as the print statement did, so the printed text is unchanged.
print('*** cost is %s cost_val is %s' % (SRW_obj_2.cost, SRW_obj_2.cost_val))
print('*** accuracy is %s accuracy_val is %s' % (SRW_obj_2.accuracy, SRW_obj_2.accuracy_val))

*** cost is 1.31385917477e-52 cost_val is 1.29675576854e-56
*** accuracy is 1.0 accuracy_val is 1.0


### Compare with unweighted random walk

In [36]:
# Baseline for comparison: the same toy problem with every feature weight fixed at zero,
# i.e. an unweighted random walk. No training is run on this solver.
baseline_options = dict(
    w_init_sd=0.01,
    w=[0., 0., 0., 0.],
    feature_names=feature_names,
    loss='WMW',
    norm_type='L1',
    maximize_diff=False,
    learning_rate=1,
    update_w_func='Adam',
    P_init_val=P_init,
    group_labels_val=group_labels
)
SRW_obj_3 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam,
                           **baseline_options)

# Compute Q_fin / P_fin directly from the zero weights.
SRW_obj_3.generate_Q_and_P_fin()

In [37]:
# Transition matrix of the zero-weight baseline, densified for display: every entry is 1/6,
# i.e. each node moves to any of the 6 nodes (itself included) with equal probability.
SRW_obj_3.Q_fin.toarray()

array([[ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667]])

In [38]:
# Propagation scores of the zero-weight baseline: each sample keeps the largest score on its
# own starting node and spreads the rest evenly over the other five.
SRW_obj_3.P_fin

array([[ 0.41666667,  0.11666667,  0.11666667,  0.11666667,  0.11666667,
         0.11666667],
       [ 0.11666667,  0.41666667,  0.11666667,  0.11666667,  0.11666667,
         0.11666667],
       [ 0.11666667,  0.11666667,  0.41666667,  0.11666667,  0.11666667,
         0.11666667],
       [ 0.11666667,  0.11666667,  0.11666667,  0.41666667,  0.11666667,
         0.11666667],
       [ 0.11666667,  0.11666667,  0.11666667,  0.11666667,  0.41666667,
         0.11666667],
       [ 0.11666667,  0.11666667,  0.11666667,  0.11666667,  0.11666667,
         0.41666667]])

In [39]:
# Evaluate the validation cost of the untrained, zero-weight baseline by hand.
# C is set before calc_cost_val is called -- presumably calc_cost_val reads self.C (the group
# centroids of P_fin); confirm in SRW_v044. Keep the statement order.
SRW_obj_3.C = SRW_obj_3.centroid(SRW_obj_3.P_fin, SRW_obj_3.ngroups, SRW_obj_3.group2indeces_list)
# calc_cost_val returns nothing that is used here; the result is read from the cost_val attribute.
SRW_obj_3.calc_cost_val(SRW_obj_3.P_fin)
SRW_obj_3.cost_val

4.600588844233396e-52

# Scratch

In [154]:
import pandas as pd

In [158]:
# Show the initial propagation matrix of SRW_obj_2 as a labelled samples-by-nodes table.
# Reuse sample_names / node_names (the labels passed to SRW_obj_2 when it was constructed)
# instead of retyping the lists, so the table cannot drift from the solver's own labels.
pd.DataFrame(SRW_obj_2.P_init.toarray(), index=sample_names, columns=node_names)

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,1.0,0.0,0.0,0.0,0.0,0.0
p2,0.0,1.0,0.0,0.0,0.0,0.0
p3,0.0,0.0,1.0,0.0,0.0,0.0
p4,0.0,0.0,0.0,1.0,0.0,0.0
p5,0.0,0.0,0.0,0.0,1.0,0.0
p6,0.0,0.0,0.0,0.0,0.0,1.0
