In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import SRW_v043 as SRW
from scipy.sparse import csr_matrix, csc_matrix, issparse
import functools

# This toy example checks the analytic partial derivatives against finite-difference estimates

In [4]:
# Toy problem on a 3-node graph, used to compare analytic gradients with finite differences.
nnodes = 3
rst_prob = 0.5
lam = 0.001
group_labels = [1, 0, 1, 0, 0]

# Directed edge list: three forward edges followed by the same edges reversed, in the same order.
edges = np.array([[0, 1], [0, 2], [1, 2]])
edges = np.vstack([edges, edges[:, ::-1]])

# One feature row per edge (last column is a constant 1); the reversed edges reuse the forward rows.
features = csc_matrix(np.tile([[0.9, 0.1, 1.],
                               [0.1, 0.9, 1.],
                               [0.5, 0.5, 1.]], (2, 1)))

# Base weights, and a copy whose first entry is nudged from 0.7 to 0.700001.
w0 = np.array([0.7, 0.3, 0.1])
w1 = np.array([0.700001, 0.3, 0.1])

# Five samples by three nodes; both names refer to the same sparse matrix object.
mutation_profile = csr_matrix([[1, 0, 0],
                               [0, 1, 0],
                               [0, 1, 1],
                               [1, 1, 0],
                               [1, 1, 1]])
P_init = mutation_profile

In [7]:
# Evaluate SRW.strength_Q_and_gradient once at the base weights w0 and once at the perturbed weights w1.
# Each call is unpacked into (Q, M_strength, M_strength_rowSum, strength_grad); the 0/1 suffix
# records which weight vector produced the value.
Q0, M_strength0, M_strength_rowSum0, strength_grad0 = SRW.strength_Q_and_gradient(edges, nnodes, features, w0)
Q1, M_strength1, M_strength_rowSum1, strength_grad1 = SRW.strength_Q_and_gradient(edges, nnodes, features, w1)

In [8]:
# Analytic gradient of Q from the w0 quantities. Only column 0 of strength_grad0 is passed in,
# so this is presumably the derivative with respect to the first weight (the one perturbed in w1).
Q_grad = SRW.Q_gradient_1feature(edges, nnodes, M_strength0, M_strength_rowSum0, strength_grad0.toarray()[:,0]) 
# Display densely so it can be compared by eye with the finite-difference result.
Q_grad.toarray()

array([[ 0.        ,  0.06170302, -0.06170302],
       [ 0.02738262,  0.        , -0.02738262],
       [-0.03446875,  0.03446875,  0.        ]])

In [9]:
# Finite-difference estimate of the same derivative: w1 differs from w0 only in its first
# entry (0.700001 vs 0.7), so the divisor is that step size. Should agree with Q_grad.
(Q1 - Q0).toarray() / 0.000001 # check

array([[ 0.        ,  0.061703  , -0.061703  ],
       [ 0.0273826 ,  0.        , -0.0273826 ],
       [-0.03446874,  0.03446874,  0.        ]])

In [10]:
# Propagation scores for each weight setting: SRW.iterative_PPR is given the dense Q, the dense
# output of SRW.renorm(mutation_profile), and the restart probability; the result is stored as CSR.
P0 = csr_matrix(SRW.iterative_PPR(Q0.toarray(), SRW.renorm(mutation_profile).toarray(), rst_prob))
P1 = csr_matrix(SRW.iterative_PPR(Q1.toarray(), SRW.renorm(mutation_profile).toarray(), rst_prob))

In [11]:
# Analytic gradient of P, computed from the w0 quantities only (M_strength0, Q0, P0, strength_grad0).
P_grad = SRW.calc_P_grad_pool(edges, nnodes, M_strength0, M_strength_rowSum0, Q0, P0, rst_prob, strength_grad0)
# P_grad is indexed with three axes; slice 0 on the first axis is shown here.
# NOTE(review): presumably the first axis runs over weights, so this is dP/dw[0] -- confirm against SRW.
P_grad[0,:,:]

array([[-0.00017484,  0.01735193, -0.01717709],
       [ 0.00403931,  0.00764498, -0.01168429],
       [-0.00091024,  0.01031905, -0.0094088 ],
       [ 0.00193224,  0.01249845, -0.01443069],
       [-0.00066511,  0.01266334, -0.01199823]])

In [12]:
# Finite-difference estimate using the 1e-6 step between w0 and w1 in their first entry;
# should agree with P_grad[0,:,:].
(P1 - P0).toarray() / 0.000001 # check

array([[-0.00017466,  0.01735142, -0.01717676],
       [ 0.00403884,  0.00764556, -0.0116844 ],
       [-0.00091031,  0.01031926, -0.00940895],
       [ 0.00193209,  0.01249849, -0.01443058],
       [-0.00066509,  0.01266331, -0.01199822]])

In [13]:
# Two solver objects built from identical inputs except for the weight vector (w0 vs w1).
SRW_obj_0 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam, w=w0)
SRW_obj_1 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam, w=w1)

In [14]:
# Objective value and gradient at w0, plus the objective value at w1 for the finite-difference check.
# NOTE(review): the second call receives P_grad, which was computed from the w0 quantities, together
# with P1 and w1. J_grad1 therefore mixes the two weight settings; it is not used in the cells shown here.
J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

In [15]:
# Analytic gradient of the objective at w0; its first entry is the one the finite-difference check targets.
J_grad

array([ 0.00047087,  0.00195789,  0.00042876])

In [16]:
# Finite-difference estimate of dJ/dw[0] (step 1e-6 in the first weight); compare with J_grad[0].
(J1 - J0) / 0.000001  # check

0.00047142684200807139

In [17]:
# Repeat the objective/gradient check with loss 'WMW' and norm_type 'L1'.
# The settings are changed by overwriting attributes on the two existing solver objects,
# so both solvers keep these settings for any cell run afterwards.
SRW_obj_0.loss = SRW_obj_1.loss = 'WMW'
SRW_obj_0.norm_type = SRW_obj_1.norm_type = 'L1'
# NOTE(review): as before, the w1 call is given P_grad computed at w0; only J1 is used from it.
J0, J_grad = SRW_obj_0.obj_func_and_grad(P0, P_grad, w0)
J1, J_grad1 = SRW_obj_1.obj_func_and_grad(P1, P_grad, w1)

[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 1.  0.]]
[[ 0.  3.]
 [ 2.  0.]]
[[ 0.  2.]
 [ 1.  0.]]


In [18]:
# Analytic gradient at w0 after switching to the 'WMW' loss and 'L1' norm.
J_grad

array([ 0.00421821, -0.00315465, -0.00093644])

In [19]:
# Finite-difference estimate of dJ/dw[0] for the 'WMW' / 'L1' settings; compare with J_grad[0].
(J1 - J0) / 0.000001  # check

0.0042178278647497791

# This toy example checks the gradient-descent training functions

In [753]:
# Toy problem on a fully connected 6-node graph, used to exercise the training routines.
nnodes = 6
rst_prob = 0.3
lam = 2e-7
group_labels = [0, 0, 0, 1, 1, 1]

feature_names = ['f1', 'f2', 'selfloop', 'intercept']
node_names = ['n1', 'n2', 'n3', 'n4', 'n5', 'n6']
sample_names = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6']

# Edge list: the 15 pairs with i < j, then the same pairs reversed (same order), then the 6 self-loops.
edges = np.array([[0, 1], [0, 2], [0, 3], [0, 4], [0, 5],
                  [1, 2], [1, 3], [1, 4], [1, 5],
                  [2, 3], [2, 4], [2, 5],
                  [3, 4], [3, 5],
                  [4, 5]])
edges = np.vstack([edges,
                   edges[:, ::-1],
                   np.repeat(np.arange(nnodes), 2).reshape(-1, 2)])

# One feature row per edge, columns ordered as feature_names. The rows below belong to the
# 15 forward pairs; reversed pairs reuse them unchanged and self-loops get [0, 0, 1, 1].
features = np.array([[.9, .4, 0., 1.], [.9, .6, 0., 1.], [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
                     [.9, .5, 0., 1.], [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
                     [.1, .4, 0., 1.], [.1, .5, 0., 1.], [.1, .6, 0., 1.],
                     [.9, .4, 0., 1.], [.9, .6, 0., 1.],
                     [.9, .5, 0., 1.]])
features = csc_matrix(np.vstack([features,
                                 features,
                                 np.tile([0., 0., 1., 1.], (nnodes, 1))]))

# Each sample starts on exactly one node (float identity matrix); both names refer to the same object.
mutation_profile = csr_matrix(np.eye(nnodes))
P_init = mutation_profile

In [754]:
# Build the solver for the 6-node toy problem with the 'WMW' loss, 'L2' norm and the 'Adam' update rule.
# No starting weights are supplied (w=None); w_init_sd=0.01 is passed alongside.
# The validation inputs (P_init_val, group_labels_val) are the same objects as the training inputs,
# so the validation cost/accuracy reported during training are presumably computed on the training data.
SRW_obj_2 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam, w_init_sd=0.01, 
                           w=None, feature_names=feature_names, sample_names=sample_names, 
                           node_names=node_names, loss='WMW', norm_type='L2', maximize_diff=False, 
                           learning_rate=1., update_w_func='Adam', P_init_val=P_init, 
                           group_labels_val=group_labels, eval_acc=True, ncpus=-1, maxit=1000, early_stop=10, 
                           WMW_b=0.005)

In [755]:
# Train the weights by gradient descent using the settings given at construction
# (update_w_func='Adam', maxit=1000, early_stop=10). Progress is printed for every iteration.
SRW_obj_2.train_SRW_GD()

finished calculating strength_grad: 19:03:15
finished network propagation: 19:03:15
finished calculating P_grad using pool: 19:03:15
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 19:03:15
*** 0 iteration: J is 3.886811031e-05 cost_val is 3.88680131529e-05
*** accuracy is 1.0 accuracy_val is 1.0
[-0.01147596  0.01154667 -0.01485808 -0.00199309] 

finished calculating strength_grad: 19:03:15
finished network propagation: 19:03:15
finished calculating P_grad using pool: 19:03:15
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 19:03:15
*** 1 iteration: J is 4.03438981704e-06 cost_val is 3.4497140586e-06
*** accuracy is 1.0 accuracy_val is 1.0
[ 0.9884015  -0.98807855  0.98495788  0.98831703] 

finished calculating strength_grad: 19:03:15
finished network propagation: 19:03:15
finished calculating P_grad using pool: 19:03:15
[[ 0.  3.]
 [ 3.  0.]]
[[ 0.  0.]
 [ 0.  0.]]
finished calculating J and J_grad: 19:03:15
*** 2 iter

In [756]:
# Compute the final Q and P from the solver's current weights; the results are read back
# further down through attributes such as Q_fin_df and P_fin_df.
SRW_obj_2.generate_Q_and_P_fin()

In [757]:
# Weight vector held by the solver after gradient-descent training (one entry per feature).
SRW_obj_2.w

array([ 4.52045006, -3.94073224,  4.24606926, -2.75969195])

In [758]:
# Try the BFGS trainer on the same solver. With this solver's loss the call only prints a
# message saying BFGS does not work on this loss function and to use gradient descent instead.
SRW_obj_2.train_SRW_BFGS()

BFGS does not work on this loss function. Please use gradient descent instead


In [759]:
# Regenerate the final Q and P after the BFGS attempt so the tables below reflect the current weights.
SRW_obj_2.generate_Q_and_P_fin()

In [760]:
# Table of the current weights labelled with the feature names passed to the solver.
SRW_obj_2.w_map

Unnamed: 0,Weight
f1,4.52045
f2,-3.940732
selfloop,4.246069
intercept,-2.759692


### Check the transition matrix Q and propagation scores P

In [761]:
# Final transition matrix Q as a node-by-node table labelled with node_names.
SRW_obj_2.Q_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
n1,0.526061,0.279631,0.166505,0.013001,0.008825,0.005977
n2,0.265544,0.49956,0.208495,0.012346,0.00838,0.005676
n3,0.177147,0.233588,0.559685,0.013832,0.009389,0.006359
n4,0.012857,0.012857,0.012857,0.520235,0.276533,0.164661
n5,0.008391,0.008391,0.008391,0.265879,0.500191,0.208758
n6,0.006427,0.006427,0.006427,0.179028,0.236067,0.565625


In [762]:
# Final propagation scores P as a sample-by-node table (rows sample_names, columns node_names).
SRW_obj_2.P_fin_df

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,0.569533,0.212438,0.160748,0.022257,0.019527,0.015497
p2,0.201736,0.566691,0.175262,0.02188,0.019196,0.015235
p3,0.171023,0.196355,0.573782,0.022863,0.020058,0.015919
p4,0.02201,0.022786,0.021251,0.565432,0.209491,0.159029
p5,0.018567,0.019221,0.017926,0.20142,0.566904,0.175962
p6,0.016663,0.01725,0.016088,0.172905,0.198981,0.578114


In [763]:
# Report the cost and accuracy values stored on the trained solver, for both the training
# and the validation inputs.
# Each line is built as one formatted string and passed to print in parentheses, so the cell
# prints the same space-separated text under the Python 2 print statement and the Python 3
# print function (the bare `print a, b` form is a SyntaxError on Python 3).
print('*** cost is {0} cost_val is {1}'.format(SRW_obj_2.cost, SRW_obj_2.cost_val))
print('*** accuracy is {0} accuracy_val is {1}'.format(SRW_obj_2.accuracy, SRW_obj_2.accuracy_val))

*** cost is 8.64864666692e-45 cost_val is 8.64864666692e-45
*** accuracy is 1.0 accuracy_val is 1.0


### Compare with unweighted random walk

In [56]:
# Baseline for comparison: a solver on the same toy data with all four weights fixed at zero
# and the 'squared' loss. No training call is made before generating Q and P, so the results
# below presumably reflect the zero weights exactly as given.
SRW_obj_3 = SRW.SRW_solver(edges, features, nnodes, P_init, rst_prob, group_labels, lam, w_init_sd=0.01, 
                           w=[0.,0.,0.,0.], 
                           feature_names=feature_names, loss='squared', norm_type='L2', maximize_diff=False, 
                           learning_rate=1, update_w_func='Adam', P_init_val=P_init, 
                           group_labels_val=group_labels, eval_acc=True)
SRW_obj_3.generate_Q_and_P_fin()

In [57]:
# Transition matrix for the zero-weight baseline, shown densely; every entry comes out as 1/6.
SRW_obj_3.Q_fin.toarray()

array([[ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667],
       [ 0.16666667,  0.16666667,  0.16666667,  0.16666667,  0.16666667,
         0.16666667]])

In [58]:
# Propagation scores for the zero-weight baseline.
SRW_obj_3.P_fin

array([[ 0.29166667,  0.14166667,  0.14166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.29166667,  0.14166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.29166667,  0.14166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.29166667,  0.14166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.14166667,  0.29166667,
         0.14166667],
       [ 0.14166667,  0.14166667,  0.14166667,  0.14166667,  0.14166667,
         0.29166667]])

In [59]:
# Evaluate the validation cost on the baseline's propagation scores; calc_cost_val is called
# for its effect on the solver and the value is then read from the cost_val attribute.
SRW_obj_3.calc_cost_val(SRW_obj_3.P_fin)
SRW_obj_3.cost_val

0.0899999999999999

# Scratch

In [154]:
import pandas as pd

In [158]:
# Show the solver's initial sample-by-node profile as a labelled table.
# Row and column labels reuse sample_names / node_names from the toy-data cell (the same lists
# passed to SRW_obj_2) instead of retyping them, so the labels cannot drift from the solver's.
pd.DataFrame(SRW_obj_2.P_init.toarray(), index=sample_names, columns=node_names)

Unnamed: 0,n1,n2,n3,n4,n5,n6
p1,1.0,0.0,0.0,0.0,0.0,0.0
p2,0.0,1.0,0.0,0.0,0.0,0.0
p3,0.0,0.0,1.0,0.0,0.0,0.0
p4,0.0,0.0,0.0,1.0,0.0,0.0
p5,0.0,0.0,0.0,0.0,1.0,0.0
p6,0.0,0.0,0.0,0.0,0.0,1.0
