In [1]:
import pandas as pd
import numpy as np
import time
import rrBLUP

In [2]:
# Load the pre-converted genotype tables. In these files missing calls are kept
# as NaN and genotypes are coded -1 (0/0), 0 (0/1), 1 (1/1).

def _features_and_labels(frame):
    """Split a table into its non-'label' columns and an (n, 1) 'label' column."""
    features = np.array(frame.drop(columns='label'))
    labels = np.array(frame['label']).reshape(-1, 1)
    return features, labels

train = pd.read_csv('data/protein.train.nan-101.csv')
test = pd.read_csv('data/protein.test.nan-101.csv')

train_x, train_y = _features_and_labels(train)
test_x, test_y = _features_and_labels(test)

In [3]:
# Run rrBLUP.A_mat on the raw training markers with mean imputation, keeping
# both the returned matrix (Amat) and the imputed marker data (train_x_imp),
# which later cells reuse.

# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
Amat, train_x_imp = rrBLUP.A_mat(train_x, impute_method='mean', return_imputed=True)
end_time = time.perf_counter()

print(Amat)
print(train_x_imp)
print(f'Running Time: {round(end_time - start_time, 2)} s')

[[ 1.52228558e+00 -1.07000128e-01  1.45080921e-01 ...  6.51882121e-02
   9.77879455e-02 -1.96885674e-01]
 [-1.07000128e-01  1.35232039e+00 -1.73538188e-01 ... -2.14950401e-01
  -1.53218504e-01 -5.05008854e-02]
 [ 1.45080921e-01 -1.73538188e-01  1.49166524e+00 ...  7.84011402e-02
  -2.47541001e-01 -1.49507640e-01]
 ...
 [ 6.51882121e-02 -2.14950401e-01  7.84011402e-02 ...  1.21667284e+00
  -1.95091519e-01 -9.15425151e-04]
 [ 9.77879455e-02 -1.53218504e-01 -2.47541001e-01 ... -1.95091519e-01
   1.32489524e+00 -5.15021975e-02]
 [-1.96885674e-01 -5.05008854e-02 -1.49507640e-01 ... -9.15425151e-04
  -5.15021975e-02  1.43376270e+00]]
[[-0.02561475  1.          0.         ... -1.         -1.
  -1.        ]
 [-0.02561475 -0.01291605  0.00221157 ...  0.0140056   0.00516462
  -1.        ]
 [ 1.         -0.01291605  0.00221157 ...  1.          1.
   1.        ]
 ...
 [-0.02561475 -0.01291605  0.00221157 ... -1.         -1.
  -1.        ]
 [-0.02561475 -0.01291605  0.00221157 ... -1.         -1.
 

In [4]:
# Run rrBLUP.A_mat again on the already-imputed training markers, this time
# with shrink="REG". Note that this rebinds Amat, replacing the matrix from the
# mean-imputation cell.
# NOTE(review): the recorded run printed "Shrinkage intensity: -0.13"; a
# negative intensity looks unusual -- confirm this is expected for this data.

# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
Amat = rrBLUP.A_mat(train_x_imp, shrink="REG")
end_time = time.perf_counter()

print(Amat)
print(f'Running Time: {round(end_time - start_time, 2)} s')

Shrinkage intensity: -0.13
[[ 1.53915530e+00 -1.21290885e-01  1.64457684e-01 ...  7.38946398e-02
   1.10848339e-01 -2.23181393e-01]
 [-1.21290885e-01  1.34648984e+00 -1.96715656e-01 ... -2.43658814e-01
  -1.73682109e-01 -5.72456985e-02]
 [ 1.64457684e-01 -1.96715656e-01  1.50444535e+00 ...  8.88722643e-02
  -2.80602160e-01 -1.69475628e-01]
 ...
 [ 7.38946398e-02 -2.43658814e-01  8.88722643e-02 ...  1.19272543e+00
  -2.21147613e-01 -1.03768779e-03]
 [ 1.10848339e-01 -1.73682109e-01 -2.80602160e-01 ... -2.21147613e-01
   1.31540183e+00 -5.83807442e-02]
 [-2.23181393e-01 -5.72456985e-02 -1.69475628e-01 ... -1.03768779e-03
  -5.83807442e-02  1.43880945e+00]]
Running Time: 77.6 s


In [5]:
# Mean-impute the test markers; only the imputed matrix is needed downstream.
# The first return value is unpacked into a named throwaway rather than `_`,
# because rebinding `_` shadows IPython's automatic "last output" variable.
# NOTE(review): this call sees only test_x, so the fill values are derived from
# the test set rather than the training set -- confirm that is intended and
# that the resulting columns line up with train_x_imp.
_unused_test_amat, test_x_imp = rrBLUP.A_mat(test_x, impute_method='mean', return_imputed=True)
del _unused_test_amat  # free the unused matrix instead of keeping it in the kernel

In [6]:
# Fit rrBLUP.mixed_solve with the imputed training markers passed as Z, then
# compute predictions for both the training and the test split.

# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
result = rrBLUP.mixed_solve(y=train_y, Z=train_x_imp)
end_time = time.perf_counter()
print(result)
print(f'Running Time: {round(end_time - start_time, 2)} s')

# Prediction = imputed marker matrix times result['u'], plus result['beta'].
train_pred = np.dot(train_x_imp, result['u']) + result['beta']
test_pred = np.dot(test_x_imp, result['u']) + result['beta']

# Observed labels followed by predictions, training split first.
print(train_y)
print(train_pred)
print(test_y)
print(test_pred)

{'Vu': 0.0003357920566962145, 'Ve': array([0.62794301]), 'beta': array([[0.01984024]]), 'u': array([[-0.02586448],
       [-0.0200761 ],
       [-0.00426553],
       ...,
       [-0.0105526 ],
       [-0.01251296],
       [-0.01523766]]), 'LL': array([-5290.27755271])}
Running Time: 61.43 s
[[-0.40751316]
 [-0.93705758]
 [ 2.17188063]
 ...
 [ 0.43804971]
 [-1.27869914]
 [ 1.87294426]]
[[ 0.0217592 ]
 [-0.25779721]
 [ 1.48419103]
 ...
 [ 1.02846712]
 [-0.79213007]
 [ 0.96663782]]
[[0.92488893]
 [1.84732115]
 [1.04446348]
 ...
 [0.45513178]
 [1.0273814 ]
 [1.25798945]]
[[ 1.3971361 ]
 [ 1.18552083]
 [ 0.32032063]
 ...
 [-0.58659504]
 [ 0.20962035]
 [ 0.47375255]]


In [7]:
# Fit rrBLUP.mixed_solve again, this time passing K = rrBLUP.A_mat(train_x_imp)
# instead of Z. This rebinds result and train_pred, replacing the values from
# the Z-based cell.

# The A_mat call sits inside the timed region, so the reported time covers
# building K as well as the solve itself.
# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
result = rrBLUP.mixed_solve(y=train_y, K=rrBLUP.A_mat(train_x_imp))
end_time = time.perf_counter()
print(result)
print(f'Running Time: {round(end_time - start_time, 2)} s')

# With K as input, result['u'] is added to result['beta'] directly to obtain
# the training predictions (no marker-matrix multiplication).
train_pred = result['u'] + result['beta']
print(train_y)
print(train_pred)

{'Vu': 0.7103740147314892, 'Ve': array([0.62794263]), 'beta': array([[0.0184644]]), 'u': array([[ 0.0032946 ],
       [-0.27626182],
       [ 1.46572679],
       ...,
       [ 1.0100037 ],
       [-0.81059465],
       [ 0.94817382]]), 'LL': array([-5290.27755271])}
Running Time: 139.81 s
[[-0.40751316]
 [-0.93705758]
 [ 2.17188063]
 ...
 [ 0.43804971]
 [-1.27869914]
 [ 1.87294426]]
[[ 0.02175899]
 [-0.25779742]
 [ 1.48419119]
 ...
 [ 1.02846809]
 [-0.79213025]
 [ 0.96663822]]
