In [1]:
import glob
import copy
import numpy as np

import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from ncon import ncon
from itertools import product

import nbimporter
from assistant import triplets_to_tensor

# Model

In [None]:
def g(K, lamb):
    """Compute G = (K + lamb * I)^-1, the matrix of equation A7.

    Parameters
    ----------
    K : np.ndarray or tuple
        Either the square kernel matrix as a 2D array, or its decomposition
        given as a tuple ``(eigenvalues, eigenvectors)``.
    lamb : float
        Regularisation parameter added to the diagonal (or to the eigenvalues).

    Returns
    -------
    np.ndarray
        The matrix ``(K + lamb * I)^-1``.

    Raises
    ------
    TypeError
        If ``K`` is neither an array nor a tuple (previously this case
        silently returned ``None``).
    """
    if isinstance(K, np.ndarray):
        return np.linalg.inv(K + lamb * np.eye(len(K)))
    if isinstance(K, tuple):
        eigval = np.asarray(K[0])
        eigvec = np.asarray(K[1])
        # Scaling column j of eigvec by 1/(eigval_j + lamb) equals eigvec @ diag(...).
        return np.matmul(eigvec * (1 / (eigval + lamb)), eigvec.transpose())
    raise TypeError("type of K not known: {}".format(type(K).__name__))
        
def h(K, lamb):
    """Compute the hat matrix H = K (K + lamb * I)^-1 of equation A8.

    Parameters
    ----------
    K : np.ndarray or tuple
        Either the square kernel matrix as a 2D array, or its decomposition
        given as a tuple ``(eigenvalues, eigenvectors)``.
    lamb : float
        Regularisation parameter.

    Returns
    -------
    np.ndarray
        The hat matrix.

    Raises
    ------
    TypeError
        If ``K`` is neither an array nor a tuple (previously this case
        silently returned ``None``).
    """
    if isinstance(K, np.ndarray):
        return np.matmul(K, g(K, lamb))
    if isinstance(K, tuple):
        eigval = np.asarray(K[0])
        eigvec = np.asarray(K[1])
        # Scaling column j of eigvec by eigval_j/(eigval_j + lamb) equals eigvec @ diag(...).
        return np.matmul(eigvec * (eigval / (eigval + lamb)), eigvec.transpose())
    raise TypeError("type of K not known: {}".format(type(K).__name__))
        

def leaveouthat(hat):
    """Build the leave-out hat matrix O from a hat matrix H (equation A5).

    The diagonal is set to zero and every row ``i`` is divided by
    ``1 - hat[i, i]``.

    Parameters
    ----------
    hat : np.ndarray
        Square 2D hat matrix. It is not modified.

    Returns
    -------
    np.ndarray
        The leave-out hat matrix, same shape as ``hat``.
    """
    res = np.array(hat, copy=True)
    np.fill_diagonal(res, 0)
    # The original returned from inside the row loop, so only row 0 was ever
    # rescaled; every row is rescaled here.
    return res / (1 - np.diag(hat))[:, np.newaxis]
    
def apply_leave_out_group(leaveoutgroup, H, Pt):
    """Multiply a 3-way tensor with the leave-out operator of one leave-out group.

    Parameters
    ----------
    leaveoutgroup : list of int
        1-based modes (1, 2, 3) of ``Pt`` that are left out together.
    H : sequence of np.ndarray
        Hat matrices; ``H[m - 1]`` acts on mode ``m`` of ``Pt``.
    Pt : np.ndarray
        3-way tensor to transform.

    Returns
    -------
    np.ndarray
        For a single mode: ``Pt`` contracted with ``leaveouthat(H[m - 1])`` on
        that mode. For several modes: ``(HP - f * Pt) / (1 - f)`` where ``HP``
        is ``Pt`` contracted with the plain hat matrices of the group and
        ``f`` is the product of the corresponding hat-matrix diagonal entries.
    """
    if len(leaveoutgroup) == 1:
        mode = leaveoutgroup[0]
        o = leaveouthat(H[mode - 1])  # 1-based mode -> 0-based list index
        t = [-1, -2, -3]
        t[mode - 1] = mode
        return ncon([Pt, o], [t, [-mode, mode]])

    # Contract every mode of the group with its (uncorrected) hat matrix.
    matrices = [H[i - 1] for i in leaveoutgroup]
    t = [-1, -2, -3]
    for i in leaveoutgroup:
        t[i - 1] = -t[i - 1]
    network = [t] + [[-i, i] for i in leaveoutgroup]
    res = ncon([Pt] + matrices, network)

    # factor[a, b, c] = product of the hat diagonals of the left-out modes.
    # Broadcasting over the diagonal replaces indexing such as Pt[:, 1, 1],
    # which raised IndexError whenever another axis had length 1.
    factor = np.ones(Pt.shape)
    for mode in (1, 2, 3):
        if mode in leaveoutgroup:
            shape = [1, 1, 1]
            shape[mode - 1] = -1
            factor = factor * np.diag(H[mode - 1]).reshape(shape)

    correctionterm = (factor * Pt).astype(dtype=np.float32)
    correctiondivisor = (np.ones(Pt.shape, dtype=np.float32) - factor).astype(dtype=np.float32)
    return (res - correctionterm) / correctiondivisor
    
def apply_leave_in_group(leaveingroup, H, Pt):
    """Multiply the leave-in modes of a 3-way tensor with their hat matrices.

    For every 1-based mode ``m`` in ``leaveingroup`` the tensor ``Pt`` is
    contracted with ``H[m - 1]`` on that mode (plain hat-matrix product, no
    leave-out correction). Modes not listed are passed through unchanged.
    """
    hat_matrices = [H[mode - 1] for mode in leaveingroup]

    tensor_legs = [-1, -2, -3]
    hat_legs = []
    for mode in leaveingroup:
        # Flip the leg to positive so that ncon contracts it with the hat matrix.
        tensor_legs[mode - 1] = -tensor_legs[mode - 1]
        hat_legs.append([-mode, mode])

    return ncon([Pt] + hat_matrices, [tensor_legs] + hat_legs)

In [None]:
class ThreeStep:
    """Kernel model on a 3-way label tensor with one kernel per tensor mode.

    ``fit`` stores a coefficient tensor ``A_``; ``predict`` contracts it with
    (test x train) kernel matrices; ``leave_out_estimate`` computes leave-out
    predictions from the hat matrices without refitting.
    """

    # Coefficient tensor; stays at this placeholder value until fit() succeeds.
    A_ = 0

    @staticmethod
    def _kernel_size(k):
        """Number of objects described by a kernel given as matrix or (eigval, eigvec)."""
        return len(k[0]) if isinstance(k, tuple) else len(k)

    def fit(self, K, Y, lamb):
        """Compute the coefficient tensor from labels ``Y`` and kernels ``K``.

        Parameters
        ----------
        K : sequence
            Three kernels (2D arrays or ``(eigval, eigvec)`` tuples), one per mode of ``Y``.
        Y : np.ndarray
            3-way label tensor whose shape must match the kernel sizes.
        lamb : sequence of float
            One regularisation value per kernel.

        Raises
        ------
        ValueError
            If the shape of ``Y`` does not match the kernels. Previously this
            case was skipped silently, leaving ``A_`` unset or stale.
        """
        expected_shape = tuple(self._kernel_size(K[i]) for i in range(len(K)))
        if Y.shape != expected_shape:
            raise ValueError(
                "label tensor shape {} does not match kernel sizes {}".format(Y.shape, expected_shape)
            )
        matrices = [Y] + [g(K[i], lamb[i]) for i in range(len(K))]
        self.A_ = ncon(matrices, [[1, 2, 3], [-1, 1], [-2, 2], [-3, 3]])

    def predict(self, K):
        """Contract ``A_`` with the three matrices in ``K`` (one per mode).

        Each matrix is indexed ``[output, training]``; the result has one axis
        per matrix, in the same order.
        """
        matrices = [self.A_] + list(K)
        return ncon(matrices, [[1, 2, 3], [-1, 1], [-2, 2], [-3, 3]])

    def leave_out_estimate(self, K, Y, lamb, setting):
        """Leave-out predictions for the label tensor ``Y``.

        Parameters
        ----------
        K, Y, lamb
            As in :meth:`fit`.
        setting : list of list of int
            Leave-out groups of 1-based modes, e.g. ``[[1, 2, 3]]``,
            ``[[1, 2]]``, ``[[1]]`` or ``[[1], [2]]``. Modes not mentioned are
            treated as leave-in modes.

        Returns
        -------
        np.ndarray
            Tensor of leave-out estimates with the shape of ``Y``.
        """
        H = [h(K[i], lamb[i]) for i in range(len(K))]
        P = copy.deepcopy(Y)

        leaveingroup = [1, 2, 3]
        for leaveoutgroup in setting:
            # Modes that are left out must not be handled again as leave-in modes.
            for x in leaveoutgroup:
                leaveingroup.remove(x)
            P = apply_leave_out_group(leaveoutgroup, H, P)

        P = apply_leave_in_group(leaveingroup, H, P)

        return P

# Learning

In [None]:
def to_new_label_tensor(labels_tensor, train, test, setting):
    """
    Build the train and test label tensors for one cross-validation split.

    Input setting options:
    - the first task (new drug-drug-effect triplet): [[1,2,3]]
    - the second task (new drug-drug): [[1,2]]
    - the third task (new first drug): [[1]]
    - the fourth task (new first drug and new second drug): [[1],[2]]

    Meaning of ``train`` / ``test`` per setting:
    - [[1,2,3]]: ``train`` holds the positive test triplets and ``test`` the
      negative test triplets, one index triplet per row.
    - [[1,2]]: ``train`` / ``test`` hold drug-pair indices, one pair per row.
    - [[1]] and [[1],[2]]: ``train`` / ``test`` are 1D arrays of drug indices.

    Output > (train tensor, test tensor). Entries of the test tensor that must
    not be scored are NaN (settings [[1,2,3]], [[1,2]] and [[1],[2]]).
    """

    assert setting in [[[1,2,3]], [[1,2]], [[1]], [[1],[2]]], 'please check your setting condition'

    # Setting 1: hide the positive test triplets from training.
    if setting == [[1,2,3]]:
        test_pos_t = tuple(np.asarray(train).transpose())
        test_neg_t = tuple(np.asarray(test).transpose())

        new_label_tensor_train = labels_tensor.astype(int)
        new_label_tensor_train[test_pos_t] = 0

        new_label_tensor_test = np.full(labels_tensor.shape, np.nan)
        new_label_tensor_test[test_pos_t] = 1
        new_label_tensor_test[test_neg_t] = 0

    # Setting 2: hide the test pairs (both orientations) from training.
    elif setting == [[1,2]]:
        train_t = np.asarray(train).transpose()
        test_t = np.asarray(test).transpose()
        train_tt, train_sym_tt = tuple(train_t), tuple(train_t[[1, 0]])
        test_tt, test_sym_tt = tuple(test_t), tuple(test_t[[1, 0]])

        new_label_tensor_train = labels_tensor.astype(int)
        new_label_tensor_train[test_tt] = 0
        new_label_tensor_train[test_sym_tt] = 0

        # Training pairs and self-pairs are excluded from evaluation.
        new_label_tensor_test = labels_tensor.astype(float)
        new_label_tensor_test[train_tt] = np.nan
        new_label_tensor_test[train_sym_tt] = np.nan
        new_label_tensor_test[np.diag_indices(labels_tensor.shape[0])] = np.nan

    # Setting 3: train on (train x train) drugs, test on (train x test) drugs.
    elif setting == [[1]]:
        new_label_tensor_train = labels_tensor[np.ix_(train, train)]
        new_label_tensor_test = labels_tensor[np.ix_(train, test)]

    # Setting 4: train on (train x train) drugs, test on (test x test) drugs.
    else:
        new_label_tensor_train = labels_tensor[np.ix_(train, train)]
        # Cast to float first: writing NaN into a bool/int tensor would store
        # True / garbage instead of NaN and the self-pairs would not be masked.
        new_label_tensor_test = labels_tensor[np.ix_(test, test)].astype(float)
        new_label_tensor_test[np.diag_indices(len(test))] = np.nan

    return(new_label_tensor_train, new_label_tensor_test)

In [None]:
def learning_setting1(hyperparameters, K_drug, K_Y, k_fold, dirname, types):
    """
    Cross-validate the ThreeStep model for setting 1 (leave-out group [[1,2,3]]).

    types = [K_drug][K_Y] type
     - K_drug option [1] K_drug_smile_cos [2] K_drug_morgan_cos
     - K_Y option [1] K_Y_cos [2] K_Y_jacc [3] K_Y_other

    ex) types = [2][1] in TDC means K_drug_morgan_cos, K_SE_cos

    Parameters
    ----------
    hyperparameters : list
        Candidate ``lamb`` lists; the one with the best leave-out AUC on the
        training tensor is used for the final fit of each fold.
    K_drug, K_Y : np.ndarray
        Drug kernel (used for the first two modes) and kernel of the third mode.
    k_fold : int
        Number of folds to run.
    dirname : str
        Directory prefix for the log file and the score file.
    types : str
        Tag inserted into the output file names.

    Reads the module-level globals ``tests_pos``, ``tests_neg`` and
    ``labels_tensor``. Writes ``T1_<types>.log`` (overwritten) and appends to
    ``T1_<types>_score.txt``.
    """
    score_path = dirname + "T1_{}_score.txt".format(types)

    # model
    threestep = ThreeStep()

    # K-fold Cross-validation
    auc_score = []
    # The log file is now closed even if a fold raises.
    with open(dirname + "T1_{}.log".format(types), 'w') as f:
        for fold in range(k_fold):
            print("### {}th training set is processing ###".format(fold+1), file =f)

            test_pos = np.load(tests_pos[fold])
            test_neg = np.load(tests_neg[fold])
            ## rebuild label tensor for training ##
            train_label_tensor, test_label_tensor = to_new_label_tensor(labels_tensor, test_pos, test_neg, [[1,2,3]])

            # leave-one-out estimation
            print("Leave one out estimation processing...", file =f)

            # find hyperparameters (first two have same parameter)
            ac = []
            for hyper in hyperparameters:
                print("LOO estimating... with the hyperparameter {}".format(hyper), file =f)
                FLO = threestep.leave_out_estimate([K_drug,K_drug,K_Y], train_label_tensor,hyper,[[1,2,3]])
                ac.append(roc_auc_score(train_label_tensor.flatten(), FLO.flatten()))

            chosen_hyper = hyperparameters[np.argmax(ac)]
            print('chosen hyperparameter', chosen_hyper, file =f)

            # fit into the model with selected hyperparameters
            print("Fitting the model...", file =f)
            threestep.fit([K_drug, K_drug, K_Y], train_label_tensor, chosen_hyper)

            # predict with test datasets
            print("Predicting the model...", file =f)
            F = threestep.predict([K_drug, K_drug, K_Y])

            # slice out the scored (positive and negative) test triplets
            pos_idx = tuple(test_pos.transpose())
            neg_idx = tuple(test_neg.transpose())
            test_all = np.append(test_label_tensor[pos_idx], test_label_tensor[neg_idx])
            F_all = np.append(F[pos_idx], F[neg_idx])

            # auc score
            score = roc_auc_score(test_all, F_all)
            print("score:", score , file =f)
            auc_score.append(score)

            with open(score_path, 'a') as score_to_write:
                score_to_write.write(str(score))
                score_to_write.write('\n')

    with open(score_path, 'a') as score_to_write:
        score_to_write.write('avg score: ' + str(sum(auc_score)/len(auc_score)))
    print("avg score: ", sum(auc_score)/len(auc_score))

In [None]:
def learning_setting2(hyperparameters, K_drug, K_Y, k_fold, dirname, types):
    """
    Cross-validate the ThreeStep model for setting 2 (leave-out group [[1,2]]).

    types = [K_drug][K_Y] type
     - K_drug option [1] K_drug_smile_cos [2] K_drug_morgan_cos
     - K_Y option [1] K_Y_cos [2] K_Y_jacc [3] K_Y_other

    ex) types = [2][1] in TDC means K_drug_morgan_cos, K_SE_cos

    Parameters
    ----------
    hyperparameters : list
        Candidate ``lamb`` lists; the one with the best leave-out AUC on the
        training tensor is used for the final fit of each fold.
    K_drug : np.ndarray
        Drug kernel, used for the first two modes.
    K_Y : np.ndarray or list of str
        Kernel of the third mode, or a list of per-fold ``.npy`` paths (one
        kernel per fold, loaded with ``np.load``).
    k_fold : int
        Number of folds to run.
    dirname : str
        Directory prefix for the log file and the score file.
    types : str
        Tag inserted into the output file names.

    Reads the module-level globals ``trains``, ``tests`` and ``labels_tensor``.
    Writes ``T2_<types>.log`` (overwritten) and appends to ``T2_<types>_score.txt``.
    """
    score_path = dirname + "T2_{}_score.txt".format(types)

    # A sequence of path strings means "one kernel file per fold". This replaces
    # the former `len(K_Y) == 10` test, which misread a 10-row kernel matrix
    # and broke for any other number of folds.
    per_fold_K_Y = isinstance(K_Y, (list, tuple)) and all(isinstance(p, str) for p in K_Y)

    # model
    threestep = ThreeStep()

    # K-fold Cross-validation
    auc_score = []
    # The log file is now closed even if a fold raises.
    with open(dirname + "T2_{}.log".format(types), 'w') as f:
        for fold in range(k_fold):
            print("### {}th training set is processing ###".format(fold+1), file =f)

            train = np.load(trains[fold])
            test = np.load(tests[fold])

            K_Y_train = np.load(K_Y[fold]) if per_fold_K_Y else K_Y

            ## rebuild label tensor for training ##
            train_label_tensor, test_label_tensor = to_new_label_tensor(labels_tensor, train, test, [[1,2]])

            # leave-one-out estimation
            print("Leave one out estimation processing...", file =f)

            # find hyperparameters (first two have same parameter)
            ac = []
            for hyper in hyperparameters:
                print("LOO estimating... with the hyperparameter {}".format(hyper), file =f)
                FLO = threestep.leave_out_estimate([K_drug, K_drug, K_Y_train], train_label_tensor,hyper, [[1,2]])
                ac.append(roc_auc_score(train_label_tensor.flatten(), FLO.flatten()))

            chosen_hyper = hyperparameters[np.argmax(ac)]
            print('chosen hyperparameter', chosen_hyper, file =f)

            # fit into the model with selected hyperparameters
            print("Fitting the model...", file =f)
            threestep.fit([K_drug, K_drug, K_Y_train], train_label_tensor, chosen_hyper)

            # predict with test datasets
            print("Predicting the model...", file =f)
            F = threestep.predict([K_drug, K_drug, K_Y_train])

            # flatten and drop the entries marked NaN in the test tensor
            test_flatten = test_label_tensor.flatten()
            F_flatten = F.flatten()
            keep = ~np.isnan(test_flatten)
            test_adjusted = test_flatten[keep]
            F_adjusted = F_flatten[keep]

            # auc score
            score = roc_auc_score(test_adjusted, F_adjusted)
            print("score:", score , file =f)
            auc_score.append(score)

            with open(score_path, 'a') as score_to_write:
                score_to_write.write(str(score))
                score_to_write.write('\n')

    with open(score_path, 'a') as score_to_write:
        score_to_write.write('avg score: ' + str(sum(auc_score)/len(auc_score)))
    print("avg score: ", sum(auc_score)/len(auc_score))

In [None]:
def learning_setting3(hyperparameters, K_drug, K_Y, k_fold, dirname, types):
    """
    Cross-validate the ThreeStep model for setting 3 (leave-out group [[1]]).

    The model is trained on the (train x train) drug block and evaluated on
    the (train x test) block of the label tensor.

    Parameters
    ----------
    hyperparameters : list
        Candidate ``lamb`` lists; the one with the best leave-out AUC on the
        training tensor is used for the final fit of each fold.
    K_drug : np.ndarray
        Full drug kernel; sub-blocks are selected with the fold indices.
    K_Y : np.ndarray or list of str
        Kernel of the third mode, or a list of per-fold ``.npy`` paths (one
        kernel per fold, loaded with ``np.load``).
    k_fold : int
        Number of folds to run.
    dirname : str
        Directory prefix for the log file and the score file.
    types : str
        Tag inserted into the output file names.

    Reads the module-level globals ``trains``, ``tests`` and ``labels_tensor``.
    Writes ``T3_<types>.log`` (overwritten) and appends to ``T3_<types>_score.txt``.
    """
    score_path = dirname + "T3_{}_score.txt".format(types)

    # A sequence of path strings means "one kernel file per fold". This replaces
    # the former `len(K_Y) == 10` test, which misread a 10-row kernel matrix
    # and broke for any other number of folds.
    per_fold_K_Y = isinstance(K_Y, (list, tuple)) and all(isinstance(p, str) for p in K_Y)

    # model
    threestep = ThreeStep()

    # K-fold Cross-validation
    auc_score = []
    # The log file is now closed even if a fold raises.
    with open(dirname + "T3_{}.log".format(types), 'w') as f:
        for fold in range(k_fold):
            print("### {}th training set is processing ###".format(fold+1), file =f)
            train = np.load(trains[fold])
            test = np.load(tests[fold])

            K_drug_train = K_drug[train][:,train]
            K_Y_train = np.load(K_Y[fold]) if per_fold_K_Y else K_Y

            ## rebuild label tensor for training ##
            train_label_tensor, test_label_tensor = to_new_label_tensor(labels_tensor, train, test, [[1]])

            # leave-one-out estimation
            print("Leave one out estimation processing...", file =f)

            # find hyperparameters (first two have same parameter)
            ac = []
            for hyper in hyperparameters:
                print("LOO estimating... with the hyperparameter {}".format(hyper), file =f)
                FLO = threestep.leave_out_estimate([K_drug_train, K_drug_train, K_Y_train], train_label_tensor, hyper, [[1]])
                ac.append(roc_auc_score(train_label_tensor.flatten(), FLO.flatten()))

            chosen_hyper = hyperparameters[np.argmax(ac)]
            print('chosen hyperparameter', chosen_hyper, file =f)

            # fit into the model with selected hyperparameters
            print("Fitting the model...", file =f)
            threestep.fit([K_drug_train, K_drug_train, K_Y_train], train_label_tensor, chosen_hyper)

            # predict with test datasets: rows = test drugs, columns = train drugs
            print("Predicting the model...", file =f)
            K_drug_test = K_drug[test][:,train]
            F = threestep.predict([K_drug_train, K_drug_test, K_Y_train])

            # flat
            test_flat = test_label_tensor.flatten()
            F_flat = F.flatten()

            # auc score
            score = roc_auc_score(test_flat, F_flat)
            print("score:", score , file =f)
            auc_score.append(score)

            with open(score_path, 'a') as score_to_write:
                score_to_write.write(str(score))
                score_to_write.write('\n')

    with open(score_path, 'a') as score_to_write:
        score_to_write.write('avg score: ' + str(sum(auc_score)/len(auc_score)))
    print("avg score: ", sum(auc_score)/len(auc_score))

In [None]:
def learning_setting4(hyperparameters, K_drug, K_Y, k_fold, dirname, types):
    """
    Cross-validate the ThreeStep model for setting 4 (leave-out groups [[1],[2]]).

    The model is trained on the (train x train) drug block and evaluated on
    the (test x test) block of the label tensor; entries marked NaN in the
    test tensor are excluded from the score.

    Parameters
    ----------
    hyperparameters : list
        Candidate ``lamb`` lists; the one with the best leave-out AUC on the
        training tensor is used for the final fit of each fold.
    K_drug : np.ndarray
        Full drug kernel; sub-blocks are selected with the fold indices.
    K_Y : np.ndarray or list of str
        Kernel of the third mode, or a list of per-fold ``.npy`` paths (one
        kernel per fold, loaded with ``np.load``).
    k_fold : int
        Number of folds to run.
    dirname : str
        Directory prefix for the log file and the score file.
    types : str
        Tag inserted into the output file names.

    Reads the module-level globals ``trains``, ``tests`` and ``labels_tensor``.
    Writes ``T4_<types>.log`` (overwritten) and appends to ``T4_<types>_score.txt``.
    """
    score_path = dirname + "T4_{}_score.txt".format(types)

    # A sequence of path strings means "one kernel file per fold". This replaces
    # the former `len(K_Y) == 10` test, which misread a 10-row kernel matrix
    # and broke for any other number of folds.
    per_fold_K_Y = isinstance(K_Y, (list, tuple)) and all(isinstance(p, str) for p in K_Y)

    # model
    threestep = ThreeStep()

    # K-fold Cross-validation
    auc_score = []
    # The log file is now closed even if a fold raises.
    with open(dirname + "T4_{}.log".format(types), 'w') as f:
        for fold in range(k_fold):
            print("### {}th training set is processing ###".format(fold+1), file =f)
            train = np.load(trains[fold])
            test = np.load(tests[fold])

            K_drug_train = K_drug[train][:,train]
            K_Y_train = np.load(K_Y[fold]) if per_fold_K_Y else K_Y

            ## rebuild label tensor for training ##
            train_label_tensor, test_label_tensor = to_new_label_tensor(labels_tensor, train, test, [[1],[2]])

            # leave-one-out estimation
            print("Leave one out estimation processing...", file =f)

            # find hyperparameters (first two have same parameter)
            ac = []
            for hyper in hyperparameters:
                print("LOO estimating... with the hyperparameter {}".format(hyper), file =f)
                FLO = threestep.leave_out_estimate([K_drug_train, K_drug_train, K_Y_train], train_label_tensor, hyper, [[1],[2]])
                ac.append(roc_auc_score(train_label_tensor.flatten(), FLO.flatten()))

            chosen_hyper = hyperparameters[np.argmax(ac)]
            print('chosen hyperparameter', chosen_hyper, file =f)

            # fit into the model with selected hyperparameters
            print("Fitting the model...", file =f)
            threestep.fit([K_drug_train, K_drug_train, K_Y_train], train_label_tensor, chosen_hyper)

            # predict with test datasets: rows = test drugs, columns = train drugs
            print("Predicting the model...", file =f)
            K_drug_test = K_drug[test][:,train]
            F = threestep.predict([K_drug_test, K_drug_test, K_Y_train])

            # flatten and drop the entries marked NaN in the test tensor
            test_flatten = test_label_tensor.flatten()
            F_flatten = F.flatten()
            keep = ~np.isnan(test_flatten)
            test_adjusted = test_flatten[keep]
            F_adjusted = F_flatten[keep]

            # auc score
            score = roc_auc_score(test_adjusted, F_adjusted)
            print("score:", score , file =f)
            auc_score.append(score)

            with open(score_path, 'a') as score_to_write:
                score_to_write.write(str(score))
                score_to_write.write('\n')

    with open(score_path, 'a') as score_to_write:
        score_to_write.write('avg score: ' + str(sum(auc_score)/len(auc_score)))
    print("avg score: ", sum(auc_score)/len(auc_score))

## TDC

#### TDC Labels

In [2]:
# TDC triplets > Yabc (3D label tensor).
# `labels_tensor` is read as a module-level global by the learning_setting* functions.
# NOTE(review): 645 and 1317 are presumably the number of drugs and the number of
# side effects expected by triplets_to_tensor -- confirm against the `assistant` notebook.
labels_triplets = np.loadtxt("../Final_DF/TDC_Label.txt").astype(int)
labels_tensor = triplets_to_tensor(labels_triplets, 645, 1317).astype(bool)

#### TDC Datasets

In [None]:
# Setting-1 test splits: sorted lists of file paths, indexed by fold inside
# learning_setting1 (read there as module-level globals).
tests_pos = sorted(glob.glob('../Final_Experiments/TDC/T1_test_pos_[0-9]*'))
tests_neg = sorted(glob.glob('../Final_Experiments/TDC/T1_test_neg_[0-9]*'))

# drug similarity kernels; the bracketed number is the first index of the `types` tag
K_drug_smile_cos = np.loadtxt("../Final_DF/TDC_drug_smile_cos.txt")  # [1]
K_drug_morgan_cos = np.loadtxt("../Final_DF/TDC_drug_morgan_cos.txt") # [2]
K_drug_smile_tanimoto = np.loadtxt("../Final_DF/TDC_drug_smile_tanimoto.txt")     # [3]
K_drug_morgan_tanimoto = np.loadtxt("../Final_DF/TDC_drug_morgan_tanimoto.txt")  # [4]

# Y (side effect) similarity kernels; the bracketed number is the second index of the `types` tag
K_SE_cos =  np.loadtxt("../Final_DF/TDC_SE_cos.txt")   # [1]
K_SE_jacc = np.loadtxt("../Final_DF/TDC_SE_jacc.txt")  # [2]


### Training & Testing

In [None]:
# Candidate regularisation settings for TDC. Each entry is one list of three values
# that the learning_setting* functions pass as `lamb` to ThreeStep (one value per kernel).
hyperparameters = [[0.1,0.1,0.1], [1,1,1], [10,10,10]]

#### Setting 1

In [None]:
# Setting 1 on TDC: run every drug-kernel x side-effect-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_se_kernels = [K_SE_cos, K_SE_jacc]
for (_d, _k_drug), (_y, _k_se) in product(enumerate(_drug_kernels, 1), enumerate(_se_kernels, 1)):
    learning_setting1(hyperparameters, _k_drug, _k_se, 10, "../Final_Experiments/TDC/", "[{}][{}]".format(_d, _y))

#### Setting 2

In [None]:
# Setting-2 train & test splits: sorted lists of file paths, indexed by fold inside
# learning_setting2 (read there as the module-level globals `trains` / `tests`).
trains = sorted(glob.glob('../Final_Experiments/TDC/T2_train_[0-9]*'))
tests = sorted(glob.glob('../Final_Experiments/TDC/T2_test_[0-9]*'))

# modified Y (side effect) similarity kernels.
# NOTE: this rebinds K_SE_cos / K_SE_jacc from the kernel matrices loaded earlier to
# sorted lists of file paths, so the Setting 1 cell must not be re-run after this
# cell without reloading the TDC datasets cell first.
K_SE_cos = sorted(glob.glob('../Final_Experiments/TDC/T2_train_K_SE_cos_*'))    # [1]
K_SE_jacc = sorted(glob.glob('../Final_Experiments/TDC/T2_train_K_SE_jacc_*'))  # [2]

In [None]:
# Setting 2 on TDC: run every drug-kernel x side-effect-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_se_kernels = [K_SE_cos, K_SE_jacc]
for (_d, _k_drug), (_y, _k_se) in product(enumerate(_drug_kernels, 1), enumerate(_se_kernels, 1)):
    learning_setting2(hyperparameters, _k_drug, _k_se, 10, "../Final_Experiments/TDC/", "[{}][{}]".format(_d, _y))

#### Setting 3

In [None]:
# Setting-3 train & test splits: sorted lists of file paths, indexed by fold inside
# the learning functions (read there as the module-level globals `trains` / `tests`).
# This rebinds the `trains` / `tests` lists of the Setting 2 cell.
trains = sorted(glob.glob('../Final_Experiments/TDC/T3_train_[0-9]*'))
tests = sorted(glob.glob('../Final_Experiments/TDC/T3_test_[0-9]*'))

# modified Y (side effect) similarity kernels: sorted lists of file paths
# (rebinds K_SE_cos / K_SE_jacc once more).
K_SE_cos =  sorted(glob.glob("../Final_Experiments/TDC/T3_train_K_SE_cos_[0-9]*"))   # [1]
K_SE_jacc = sorted(glob.glob("../Final_Experiments/TDC/T3_train_K_SE_jacc_[0-9]*"))  # [2]

In [None]:
# Setting 3 on TDC: run every drug-kernel x side-effect-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_se_kernels = [K_SE_cos, K_SE_jacc]
for (_d, _k_drug), (_y, _k_se) in product(enumerate(_drug_kernels, 1), enumerate(_se_kernels, 1)):
    learning_setting3(hyperparameters, _k_drug, _k_se, 10, "../Final_Experiments/TDC/", "[{}][{}]".format(_d, _y))

#### Setting 4

In [None]:
# Setting 4 on TDC: run every drug-kernel x side-effect-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
# NOTE(review): no Setting-4 splits are loaded in the visible cells, so this cell uses
# whatever `trains`, `tests`, K_SE_cos and K_SE_jacc the previously run cell left
# behind (the Setting 3 files when run top to bottom) -- confirm that this is intended.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_se_kernels = [K_SE_cos, K_SE_jacc]
for (_d, _k_drug), (_y, _k_se) in product(enumerate(_drug_kernels, 1), enumerate(_se_kernels, 1)):
    learning_setting4(hyperparameters, _k_drug, _k_se, 10, "../Final_Experiments/TDC/", "[{}][{}]".format(_d, _y))

## DCDB

#### DCDB Labels

In [None]:
# DCDB triplets > Yabc (3D label tensor).
# This rebinds the module-level `labels_tensor` that the learning_setting* functions read,
# so every run below this cell works on DCDB instead of TDC.
# NOTE(review): 546 and 268 are presumably the number of drugs and the number of
# diseases expected by triplets_to_tensor -- confirm against the `assistant` notebook.
labels_triplets = np.loadtxt("../Final_DF/DCDB_Label.txt").astype('int16')
labels_tensor = triplets_to_tensor(labels_triplets, 546, 268).astype(bool)

#### DCDB Datasets

In [None]:
# Setting-1 test splits: sorted lists of file paths, indexed by fold inside
# learning_setting1 (read there as module-level globals).
tests_pos = sorted(glob.glob('../Final_Experiments/DCDB/T1_test_pos_[0-9]*'))
tests_neg = sorted(glob.glob('../Final_Experiments/DCDB/T1_test_neg_[0-9]*'))

# drug similarity kernels (these rebind the TDC kernels of the same name);
# the bracketed number is the first index of the `types` tag
K_drug_smile_cos = np.loadtxt("../Final_DF/DCDB_drug_smile_cos.txt")             # [1]
K_drug_morgan_cos = np.loadtxt("../Final_DF/DCDB_drug_morgan_cos.txt")           # [2] 
K_drug_smile_tanimoto = np.loadtxt("../Final_DF/DCDB_drug_smile_tanimoto.txt")   # [3]
K_drug_morgan_tanimoto = np.loadtxt("../Final_DF/DCDB_drug_morgan_tanimoto.txt") # [4]

# Y (disease) similarity kernels; the bracketed number is the second index of the `types` tag
K_ICD10_pair_cos =  np.loadtxt("../Final_DF/DCDB_ICD10_pair_cos.txt")     # [1]
K_ICD10_pair_jacc = np.loadtxt("../Final_DF/DCDB_ICD10_pair_jacc.txt")    # [2]
K_ICD10_hier =  np.loadtxt("../Final_DF/DCDB_ICD10_hier.txt")             # [3]
K_ICD10_jacc = np.loadtxt("../Final_DF/DCDB_ICD10_jacc.txt")              # [4]

### Training & Testing

In [None]:
# Candidate regularisation settings for DCDB (rebinds the TDC list). Each entry is one list
# of three values that the learning_setting* functions pass as `lamb` to ThreeStep.
hyperparameters = [[1,1,1],[10,10,10],[100,100,100]]

#### Setting 1

In [None]:
# Setting 1 on DCDB: run every drug-kernel x disease-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
# The hand-written list ran K_ICD10_pair_jacc / "[4][2]" twice and never ran
# K_ICD10_hier / "[4][3]"; looping over the full grid restores the missing run.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_disease_kernels = [K_ICD10_pair_cos, K_ICD10_pair_jacc, K_ICD10_hier, K_ICD10_jacc]
for (_d, _k_drug), (_y, _k_disease) in product(enumerate(_drug_kernels, 1), enumerate(_disease_kernels, 1)):
    learning_setting1(hyperparameters, _k_drug, _k_disease, 10, "../Final_Experiments/DCDB/", "[{}][{}]".format(_d, _y))

#### Setting 2

In [None]:
# Setting-2 train & test splits: sorted lists of file paths, indexed by fold inside
# learning_setting2 (read there as the module-level globals `trains` / `tests`).
trains = sorted(glob.glob('../Final_Experiments/DCDB/T2_train_[0-9]*'))
tests = sorted(glob.glob('../Final_Experiments/DCDB/T2_test_[0-9]*'))

# modified Y (disease) similarity kernels: sorted lists of file paths.
# NOTE(review): these names end in `pairs_`; the calls in the visible cells pass
# K_ICD10_pair_cos / K_ICD10_pair_jacc (the full matrices), so these lists appear unused
# -- confirm whether the per-fold kernels were meant to be passed instead.
K_ICD10_pairs_cos =  sorted(glob.glob("../Final_Experiments/DCDB/T2_train_K_ICD10_cos_[0-9]*"))   # [1]
K_ICD10_pairs_jacc = sorted(glob.glob("../Final_Experiments/DCDB/T2_train_K_ICD10_jacc_[0-9]*"))  # [2] 

In [None]:
# Setting 2 on DCDB: run every drug-kernel x disease-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
# The hand-written list ran K_ICD10_pair_jacc / "[4][2]" twice and never ran
# K_ICD10_hier / "[4][3]"; looping over the full grid restores the missing run.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_disease_kernels = [K_ICD10_pair_cos, K_ICD10_pair_jacc, K_ICD10_hier, K_ICD10_jacc]
for (_d, _k_drug), (_y, _k_disease) in product(enumerate(_drug_kernels, 1), enumerate(_disease_kernels, 1)):
    learning_setting2(hyperparameters, _k_drug, _k_disease, 10, "../Final_Experiments/DCDB/", "[{}][{}]".format(_d, _y))

#### Setting 3

In [None]:
# Setting-3 train & test splits: sorted lists of file paths, indexed by fold inside
# the learning functions (read there as the module-level globals `trains` / `tests`).
# This rebinds the `trains` / `tests` lists of the Setting 2 cell.
trains = sorted(glob.glob('../Final_Experiments/DCDB/T3_train_[0-9]*'))
tests = sorted(glob.glob('../Final_Experiments/DCDB/T3_test_[0-9]*'))

# modified Y (disease) similarity kernels: sorted lists of file paths.
# NOTE(review): these names end in `pairs_`; the calls in the visible cells pass
# K_ICD10_pair_cos / K_ICD10_pair_jacc (the full matrices), so these lists appear unused
# -- confirm whether the per-fold kernels were meant to be passed instead.
K_ICD10_pairs_cos = sorted(glob.glob("../Final_Experiments/DCDB/T3_train_K_ICD10_cos_[0-9]*"))     # [1]
K_ICD10_pairs_jacc = sorted(glob.glob("../Final_Experiments/DCDB/T3_train_K_ICD10_jacc_[0-9]*"))   # [2]

In [None]:
# Setting 3 on DCDB: run every drug-kernel x disease-kernel combination.
# The "[d][y]" tag is the 1-based position of each kernel in the lists below.
# The hand-written list ran K_ICD10_pair_jacc / "[4][2]" twice and never ran
# K_ICD10_hier / "[4][3]"; looping over the full grid restores the missing run.
_drug_kernels = [K_drug_smile_cos, K_drug_morgan_cos, K_drug_smile_tanimoto, K_drug_morgan_tanimoto]
_disease_kernels = [K_ICD10_pair_cos, K_ICD10_pair_jacc, K_ICD10_hier, K_ICD10_jacc]
for (_d, _k_drug), (_y, _k_disease) in product(enumerate(_drug_kernels, 1), enumerate(_disease_kernels, 1)):
    learning_setting3(hyperparameters, _k_drug, _k_disease, 10, "../Final_Experiments/DCDB/", "[{}][{}]".format(_d, _y))

#### Setting 4

In [None]:
# Setting 4 on DCDB: run learning_setting4 once for every
# (drug kernel, disease kernel) combination, tagging each run "[d][y]" where
# d / y are the 1-based positions of the kernels in the two lists below.
#
# Fixed: the hand-written list of 16 calls ran the "[4][2]" combination twice and
# never ran "[4][3]" (K_drug_morgan_tanimoto x K_ICD10_hier). Generating the grid
# from the kernel lists makes the tag and the kernels impossible to mismatch.
#
# NOTE(review): the literal 10 is passed through unchanged; presumably the number
# of folds — confirm against learning_setting4.
_dcdb_s4_drug_kernels = [
    K_drug_smile_cos,        # [1]
    K_drug_morgan_cos,       # [2]
    K_drug_smile_tanimoto,   # [3]
    K_drug_morgan_tanimoto,  # [4]
]
_dcdb_s4_disease_kernels = [
    K_ICD10_pair_cos,   # [1]
    K_ICD10_pair_jacc,  # [2]
    K_ICD10_hier,       # [3]
    K_ICD10_jacc,       # [4]
]

# Drug kernel is the outer loop so the run order matches the original cell:
# [1][1], [1][2], [1][3], [1][4], [2][1], ...
for _d_idx, _drug_kernel in enumerate(_dcdb_s4_drug_kernels, start=1):
    for _y_idx, _disease_kernel in enumerate(_dcdb_s4_disease_kernels, start=1):
        learning_setting4(
            hyperparameters,
            _drug_kernel,
            _disease_kernel,
            10,
            "../Final_Experiments/DCDB/",
            "[{}][{}]".format(_d_idx, _y_idx),
        )

## NCI-ALMANAC

#### NCI Labels

In [None]:
# NCI triplets > Yabc (3D Label Tensor)
# Read the filtered NCI label file as a numeric table, cast it to int and keep
# only its first three columns (one label triplet per row).
labels_triplets = np.loadtxt("../Final_DF/NCI_Label_filtered.txt").astype(int)[:,0:3]  
# Convert the triplet list into the label tensor with the helper imported from `assistant`.
# NOTE(review): 103 and 60 are presumably the tensor dimensions (number of drugs and
# number of cell lines) — confirm against triplets_to_tensor.
# This rebinds the global `labels_tensor`.
labels_tensor = triplets_to_tensor(labels_triplets, 103, 60)  

#### NCI Datasets

In [None]:
## positive labels & negative labels
# Setting 1 test files for NCI, split into positive and negative label files;
# both lists are sorted lexicographically by path.
tests_pos = sorted(glob.glob('../Final_Experiments/NCI/T1_test_pos_[0-9]*'))
tests_neg = sorted(glob.glob('../Final_Experiments/NCI/T1_test_neg_[0-9]*'))

# drug similarity kernels 
# Loaded from text files; the bracketed number is the index used as the first
# part of the "[d][c]" run tags in the experiment cells below.
# NOTE: these assignments rebind the K_drug_* names that the DCDB section above
# also uses, so the DCDB cells cannot be re-run after this cell without reloading.
K_drug_smile_cos = np.loadtxt("../Final_DF/NCI_drug_smile_cos.txt")              # [1]
K_drug_morgan_cos = np.loadtxt("../Final_DF/NCI_drug_morgan_cos.txt")            # [2]
K_drug_smile_tanimoto = np.loadtxt("../Final_DF/NCI_drug_smile_tanimoto.txt")    # [3]
K_drug_morgan_tanimoto = np.loadtxt("../Final_DF/NCI_drug_morgan_tanimoto.txt")  # [4]

## Y(cancer cell line) similarity kernels
# Loaded from text files; the bracketed number is the index used as the second
# part of the "[d][c]" run tags.
K_cellLine_cos =  np.loadtxt("../Final_DF/NCI_CellLine_cos.txt")   # [1]
K_cellLine_jacc = np.loadtxt("../Final_DF/NCI_CellLine_jacc.txt")  # [2]

In [None]:
# Hyperparameter grid handed to every learning_setting* call: four candidates,
# each a triple that repeats one value three times.
# NOTE(review): presumably one regularisation value (lambda) per kernel — confirm
# against the learning_setting* functions.
hyperparameters = [[value] * 3 for value in (0.01, 0.1, 1, 10)]

#### Setting 1

In [None]:
# Setting 1 on NCI: one learning_setting1 run per (drug kernel, cell-line kernel)
# pair. The tag "[d][c]" repeats the bracketed kernel numbers given where the
# kernels are loaded in the "NCI Datasets" cell.
_nci_s1_runs = [
    (K_drug_smile_cos,       K_cellLine_cos,  "[1][1]"),
    (K_drug_smile_cos,       K_cellLine_jacc, "[1][2]"),
    (K_drug_morgan_cos,      K_cellLine_cos,  "[2][1]"),
    (K_drug_morgan_cos,      K_cellLine_jacc, "[2][2]"),
    (K_drug_smile_tanimoto,  K_cellLine_cos,  "[3][1]"),
    (K_drug_smile_tanimoto,  K_cellLine_jacc, "[3][2]"),
    (K_drug_morgan_tanimoto, K_cellLine_cos,  "[4][1]"),
    (K_drug_morgan_tanimoto, K_cellLine_jacc, "[4][2]"),
]
# Runs execute in table order, i.e. the same order as the original explicit calls.
for _drug_kernel, _cell_kernel, _tag in _nci_s1_runs:
    learning_setting1(hyperparameters, _drug_kernel, _cell_kernel, 10, "../Final_Experiments/NCI/", _tag)

#### Setting 2

In [None]:
# Setting 2 inputs (NCI): train / test files matching the T2_* patterns,
# sorted lexicographically by path (a numeric suffix 10 sorts before 2).
trains = glob.glob('../Final_Experiments/NCI/T2_train_[0-9]*')
trains.sort()
tests = glob.glob('../Final_Experiments/NCI/T2_test_[0-9]*')
tests.sort()

# Paths of the modified Y (cancer cell line) similarity kernels for Setting 2.
# NOTE: this rebinds K_cellLine_cos / K_cellLine_jacc from the matrices loaded in
# the "NCI Datasets" cell to sorted lists of file paths; re-run that cell before
# repeating Setting 1.
K_cellLine_cos = glob.glob("../Final_Experiments/NCI/T2_train_K_cellLine_cos_[0-9]*")
K_cellLine_cos.sort()
K_cellLine_jacc = glob.glob("../Final_Experiments/NCI/T2_train_K_cellLine_jacc_[0-9]*")
K_cellLine_jacc.sort()

In [None]:
# Setting 2 on NCI: one learning_setting2 run per (drug kernel, cell-line kernel)
# pair, tagged "[d][c]". At this point K_cellLine_cos / K_cellLine_jacc are the
# sorted lists of T2 kernel file paths assigned in the preceding cell.
_nci_s2_runs = [
    (K_drug_smile_cos,       K_cellLine_cos,  "[1][1]"),
    (K_drug_smile_cos,       K_cellLine_jacc, "[1][2]"),
    (K_drug_morgan_cos,      K_cellLine_cos,  "[2][1]"),
    (K_drug_morgan_cos,      K_cellLine_jacc, "[2][2]"),
    (K_drug_smile_tanimoto,  K_cellLine_cos,  "[3][1]"),
    (K_drug_smile_tanimoto,  K_cellLine_jacc, "[3][2]"),
    (K_drug_morgan_tanimoto, K_cellLine_cos,  "[4][1]"),
    (K_drug_morgan_tanimoto, K_cellLine_jacc, "[4][2]"),
]
# Runs execute in table order, i.e. the same order as the original explicit calls.
for _drug_kernel, _cell_kernel, _tag in _nci_s2_runs:
    learning_setting2(hyperparameters, _drug_kernel, _cell_kernel, 10, "../Final_Experiments/NCI/", _tag)

#### Setting 3

In [None]:
# Setting 3 inputs (NCI): train / test files matching the T3_* patterns,
# sorted lexicographically by path (a numeric suffix 10 sorts before 2).
trains = glob.glob('../Final_Experiments/NCI/T3_train_[0-9]*')
trains.sort()
tests = glob.glob('../Final_Experiments/NCI/T3_test_[0-9]*')
tests.sort()

# Paths of the modified Y (cancer cell line) similarity kernels for Setting 3.
# NOTE: this rebinds K_cellLine_cos / K_cellLine_jacc (previously the Setting 2
# path lists, or the matrices from "NCI Datasets") to the T3 file-path lists.
K_cellLine_cos = glob.glob("../Final_Experiments/NCI/T3_train_K_cellLine_cos_[0-9]*")
K_cellLine_cos.sort()
K_cellLine_jacc = glob.glob("../Final_Experiments/NCI/T3_train_K_cellLine_jacc_[0-9]*")
K_cellLine_jacc.sort()

In [None]:
# Setting 3 on NCI: one learning_setting3 run per (drug kernel, cell-line kernel)
# pair, tagged "[d][c]". At this point K_cellLine_cos / K_cellLine_jacc are the
# sorted lists of T3 kernel file paths assigned in the preceding cell.
_nci_s3_runs = [
    (K_drug_smile_cos,       K_cellLine_cos,  "[1][1]"),
    (K_drug_smile_cos,       K_cellLine_jacc, "[1][2]"),
    (K_drug_morgan_cos,      K_cellLine_cos,  "[2][1]"),
    (K_drug_morgan_cos,      K_cellLine_jacc, "[2][2]"),
    (K_drug_smile_tanimoto,  K_cellLine_cos,  "[3][1]"),
    (K_drug_smile_tanimoto,  K_cellLine_jacc, "[3][2]"),
    (K_drug_morgan_tanimoto, K_cellLine_cos,  "[4][1]"),
    (K_drug_morgan_tanimoto, K_cellLine_jacc, "[4][2]"),
]
# Runs execute in table order, i.e. the same order as the original explicit calls.
for _drug_kernel, _cell_kernel, _tag in _nci_s3_runs:
    learning_setting3(hyperparameters, _drug_kernel, _cell_kernel, 10, "../Final_Experiments/NCI/", _tag)

#### Setting 4

In [None]:
# Setting 4 on NCI: one learning_setting4 run per (drug kernel, cell-line kernel)
# pair, tagged "[d][c]".
# NOTE(review): no kernels are (re)loaded for Setting 4, so when the notebook is run
# top to bottom K_cellLine_cos / K_cellLine_jacc are still the T3 file-path lists
# assigned in the Setting 3 cell — confirm that is what learning_setting4 expects.
_nci_s4_runs = [
    (K_drug_smile_cos,       K_cellLine_cos,  "[1][1]"),
    (K_drug_smile_cos,       K_cellLine_jacc, "[1][2]"),
    (K_drug_morgan_cos,      K_cellLine_cos,  "[2][1]"),
    (K_drug_morgan_cos,      K_cellLine_jacc, "[2][2]"),
    (K_drug_smile_tanimoto,  K_cellLine_cos,  "[3][1]"),
    (K_drug_smile_tanimoto,  K_cellLine_jacc, "[3][2]"),
    (K_drug_morgan_tanimoto, K_cellLine_cos,  "[4][1]"),
    (K_drug_morgan_tanimoto, K_cellLine_jacc, "[4][2]"),
]
# Runs execute in table order, i.e. the same order as the original explicit calls.
for _drug_kernel, _cell_kernel, _tag in _nci_s4_runs:
    learning_setting4(hyperparameters, _drug_kernel, _cell_kernel, 10, "../Final_Experiments/NCI/", _tag)

In [None]:
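# For further experiments: uncomment exactly ONE of the blocks below to rebuild
# `labels_tensor` from the corresponding alternative label file (the 10 %, 5 %, 2 %
# or 1 % variant), then re-run the experiment cells. Each block overwrites
# `labels_tensor`, so only the last uncommented one would take effect.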

# # 10 %
# labels_triplets_10 = np.loadtxt("../Final_DF/NCI_Label_filtered_10.txt").astype(int)[:,0:3]   
# labels_tensor = triplets_to_tensor(labels_triplets_10, 103, 60)  

# # 5 %
# labels_triplets_5 = np.loadtxt("../Final_DF/NCI_Label_filtered_5.txt").astype(int)[:,0:3]   
# labels_tensor = triplets_to_tensor(labels_triplets_5, 103, 60)  

# # 2 %
# labels_triplets_2 = np.loadtxt("../Final_DF/NCI_Label_filtered_2.txt").astype(int)[:,0:3]   
# labels_tensor = triplets_to_tensor(labels_triplets_2, 103, 60)  

# # 1 %
# labels_triplets_1 = np.loadtxt("../Final_DF/NCI_Label_filtered_1.txt").astype(int)[:,0:3]   
# labels_tensor = triplets_to_tensor(labels_triplets_1, 103, 60)