In [32]:
import yaml
import warnings
from utils import *
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
import torch
import torch.nn.functional as F
from layer import *
from tqdm import tqdm
warnings.filterwarnings("ignore")

with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)
config['device'] = "cuda" if torch.cuda.is_available() else "cpu"
#config['device'] = "cpu"
device = "cuda" if torch.cuda.is_available() else "cpu"
setup(config['seed'])


reg_loss_co = 0.0002
fold = 0

torch.set_default_dtype(torch.float32)

## KNN—Method

In [33]:
class GCN(nn.Module):
    def __init__(self, nfeat, dropout):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(nfeat, 128)
        self.gc2 = GraphConvolution(128, 128)
        self.gc3 = GraphConvolution(128, 128)
        self.dropout = dropout

    def forward(self, x, adj):
        x = x.to(device)
        adj = adj.to(device)
        x11 = F.relu(self.gc1(x, adj), inplace=True)  # 试下tanh
        x1 = F.dropout(x11, self.dropout)
        x22 = self.gc2(x1, adj)
        x2 = F.dropout(x22, self.dropout)
        x3 = self.gc3(x2, adj)
 
        return x11, x22, x3
    
class MLP(nn.Module):
    def __init__(self, nfeat):
        super(MLP, self).__init__()
        self.MLP = nn.Sequential(
            nn.Linear(nfeat, 64, bias=False).apply(init),
            nn.ELU(),
            nn.Linear(64, 16, bias=False).apply(init),
            nn.ELU(),
            nn.Linear(16, 2, bias=False),
            nn.LogSoftmax(dim=1))

    def forward(self, x):
        output = self.MLP(x)
        return output

In [34]:
def knngraph(dateset, feature, aug=False):

    fedge = np.array(generate_knn(feature.cpu().detach().numpy()))
    fedge = load_graph(np.array(fedge), dateset.shape[0])
    edg = torch.Tensor.to_dense(fedge)
    edge = edg.numpy()
    return fedge, feature

In [35]:
class KNN_model(nn.Module):

    def __init__(self, num_protein, num_drug, num_hidden1, num_hidden2, num_out, dropout=0.5,feature_list = [1,2,3]):
        super(KNN_model, self).__init__()
        self.GCN  = GCN(num_hidden2, dropout)
        self.MLP = MLP(num_hidden2)
        self.alpha = 0.8
        self.d = nn.Parameter(torch.Tensor((3), 1, 1))
    def SGC(self, feature, adj):
        if adj.is_sparse:
            adj = adj.to_dense()
        adj = adj + (torch.eye(adj.shape[0]).cuda()) * 2
        deg = torch.sum(adj, dim=1)
        deg[deg <= 1e-10] = 1
        deg_inv = deg.pow(-0.5)
        deg_inv = deg_inv * torch.eye(adj.shape[0]).type(torch.FloatTensor).cuda()
        adj = torch.mm(deg_inv, adj)
        adj = torch.mm(adj, deg_inv).type(torch.FloatTensor)

        output = torch.mm(adj.cuda(), feature.cuda())

        return output
    def forward(self,drug_vec, feature, fedge,dateset_index,  iftrain=True, d=None, p=None):

        adj =  fedge
     
        X_conv1 = self.SGC(feature, adj)
        X_conv2 = self.SGC(X_conv1, adj)
        X_conv3 = self.SGC(X_conv2, adj)
        X_conv4 = self.SGC(X_conv3, adj)
        X_conv5 = self.SGC(X_conv4, adj)
        conv_sum = self.SGC(X_conv5, adj)
        conv_sumx = self.SGC(conv_sum, adj)
        drug_feature_ht = conv_sumx[:drug_vec.shape[0]]
        protein_feature_ht = conv_sumx[drug_vec.shape[0]:]
        if iftrain:
            d, p = drug_feature_ht, protein_feature_ht
        z4, z5, z6 = self.GCN(feature, fedge)
        att4 = F.softmax(self.d, dim=0)
        feature_knn = torch.stack((z4, z5, z6), dim=0)
        feature_knn = torch.sum((att4 * feature_knn), dim=0)
        pred = self.MLP(feature_knn[dateset_index])
        #pred =  self._MLP (feature_knn[dateset_index])
        if iftrain:
            return pred, d, p
        return pred


def init(i):
    if isinstance(i, nn.Linear):
        torch.nn.init.xavier_uniform_(i.weight)


In [36]:

def main_test(model, d, p, test_index, epoch, fold):
    model.eval()
    out = model( features_d, feature, fedge, test_index)

    acc1 = (out.argmax(dim=1) == label[test_index].reshape(-1).long()).sum(dtype=float) / torch.tensor(len(test_index), dtype=float)
    task_roc = get_roc(out, label[test_index])
    task_precision,task_recall,task_pr = get_pr(out, label[test_index])
    task_f1 = get_f1score(out, label[test_index])
    # if epoch == 799:
    #     f = open(f"{fold}out.txt","w",encoding="utf-8")
    #     for o in  (out.argmax(dim=1) == label[test_index].reshape(-1)):
    #         f.write(f"{o}\n")
    #     f.close()
    return acc1, task_roc, task_pr, task_precision,task_recall,task_f1

def train(model, optim, train_index, test_index, epoch, fold):
    model.train()
    out, d, p = model(features_d,feature, fedge, train_index)
    #print(out.shape,d.shape,p.shape)
    train_acc = (out.argmax(dim=1) == label[train_index].reshape(-1).long()).sum(dtype=float) / torch.tensor(len(train_index), dtype=float)
    task1_roc = get_roc(out, label[train_index])
    reg = get_L2reg(model.parameters())
    loss = F.nll_loss(out, label[train_index].reshape(-1).long()) + reg_loss_co * reg
    #print(train_acc,loss)
    optim.zero_grad()
    loss.backward()
    optim.step()
    
    te_acc, te_task1_roc1, te_task1_pr, te_task_precision,te_task_recall,te_task1_f1 = main_test(model, d, p, test_index, epoch, fold)

    return loss.item(), train_acc, task1_roc, te_acc, te_task1_roc1, te_task1_pr, te_task_precision,te_task_recall,te_task1_f1


def main(tr, te, seed):
        results = []
        for i in range(len(tr)):
            f = open( os.path.join(config['results_dir'],f"{name}_{config['feature_list']}_{i}foldtrain.txt"), "w", encoding="utf-8")
            train_index = tr[i]
            for train_index_one in train_index:
                f.write(f"{train_index_one}\n")
            test_index = te[i]
            f = open( os.path.join(config['results_dir'],f"{name}_{config['feature_list']}_{i}foldtest.txt"), "w", encoding="utf-8")
            for train_index_one in test_index:
                f.write(f"{train_index_one}\n")
            #
            # if not os.path.isdir(f"{dir}"):
            #     os.makedirs(f"{dir}")
                
            # model.load_state_dict(torch.load(f"{dir}/net{i}.pth"))
            optim = torch.optim.Adam(lr=config['lr'], weight_decay= float(config['weight_decay']), params=model.parameters())
            best_roc =0
            best_results = []
            for epoch in tqdm(range(config['epochs'])):
                loss, train_acc, task1_roc, acc, task1_roc1, task1_pr,task1_precision,task1_recall, task1_f1 = train(model, optim, train_index, test_index, epoch, i)
                if task1_roc1 > best_roc:
                    best_roc = task1_roc1
                    best_model_state = model.state_dict()  # Update the best model state
                    torch.save(best_model_state, os.path.join(config['save_dir'], f"{config['feature_list']}_dataset_{name}_best_model_fold_{i}_roc.pth"))
                    best_results = acc, task1_roc1, task1_pr,task1_precision,task1_recall, task1_f1
                    best_results = list(tuple(f"{value:.4f}" for value in best_results))

            results.append(best_results)

            print(f'{i} fold cv results:', f'dataset {name}, Acc is:{best_results[0]},  AUROC is:{best_results[1]}, AUPr is:{best_results[2]},\
Precision is:{best_results[3]} , recall is :{best_results[4]},f1 is:{best_results[5]}')
        df_results = pd.DataFrame(results, columns=['Accuracy', 'AUROC', 'AUPr',"Precision","recall","f1"])
        print(f'5-fold cv averaged results:', f'dataset {name}')
        print( df_results.mean())
        df_results.to_csv(os.path.join(config['results_dir'],f"{name}_{config['feature_list']}_CV_resluts.csv"),index=False)


In [37]:
for name in ['Luo',"Es","GPCRs","ICs","NRs",'Zheng']: 
    node_num, drug_protein, protein_drug, dtidata,features_d,features_p,HyGraph_Drug,HyGraph_protein = load_feature(name,config['feature_list'])
    #print(dtidata)
    #dti_label = F.one_hot(torch.tensor(dtidata[:, 2:3]), num_classes=2)
    #dti_label = torch.squeeze(dti_label, dim=1)
    
    dti_label = torch.tensor(dtidata[:, 2:3])
    dti_label = dti_label.squeeze() 
    #print(dti_label)
    indices_features1 = dtidata[:, 0]  # 提取第一个 Tensor 的行索引
    indices_features2 = dtidata[:, 1]  # 提取第二个 Tensor 的行索引
    selected_features1 = features_d[indices_features1]
    selected_features2 = features_p[indices_features2]
    features = torch.cat((selected_features1, selected_features2), dim=1)
    features, dti_label = shuffle(features, dti_label, random_state=42)
    
    fedge, feature = knngraph(dtidata, features)
    fedge =  fedge.to(config['device'])
    feature = feature.to(config['device'])
    print(fedge.shape,feature.shape)
    label = dti_label
    data = dtidata
    model = KNN_model(
                        num_protein=node_num[1],
                        num_drug=node_num[0],
                        num_hidden1=config['in_size'],
                        num_hidden2=config['hidden_size'],
                        num_out=config['out_size'],
                        feature_list= config['feature_list']).to(config['device'])
    train_indeces, test_indeces = get_cross(dtidata)
    main(train_indeces, test_indeces, config['seed'])
    


load LLM features
torch.Size([3846, 3846]) torch.Size([3846, 704])


  0%|          | 0/800 [00:00<?, ?it/s]

 ** On entry to SGEMM  parameter number 10 had an illegal value





RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`