In [25]:
# _*_ coding: utf-8 _*_
# @Time : $[DATE] $[TIME]
# @Author : G

import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, f1_score, balanced_accuracy_score, matthews_corrcoef
import itertools

# Evaluation Metrics
def accuracy(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

def precision(y_true, y_pred):
    return precision_score(y_true, y_pred, pos_label=1, average="binary")

def recall(y_true, y_pred):
    return recall_score(y_true, y_pred, pos_label=1, average="binary")

def auc(y_true, y_scores):
    return roc_auc_score(y_true, y_scores)

def mcc(y_true, y_pred):
    return matthews_corrcoef(y_true, y_pred)

def new_confusion_matrix(y_true, y_pred):
    return confusion_matrix(y_true, y_pred, labels=[0, 1])

def sp(y_true, y_pred):
    cm = new_confusion_matrix(y_true, y_pred)
    return cm[0, 0] * 1.0 / (cm[0, 0] + cm[0, 1])

def BACC(y_true, y_pred):
    return balanced_accuracy_score(y_true, y_pred)

def f1(y_true, y_pred):
    return f1_score(y_true, y_pred)

# ---> Loading prediction & Pron data
data = pd.read_csv('Seed2021_Consensus_test.csv', sep=',')
SMILES = data[['SMILES']]
y_true = data[['label']].values
Num_test = len(y_true)

# GCN
Label_GCN_pred = data[['label_GCN_pred']].values
Prediction_GCN_prob = data[['Prob_GCN']].values

# GAT
Label_GAT_pred = data[['label_GAT_pred']].values
Prediction_GAT_prob = data[['Prob_GAT']].values

# AttentiveFP
Label_AttentiveFP_pred = data[['label_AttentiveFP_pred']].values
Prediction_AttentiveFP_prob = data[['Prob_AttentiveFP']].values

# GraphSAGE
Label_GraphSAGE_pred = data[['label_GraphSAGE_pred']].values
Prediction_GraphSAGE_prob = data[['Prob_GraphSAGE']].values

# RF_MACCS
Label_RF_MACCS_pred = data[['label_RF_MACCS_pred']].values
Prediction_RF_MACCS_prob = data[['Prob_RF_MACCS']].values

# RF_Morgan
Label_RF_Morgan_pred = data[['label_RF_Morgan_pred']].values
Prediction_RF_Morgan_prob = data[['Prob_RF_Morgan']].values

def pred_read(name):
    The_pred = 'Label_' + '{}'.format(name) + '_pred'
    return The_pred
def prob_read(name):
    The_prob = 'Prediction_' + '{}'.format(name) + '_prob'
    return The_prob

In [26]:
# ---> Consistency Model
# ----> Soft Voting, using predicted class labels for majority rule voting

## chose two of six
a = 1
Model_list = list(itertools.combinations(['GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan'], 2))
Comb_AUC = []
Model_index = []
Model_names = []
Model_AUC = []
Model_ACC = []
Model_F1 = []
Model_BACC = []
Model_Precision = []
Model_Recall = []
Model_MCC = []
Model_SP = []
for i in Model_list:
    print('-' * 30 + 'The time {}'.format(a) + ' ***')
    pred1_name = pred_read(i[0])
    pred1 = locals()[pred1_name]
    prob1_name = prob_read(i[0])
    prob1 = locals()[prob1_name]

    pred2_name = pred_read(i[1])
    pred2 = locals()[pred2_name]
    prob2_name = prob_read(i[1])
    prob2 = locals()[prob2_name]

    Model_index.append(a)
    a = a + 1
    print('This Combination Models: {}'.format(i))

    y_pred_comb = []
    y_prob_comb = []
    for j in range(Num_test):
        comb_y_pred = pred1[j] + pred2[j]
        comb_y_prob = np.mean(prob1[j] + prob2[j])
        y_prob_comb.append(comb_y_prob)
        if int(comb_y_prob) >= 0.5:
            y_pred_comb.append(1)
        else:
            y_pred_comb.append(0)
    print('-' * 30 + 'Combination Model Top' + '-' * 30)
    print('ACC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('F1: {:.4f}'.format(f1(y_true, y_pred_comb)))
    print('BACC: {:.4f}'.format(BACC(y_true, y_pred_comb)))
    print('Precision: {:.4f}'.format(precision(y_true, y_pred_comb)))
    print('Recall: {:.4f}'.format(recall(y_true, y_pred_comb)))
    print('MCC: {:.4f}'.format(mcc(y_true, y_pred_comb)))
    print('SP: {:.4f}'.format(sp(y_true, y_pred_comb)))
    print('AUC: {:.4f}'.format(auc(y_true, y_prob_comb)))

    AUC = auc(y_true, y_prob_comb)
    Comb_AUC.append(AUC)
print('Combination Model AUC List: ')
print(Comb_AUC)

# Pick the best model by AUC performance
Max_AUC_location = np.where(Comb_AUC == np.max(Comb_AUC))
index_Max = Max_AUC_location[0].tolist()

for x in index_Max:
    print('The best model is combination Number --> {:.0f}'.format(x))
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)
    print(Model_list[x])
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)

------------------------------The time 1 ***
This Combination Models: ('GCN', 'GAT')
------------------------------Combination Model Top------------------------------
ACC: 0.5255
F1: 0.6033
BACC: 0.5074
Precision: 0.5823
Recall: 0.6259
MCC: 0.0150
SP: 0.3889
AUC: 0.5137
------------------------------The time 2 ***
This Combination Models: ('GCN', 'AttentiveFP')
------------------------------Combination Model Top------------------------------
ACC: 0.5098
F1: 0.5847
BACC: 0.4938
Precision: 0.5714
Recall: 0.5986
MCC: -0.0126
SP: 0.3889
AUC: 0.4722
------------------------------The time 3 ***
This Combination Models: ('GCN', 'GraphSAGE')
------------------------------Combination Model Top------------------------------
ACC: 0.7608
F1: 0.7932
BACC: 0.7544
Precision: 0.7905
Recall: 0.7959
MCC: 0.5095
SP: 0.7130
AUC: 0.8614
------------------------------The time 4 ***
This Combination Models: ('GCN', 'RF_MACCS')
------------------------------Combination Model Top------------------------------


In [27]:
# -----> Soft Voting, using predict the class label based on the argmax of the sum of predicted probabilities
a = 1
Model_list = list(itertools.combinations(['GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan'], 3))
Comb_AUC = []
Model_index = []
Model_names = []
Model_AUC = []
Model_ACC = []
Model_F1 = []
Model_BACC = []
Model_Precision = []
Model_Recall= []
Model_MCC = []
Model_SP = []
for i in Model_list:
    print('-' * 30 + 'The time {}'.format(a) + ' ***')
    pred1_name = pred_read(i[0])
    pred1 = locals()[pred1_name]
    prob1_name = prob_read(i[0])
    prob1 = locals()[prob1_name]

    pred2_name = pred_read(i[1])
    pred2 = locals()[pred2_name]
    prob2_name = prob_read(i[1])
    prob2 = locals()[prob2_name]

    pred3_name = pred_read(i[2])
    pred3 = locals()[pred3_name]
    prob3_name = prob_read(i[2])
    prob3 = locals()[prob3_name]

    Model_index.append(a)
    a = a + 1
    print('This Combination Models: {}'.format(i))

    y_pred_comb = []
    y_prob_comb = []
    for j in range(Num_test):
        comb_y_prob = np.mean(prob1[j] + prob2[j] + prob3[j])
        y_prob_comb.append(comb_y_prob)
        # prob > 0.5 ---> label =1
        if int(comb_y_prob) >= 0.5:
            y_pred_comb.append(1)
        else:
            y_pred_comb.append(0)
    print('-' * 30 + 'Combination Model Top' + '-' * 30)
    print('ACC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('F1: {:.4f}'.format(f1(y_true, y_pred_comb)))
    print('BACC: {:.4f}'.format(BACC(y_true, y_pred_comb)))
    print('Precision: {:.4f}'.format(precision(y_true, y_pred_comb)))
    print('Recall: {:.4f}'.format(recall(y_true, y_pred_comb)))
    print('MCC: {:.4f}'.format(mcc(y_true, y_pred_comb)))
    print('SP: {:.4f}'.format(sp(y_true, y_pred_comb)))
    print('AUC: {:.4f}'.format(auc(y_true, y_prob_comb)))

    AUC = auc(y_true, y_prob_comb)
    Comb_AUC.append(AUC)
print('Combination Model AUC List: ')
print(Comb_AUC)

# Pick the best model by AUC performance
Max_AUC_location = np.where(Comb_AUC == np.max(Comb_AUC))
index_Max = Max_AUC_location[0].tolist()
for x in index_Max:
    print('The best model is combination Number --> {:.0f}'.format(x))
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)
    print(Model_list[x])
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)


------------------------------The time 1 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP')
------------------------------Combination Model Top------------------------------
ACC: 0.5686
F1: 0.6961
BACC: 0.5165
Precision: 0.5860
Recall: 0.8571
MCC: 0.0449
SP: 0.1759
AUC: 0.5062
------------------------------The time 2 ***
This Combination Models: ('GCN', 'GAT', 'GraphSAGE')
------------------------------Combination Model Top------------------------------
ACC: 0.6902
F1: 0.7836
BACC: 0.6392
Precision: 0.6560
Recall: 0.9728
MCC: 0.3905
SP: 0.3056
AUC: 0.8202
------------------------------The time 3 ***
This Combination Models: ('GCN', 'GAT', 'RF_MACCS')
------------------------------Combination Model Top------------------------------
ACC: 0.5529
F1: 0.6851
BACC: 0.5005
Precision: 0.5767
Recall: 0.8435
MCC: 0.0013
SP: 0.1574
AUC: 0.4970
------------------------------The time 4 ***
This Combination Models: ('GCN', 'GAT', 'RF_Morgan')
------------------------------Combination Model T

In [28]:
# chose four of six
a = 1
Model_list = list(itertools.combinations(['GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan'], 4))
Comb_AUC = []
Model_index = []
Model_names = []
Model_AUC = []
Model_ACC = []
Model_F1 = []
Model_BACC = []
Model_Precision = []
Model_Recall= []
Model_MCC = []
Model_SP = []
for i in Model_list:
    print('-' * 30 + 'The time {}'.format(a) + ' ***')
    pred1_name = pred_read(i[0])
    pred1 = locals()[pred1_name]
    prob1_name = prob_read(i[0])
    prob1 = locals()[prob1_name]

    pred2_name = pred_read(i[1])
    pred2 = locals()[pred2_name]
    prob2_name = prob_read(i[1])
    prob2 = locals()[prob2_name]

    pred3_name = pred_read(i[2])
    pred3 = locals()[pred3_name]
    prob3_name = prob_read(i[2])
    prob3 = locals()[prob3_name]

    pred4_name = pred_read(i[3])
    pred4 = locals()[pred4_name]
    prob4_name = prob_read(i[3])
    prob4 = locals()[prob4_name]

    Model_index.append(a)
    a = a + 1
    print('This Combination Models: {}'.format(i))

    y_pred_comb = []
    y_prob_comb = []
    for j in range(Num_test):
        comb_y_pred = pred1[j] + pred2[j] + pred3[j] + pred4[j]
        comb_y_prob = np.mean(prob1[j] + prob2[j] + prob3[j] + prob4[j])
        y_prob_comb.append(comb_y_prob)
        # 2/4 ---> label =1
        if int(comb_y_prob) >= 0.5:
            y_pred_comb.append(1)
        else:
            y_pred_comb.append(0)
    print('-' * 30 + 'Combination Model Top' + '-' * 30)
    print('ACC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('F1: {:.4f}'.format(f1(y_true, y_pred_comb)))
    print('BACC: {:.4f}'.format(BACC(y_true, y_pred_comb)))
    print('Precision: {:.4f}'.format(precision(y_true, y_pred_comb)))
    print('Recall: {:.4f}'.format(recall(y_true, y_pred_comb)))
    print('MCC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('SP: {:.4f}'.format(sp(y_true, y_pred_comb)))
    print('AUC: {:.4f}'.format(auc(y_true, y_prob_comb)))

    AUC = auc(y_true, y_prob_comb)
    Comb_AUC.append(AUC)
print('Combination Model AUC List: ')
print(Comb_AUC)

# Pick the best model by AUC performance
Max_AUC_location = np.where(Comb_AUC == np.max(Comb_AUC))
index_Max = Max_AUC_location[0].tolist()
for x in index_Max:
    print('The best model is combination Number --> {:.0f}'.format(x))
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)
    print(Model_list[x])
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)


------------------------------The time 1 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'GraphSAGE')
------------------------------Combination Model Top------------------------------
ACC: 0.6471
F1: 0.7656
BACC: 0.5833
Precision: 0.6203
Recall: 1.0000
MCC: 0.6471
SP: 0.1667
AUC: 0.7997
------------------------------The time 2 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'RF_MACCS')
------------------------------Combination Model Top------------------------------
ACC: 0.5725
F1: 0.7169
BACC: 0.5064
Precision: 0.5798
Recall: 0.9388
MCC: 0.5725
SP: 0.0741
AUC: 0.5079
------------------------------The time 3 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'RF_Morgan')
------------------------------Combination Model Top------------------------------
ACC: 0.5725
F1: 0.7169
BACC: 0.5064
Precision: 0.5798
Recall: 0.9388
MCC: 0.5725
SP: 0.0741
AUC: 0.5055
------------------------------The time 4 ***
This Combination Models: ('GCN', 'GAT', 'GraphSAGE', 'RF_MA

In [29]:
# chose five of six
a = 1
Model_list = list(itertools.combinations(['GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan'], 5))
Comb_AUC = []
Model_index = []
Model_names = []
Model_AUC = []
Model_ACC = []
Model_F1 = []
Model_BACC = []
Model_Precision = []
Model_Recall= []
Model_MCC = []
Model_SP = []
for i in Model_list:
    print('-' * 30 + 'The time {}'.format(a) + ' ***')
    pred1_name = pred_read(i[0])
    pred1 = locals()[pred1_name]
    prob1_name = prob_read(i[0])
    prob1 = locals()[prob1_name]

    pred2_name = pred_read(i[1])
    pred2 = locals()[pred2_name]
    prob2_name = prob_read(i[1])
    prob2 = locals()[prob2_name]

    pred3_name = pred_read(i[2])
    pred3 = locals()[pred3_name]
    prob3_name = prob_read(i[2])
    prob3 = locals()[prob3_name]

    pred4_name = pred_read(i[3])
    pred4 = locals()[pred4_name]
    prob4_name = prob_read(i[3])
    prob4 = locals()[prob4_name]

    pred5_name = pred_read(i[4])
    pred5 = locals()[pred5_name]
    prob5_name = prob_read(i[4])
    prob5 = locals()[prob5_name]

    Model_index.append(a)
    a = a + 1
    print('This Combination Models: {}'.format(i))

    y_pred_comb = []
    y_prob_comb = []
    for j in range(Num_test):
        comb_y_pred = pred1[j] + pred2[j] + pred3[j] + pred4[j] + pred5[j]
        comb_y_prob = np.mean(prob1[j] + prob2[j] + prob3[j] + prob4[j] + prob5[j])
        y_prob_comb.append(comb_y_prob)
        # 2/5 ---> label =1
        if int(comb_y_prob) >= 0.5:
            y_pred_comb.append(1)
        else:
            y_pred_comb.append(0)
    print('-' * 30 + 'Combination Model Top' + '-' * 30)
    print('ACC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('F1: {:.4f}'.format(f1(y_true, y_pred_comb)))
    print('BACC: {:.4f}'.format(BACC(y_true, y_pred_comb)))
    print('Precision: {:.4f}'.format(precision(y_true, y_pred_comb)))
    print('Recall: {:.4f}'.format(recall(y_true, y_pred_comb)))
    print('MCC: {:.4f}'.format(mcc(y_true, y_pred_comb)))
    print('SP: {:.4f}'.format(sp(y_true, y_pred_comb)))
    print('AUC: {:.4f}'.format(auc(y_true, y_prob_comb)))

    AUC = auc(y_true, y_prob_comb)
    Comb_AUC.append(AUC)
print('Combination Model AUC List: ')
print(Comb_AUC)

# Pick the best model by AUC performance
Max_AUC_location = np.where(Comb_AUC == np.max(Comb_AUC))
index_Max = Max_AUC_location[0].tolist()
for x in index_Max:
    print('The best model is combination Number --> {:.0f}'.format(x))
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)
    print(Model_list[x])
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)


------------------------------The time 1 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS')
------------------------------Combination Model Top------------------------------
ACC: 0.6078
F1: 0.7462
BACC: 0.5370
Precision: 0.5951
Recall: 1.0000
MCC: 0.2100
SP: 0.0741
AUC: 0.7796
------------------------------The time 2 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_Morgan')
------------------------------Combination Model Top------------------------------
ACC: 0.6078
F1: 0.7462
BACC: 0.5370
Precision: 0.5951
Recall: 1.0000
MCC: 0.2100
SP: 0.0741
AUC: 0.7724
------------------------------The time 3 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'RF_MACCS', 'RF_Morgan')
------------------------------Combination Model Top------------------------------
ACC: 0.5765
F1: 0.7245
BACC: 0.5061
Precision: 0.5796
Recall: 0.9660
MCC: 0.0313
SP: 0.0463
AUC: 0.5103
------------------------------The time 4 ***
This Combination Mode

In [30]:
# All six
a = 1
Model_list = list(itertools.combinations(['GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan'], 6))
Comb_AUC = []
Model_index = []
Model_names = []
Model_AUC = []
Model_ACC = []
Model_F1 = []
Model_BACC = []
Model_Precision = []
Model_Recall= []
Model_MCC = []
Model_SP = []
for i in Model_list:
    print('-' * 30 + 'The time {}'.format(a) + ' ***')
    pred1_name = pred_read(i[0])
    pred1 = locals()[pred1_name]
    prob1_name = prob_read(i[0])
    prob1 = locals()[prob1_name]

    pred2_name = pred_read(i[1])
    pred2 = locals()[pred2_name]
    prob2_name = prob_read(i[1])
    prob2 = locals()[prob2_name]

    pred3_name = pred_read(i[2])
    pred3 = locals()[pred3_name]
    prob3_name = prob_read(i[2])
    prob3 = locals()[prob3_name]

    pred4_name = pred_read(i[3])
    pred4 = locals()[pred4_name]
    prob4_name = prob_read(i[3])
    prob4 = locals()[prob4_name]

    pred5_name = pred_read(i[4])
    pred5 = locals()[pred5_name]
    prob5_name = prob_read(i[4])
    prob5 = locals()[prob4_name]

    pred6_name = pred_read(i[5])
    pred6 = locals()[pred6_name]
    prob6_name = prob_read(i[5])
    prob6 = locals()[prob6_name]

    Model_index.append(a)
    a = a + 1
    print('This Combination Models: {}'.format(i))

    y_pred_comb = []
    y_prob_comb = []
    for j in range(Num_test):
        comb_y_pred = pred1[j] + pred2[j] + pred3[j] + pred4[j] + pred5[j] + pred6[j]
        comb_y_prob = np.mean(prob1[j] + prob2[j] + prob3[j] + prob4[j] + prob5[j] + prob6[j])
        y_prob_comb.append(comb_y_prob)
        # 3/6 ---> label =1
        if int(comb_y_prob) >= 0.5:
            y_pred_comb.append(1)
        else:
            y_pred_comb.append(0)
    print('-' * 30 + 'Combination Model Top' + '-' * 30)
    print('ACC: {:.4f}'.format(accuracy(y_true, y_pred_comb)))
    print('F1: {:.4f}'.format(f1(y_true, y_pred_comb)))
    print('BACC: {:.4f}'.format(BACC(y_true, y_pred_comb)))
    print('Precision: {:.4f}'.format(precision(y_true, y_pred_comb)))
    print('Recall: {:.4f}'.format(recall(y_true, y_pred_comb)))
    print('MCC: {:.4f}'.format(mcc(y_true, y_pred_comb)))
    print('SP: {:.4f}'.format(sp(y_true, y_pred_comb)))
    print('AUC: {:.4f}'.format(auc(y_true, y_prob_comb)))

    AUC = auc(y_true, y_prob_comb)
    Comb_AUC.append(AUC)
print('Combination Model AUC List: ')
print(Comb_AUC)

# Pick the best model by AUC performance
Max_AUC_location = np.where(Comb_AUC == np.max(Comb_AUC))
index_Max = Max_AUC_location[0].tolist()
for x in index_Max:
    print('The best model is combination Number --> {:.0f}'.format(x))
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)
    print(Model_list[x])
    print('*- '*5 + 'Here is the best combination models' + ' -*'*5)


------------------------------The time 1 ***
This Combination Models: ('GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan')
------------------------------Combination Model Top------------------------------
ACC: 0.6078
F1: 0.7462
BACC: 0.5370
Precision: 0.5951
Recall: 1.0000
MCC: 0.2100
SP: 0.0741
AUC: 0.9172
Combination Model AUC List: 
[0.9171705719324768]
The best model is combination Number --> 0
*- *- *- *- *- Here is the best combination models -* -* -* -* -*
('GCN', 'GAT', 'AttentiveFP', 'GraphSAGE', 'RF_MACCS', 'RF_Morgan')
*- *- *- *- *- Here is the best combination models -* -* -* -* -*
