In [1]:
import numpy as np
import random
import xlrd
import math
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve, silhouette_score
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier

In [2]:
#Save or read variables
import pickle
def save_variable(v,filename):
    """Pickle ``v`` to ``filename`` and return the path that was written.

    Parameters
    ----------
    v : object
        Any picklable Python object.
    filename : str or path-like
        Destination file; it is opened in binary write mode, so an existing
        file at that path is overwritten.

    Returns
    -------
    The ``filename`` argument, unchanged.

    Raises
    ------
    Whatever ``open`` or ``pickle.dump`` raise (e.g. ``OSError``, or a
    pickling error for objects that cannot be serialized).
    """
    # The context manager closes the handle even when pickle.dump raises,
    # so a failed save never leaves the file descriptor open.
    with open(filename, 'wb') as f:
        pickle.dump(v, f)
    return filename
  
def load_variavle(filename):
    """Load and return the object pickled in ``filename``.

    The function name keeps its original spelling so existing callers keep
    working.

    Parameters
    ----------
    filename : str or path-like
        Path of a file previously written with ``pickle.dump``.

    Returns
    -------
    The unpickled object.

    Raises
    ------
    Whatever ``open`` or ``pickle.load`` raise (e.g. ``OSError``,
    ``EOFError`` for an empty file, ``pickle.UnpicklingError``).
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # were produced by a trusted source.
    # The context manager closes the handle even when pickle.load raises.
    with open(filename, 'rb') as f:
        return pickle.load(f)

In [3]:
#Directory that holds the similarity matrices read below
data_path='./data/'
DSS1 = np.loadtxt(data_path + 'DSS1.txt')
DSS2 = np.loadtxt(data_path + 'DSS2.txt')
#Integrated semantic similarity for diseases (element-wise mean of the two matrices)
DSS = (DSS1 + DSS2) / 2
#Gaussian interaction profile kernel similarity for diseases
DGS = np.loadtxt(data_path + 'DGS.txt')
#Integrated similarity for diseases: keep the semantic similarity where it is
#non-zero and fall back to the Gaussian kernel similarity where it is exactly 0.
#np.where does this element-wise in one vectorized pass instead of a Python
#double loop (assumes DGS has the same shape as DSS).
IDS = np.where(DSS == 0, DGS, DSS)
#Functional similarity for miRNAs
MFS = np.loadtxt(data_path + 'MFS.txt')
#Gaussian interaction profile kernel similarity for miRNAs
MGS = np.loadtxt(data_path + 'MGS.txt')
#Integrated similarity for miRNAs: same rule as for diseases, i.e. functional
#similarity where non-zero, Gaussian kernel similarity otherwise
#(assumes MGS has the same shape as MFS).
IMS = np.where(MFS == 0, MGS, MFS)
#miRNA-disease associations matrix: one row per row of DSS, one column per row of MFS
MD = np.zeros(shape = (DSS.shape[0], MFS.shape[0]))
asso_file =  xlrd.open_workbook(data_path + 'Human miRNA-disease associations.xlsx')
asso_pairs = asso_file.sheets()[0]
for row_idx in range(asso_pairs.nrows):
    row = asso_pairs.row_values(row_idx)
    #First cell selects the column of MD, second cell the row; both are 1-based in the sheet
    col_id, row_id = int(row[0]), int(row[1])
    MD[row_id - 1, col_id - 1] = 1
#Verified miRNA disease pairs: (row, column) positions of MD that are non-zero,
#listed in row-major order as tuples of plain Python ints
known = [tuple(pair) for pair in np.argwhere(MD != 0).tolist()]
#Unverified miRNA disease pairs: (row, column) positions of MD that are zero, same ordering
unknown = [tuple(pair) for pair in np.argwhere(MD == 0).tolist()]

In [14]:
#positive sample set: for every verified pair, the row of IDS selected by the
#pair's first index followed by the row of IMS selected by its second index
posi_list = [IDS[d, :].tolist() + IMS[m, :].tolist() for d, m in known]
#unlabeled sample set: the same feature construction for every unverified pair
unlabelled_list = [IDS[d, :].tolist() + IMS[m, :].tolist() for d, m in unknown]
#total sample set: positives first, then unlabeled samples (the inner lists are
#shared with the two lists above, only the outer list is new)
all_list = posi_list + unlabelled_list

#The total sample set is disordered to avoid the influence of order on the clustering results
#NOTE(review): no seed is set in this cell, so the order differs from run to run
random.shuffle(all_list)

In [15]:
#Implement MiniBatchKMeans clustering algorithm ten times for total sample set

#Number of clustering repetitions
n_cluster_runs = 10
#A sample gets final label 1 when it was labelled 1 in at least this many repetitions
min_label1_votes = 9

#Convert the nested list once so every fit/predict call below reuses the same array
all_arr = np.asarray(all_list)
#Record the number of times each sample is assigned label 1
#(named label1_votes rather than `sum` so the builtin sum() stays usable in later cells)
label1_votes = np.zeros(len(all_list),dtype=int)
#Record the final cluster of each sample
final = np.zeros(len(all_list),dtype=int)
for _ in range(n_cluster_runs):
    #According to our experiment, we set the number of clusters to 2
    cls = MiniBatchKMeans(n_clusters=2,batch_size=3072).fit(all_arr)
    yhat = cls.predict(all_arr)
    #Each repetition splits the total sample set into two subsets, but which of
    #them receives label 0 and which label 1 is arbitrary.
    #To make the labels comparable across repetitions we fix the convention
    #"small subset = label 1, large subset = label 0":
    #when more samples carry label 1 than label 0, the two labels are exchanged.
    if np.count_nonzero(yhat == 1) > np.count_nonzero(yhat == 0):
        yhat = 1 - yhat
    label1_votes = label1_votes + yhat
#The dense copy is only needed for clustering; release it
del all_arr

#When the number of times a sample gets a label of 1 is greater than or equal to
#min_label1_votes, it is considered that the label of this sample is 1, otherwise 0
final = (label1_votes >= min_label1_votes).astype(int)

In [24]:
#Organize the clustering results
#Distinct final labels, in ascending order
clusters = np.unique(final)
#Map every final label to the samples of all_list that carry it (original order kept)
subsets = {
    label: [sample for sample, assigned in zip(all_list, final) if assigned == label]
    for label in clusters
}

In [25]:
#Store positive sample index in each subset
index_lists=[]
#Store the number of positive samples in each subset
posi_cnt =[]

#Hash every positive feature vector once so the membership test below is a set
#lookup instead of a linear scan of posi_list (list-vs-list comparisons) per sample
posi_vectors = {tuple(vec) for vec in posi_list}

for i in clusters:
    #Positions, inside subsets[i], of the samples whose feature vector equals a positive one
    index_list = [j for j, sample in enumerate(subsets[i]) if tuple(sample) in posi_vectors]
    index_lists.append(index_list)
    posi_cnt.append(len(index_list))

In [29]:
#Find the subset with the least proportion of positive samples
#Running minimum of the proportion and the subset it belongs to;
#a subset only replaces the current minimum when its proportion is strictly smaller
min_per=1
min_idx=0
print('The number of the total sample set is %d, of which the number of positive samples is %d' %(len(all_list),len(posi_list)))
for i, cnt in enumerate(posi_cnt):
    subset_size = len(subsets[i])
    share = cnt/subset_size
    print('The total number of samples in subset%d is %d, of which the number of positive samples is %d, and the proportion of positive samples is %f' %(i,subset_size,cnt,share))
    if share < min_per:
        min_per, min_idx = share, i
print('Subset%d has the least proportion of positive samples, accounting for %f' %(min_idx,min_per))

The number of the total sample set is 189585, of which the number of positive samples is 5430
The total number of samples in subset0 is 121411, of which the number of positive samples is 895, and the proportion of positive samples is 0.007372
The total number of samples in subset1 is 68174, of which the number of positive samples is 4535, and the proportion of positive samples is 0.066521
Subset0 has the least proportion of positive samples, accounting for 0.007372


In [30]:
#Remove the positive samples from the subset with the least proportion of positive samples,
#and the remaining unmarked samples in the subset are regarded as negative samples
subset_arr = np.asarray(subsets[min_idx])
#Start with every row kept, then switch off the rows recorded as positive
keep_mask = np.ones(subset_arr.shape[0], dtype=bool)
keep_mask[index_lists[min_idx]] = False
new_nega = subset_arr[keep_mask].tolist()

In [31]:
#Split the negative sample set at position positive_num
def spliting_negative_data(negative_data, positive_num):
    """Split ``negative_data`` into a training part and a validation part.

    The first ``positive_num`` items become the cross-validation part and
    everything after them becomes the training part.

    Returns
    -------
    (negative_train_data, negative_cv_data)
    """
    held_out = negative_data[:positive_num]
    remainder = negative_data[positive_num:]
    return remainder, held_out

In [32]:
# Feature selection based on random forest feature importance score
def feature_ranking_by_rf(data, label, sel_fea_num, sel_hp1, sel_hp2):
    """Return the indices of the ``sel_fea_num`` most important features.

    A ``RandomForestClassifier`` with ``n_estimators=sel_hp1``,
    ``max_depth=sel_hp2`` and ``random_state=0`` is fitted on
    ``(data, label)``; features are ranked by its ``feature_importances_``.

    Returns
    -------
    Array of feature indices, most important first, of length at most
    ``sel_fea_num``.
    """
    forest = RandomForestClassifier(n_estimators=sel_hp1, max_depth=sel_hp2, random_state=0)
    forest.fit(data, label)
    #argsort is ascending, so reverse it to get the highest scores first
    ranked_desc = np.argsort(forest.feature_importances_)[::-1]
    return ranked_desc[:sel_fea_num]

In [66]:
#Prediction scores of every individual learner on the test set when feature selection is applied
def base_probs_fs(X_test, most_imps_list, trained_clfs):
    """Collect the class-1 scores of each learner on its own feature subset.

    Learner ``k`` is evaluated on ``X_test[:, most_imps_list[k]]`` and the
    second column of its ``predict_proba`` output is kept.

    Returns
    -------
    Array with one row per test sample and one column per learner.
    """
    per_learner_scores = [
        clf.predict_proba(X_test[:, most_imps_list[k]])[:, 1]
        for k, clf in enumerate(trained_clfs)
    ]
    #Stacked as (learner, sample); transpose to (sample, learner)
    return np.array(per_learner_scores).T

In [67]:
#Prediction scores of every individual learner on the test set when no feature selection is applied
def base_probs(X_test, trained_clfs):
    """Collect the class-1 scores of each learner on the full feature set.

    Every learner's ``predict_proba(X_test)`` is called and its second
    column is kept.

    Returns
    -------
    Array with one row per test sample and one column per learner.
    """
    per_learner_scores = [clf.predict_proba(X_test)[:, 1] for clf in trained_clfs]
    #Stacked as (learner, sample); transpose to (sample, learner)
    return np.array(per_learner_scores).T

In [52]:
#XGBoost as individual learners
def base_xgb_learners(base_learner_num):
    """Return a list of ``base_learner_num`` fresh, unfitted ``XGBClassifier`` objects.

    Each one is built with ``max_depth=6``, ``learning_rate=0.4`` and
    ``n_estimators=100``.
    """
    return [
        XGBClassifier(max_depth=6,learning_rate=0.4,n_estimators=100)
        for _ in range(base_learner_num)
    ]

In [53]:
#RandomForest as individual learners
def base_rf_learners(base_learner_num):
    """Return a list of ``base_learner_num`` fresh, unfitted ``RandomForestClassifier`` objects.

    Each one is built with ``n_estimators=400`` and ``max_depth=40``.
    """
    return [
        RandomForestClassifier(n_estimators=400, max_depth=40)
        for _ in range(base_learner_num)
    ]

In [54]:
#ExtraTrees as individual learners
def base_ert_learners(base_learner_num):
    """Return a list of ``base_learner_num`` fresh, unfitted ``ExtraTreesClassifier`` objects.

    Each one is built with ``n_estimators=400`` and ``max_depth=20``.
    """
    return [
        ExtraTreesClassifier(n_estimators=400, max_depth=20)
        for _ in range(base_learner_num)
    ]

In [55]:
#AdaBoost as individual learners
def base_ab_learners(base_learner_num):
    """Return a list of ``base_learner_num`` fresh, unfitted ``AdaBoostClassifier`` objects.

    Each one is built with ``n_estimators=50`` and ``learning_rate=0.35``.
    """
    return [
        AdaBoostClassifier(n_estimators=50, learning_rate=0.35)
        for _ in range(base_learner_num)
    ]

In [56]:
#soft voting strategy
def soft_voting_strategy(base_probs):
    """Combine the learners' scores by averaging them per sample.

    ``base_probs`` is iterated row by row; each row holds the scores that
    the individual learners gave to one sample.

    Returns
    -------
    (pred_final, prob_final)
        ``prob_final`` is the list of per-sample mean scores and
        ``pred_final`` the list of hard labels: 1 when the mean is strictly
        greater than 0.5, otherwise 0.
    """
    print('\nSoft voting ...\n')
    #Final prediction score of each sample: mean over all individual learners
    prob_final = [np.mean(sample_scores) for sample_scores in base_probs]
    #Final prediction result of each sample: threshold the mean score at 0.5
    pred_final = [1 if score > 0.5 else 0 for score in prob_final]
    return pred_final, prob_final

In [57]:
#Performance calculation
def calculate_performace(num, y_pred, y_prob, y_test):
    """Compute confusion-matrix counts and summary metrics for binary predictions.

    Parameters
    ----------
    num : int
        Number of leading samples of ``y_test`` / ``y_pred`` to score.
    y_pred : sequence
        Hard labels; a prediction counts as correct when it equals the
        corresponding entry of ``y_test``.
    y_prob : sequence
        Scores passed to ``roc_auc_score`` and ``precision_recall_curve``.
    y_test : sequence
        True labels; entries equal to 1 are positives, everything else is
        treated as negative.

    Returns
    -------
    (tp, fp, tn, fn, acc, precision, recall, f1_score, AUC, AUPR)

    Notes
    -----
    A metric whose denominator is zero is reported as 0.0 (the same
    convention as scikit-learn's ``zero_division=0``). Previously any zero
    denominator -- including the common ``tp == 0`` case, where
    ``precision + recall == 0`` -- set precision, recall and F1 all to 100,
    a value outside [0, 1] that corrupts averages over folds.
    """
    tp = fp = tn = fn = 0
    for index in range(num):
        is_correct = y_test[index] == y_pred[index]
        if y_test[index] == 1:
            if is_correct:
                tp += 1
            else:
                fn += 1
        elif is_correct:
            tn += 1
        else:
            fp += 1
    acc = float(tp + tn)/num

    predicted_pos = tp + fp
    actual_pos = tp + fn
    #Each metric is guarded on its own so one undefined value does not overwrite the others
    precision = float(tp)/predicted_pos if predicted_pos else 0.0
    recall = float(tp)/actual_pos if actual_pos else 0.0
    pr_sum = precision + recall
    f1_score = float((2*precision*recall)/pr_sum) if pr_sum else 0.0
    if not (predicted_pos and actual_pos and pr_sum):
        #Keep signalling the degenerate case, as before
        print("You can't divide by 0.")

    AUC = roc_auc_score(y_test, y_prob)
    p, r, _ = precision_recall_curve(y_test,y_prob)
    #Area under the precision-recall curve (recall on the x axis)
    AUPR = auc(r, p)
    return tp, fp, tn, fn, acc, precision, recall, f1_score, AUC, AUPR

In [71]:
#Number of cross-validation folds
fold_num=5
#'RF' enables random-forest feature selection for every individual learner; any other value disables it
sel_fea = 'RF'
#Fraction of the features kept by feature selection
prop = 0.75
#Number of individual learners in the ensemble
base_learner_num = 10

#Hyper-parameters of the random forest used for feature selection
sel_fea_num = int(prop*len(all_list[0]))
sel_hp1 = 300
sel_hp2 = 30

posi_data = posi_list
nega_data = new_nega
posi_num = len(posi_data)
#The first posi_num negatives are reserved for testing, the rest are sampled for training
nega_train_data, nega_cv_data = spliting_negative_data(nega_data, posi_num)

soft_acc_list = []
soft_prec_list = []
soft_recall_list = []
soft_f1_score_list = []
soft_auc_list = []
soft_aupr_list = []

for fold in range(fold_num):
    #Positive training sample: every positive whose position is not in this fold
    posi_train_data = np.array([x for i, x in enumerate(posi_data) if i % fold_num != fold])
    #Positive test sample: every positive whose position is in this fold
    posi_test_data = np.array([x for i, x in enumerate(posi_data) if i % fold_num == fold])
    #Negative test sample: the reserved negatives whose position is in this fold
    nega_test_data = np.array([x for i, x in enumerate(nega_cv_data) if i % fold_num == fold])
    #Generate test set
    X_test = np.concatenate((posi_test_data, nega_test_data))
    y_test = np.concatenate((np.ones(posi_test_data.shape[0]), np.zeros(nega_test_data.shape[0])))
    #Generate base_learner_num individual learners
    base_learners = base_xgb_learners(base_learner_num)
    trained_clfs = []
    most_imps_list = []
    #Every learner is trained on as many sampled negatives as there are positive training samples
    sample_num = posi_train_data.shape[0]
    for i in range(base_learner_num):
        print('Round', fold+1, 'cross validation\n')
        print('the', i+1, 'th individual learner training\n')
        #Each learner gets its own random draw of training negatives
        samples = random.sample(nega_train_data, sample_num)
        samples = np.array(samples)
        X_base_train = np.concatenate((posi_train_data, samples))
        y_base_train = np.concatenate((np.ones(posi_train_data.shape[0]), np.zeros(samples.shape[0])))
        if sel_fea == 'RF':
            print('Feature selection in progress\n')
            most_imps = feature_ranking_by_rf(X_base_train, y_base_train, sel_fea_num, sel_hp1, sel_hp2)
            most_imps_list.append(most_imps)
            X_base_train = X_base_train[:,most_imps]
            X_base_test = X_test[:,most_imps]
        else:
            X_base_test = X_test
        clf = base_learners[i]
        clf.fit(X_base_train,y_base_train)
        trained_clfs.append(clf)
        y_pred = clf.predict(X_base_test)
        y_prob = clf.predict_proba(X_base_test)
        y_prob = y_prob[:,1]
        tp, fp, tn, fn, acc, prec, recall, f1_score, auroc, aupr = calculate_performace(len(y_pred), y_pred, y_prob, y_test) 
        print('the', i+1, 'th individual learner proformance: \n  Acc = \t', acc, 
              '\n  prec = \t', prec, 
              '\n  recall = \t', recall, 
              '\n  f1_score = \t', f1_score, 
              '\n  AUC = \t', auroc, 
              '\n  aupr = \t', aupr, 
              '\n')
        
    #The score matrix is stored under its own name: assigning it to `base_probs`
    #would overwrite the function base_probs and break the next fold / a re-run
    if sel_fea == 'RF':
        fold_base_probs = base_probs_fs(X_test, most_imps_list, trained_clfs)
    else:
        fold_base_probs = base_probs(X_test, trained_clfs)
    #soft voting strategy
    pred_final, prob_final = soft_voting_strategy(fold_base_probs)
    soft_tp, soft_fp, soft_tn, soft_fn, soft_acc, soft_prec, soft_recall, soft_f1_score, soft_auc, soft_aupr = calculate_performace(len(pred_final), pred_final, prob_final, y_test)
    #Report the fold 1-based, consistent with the per-learner messages above
    print('Round ', fold+1, 'cross validation proformance after soft voting: \n  Acc = \t', soft_acc, 
          '\n  prec = \t', soft_prec, 
          '\n  recall = \t', soft_recall, 
          '\n  f1_score = \t', soft_f1_score, 
          '\n  AUC = \t', soft_auc, 
          '\n  AUPR = \t', soft_aupr,
          '\n')
    soft_acc_list.append(soft_acc)
    soft_prec_list.append(soft_prec)
    soft_recall_list.append(soft_recall)
    soft_f1_score_list.append(soft_f1_score)
    soft_auc_list.append(soft_auc)
    soft_aupr_list.append(soft_aupr)

#Per-fold metrics as arrays
soft_acc_arr = np.array(soft_acc_list)
soft_prec_arr = np.array(soft_prec_list)
soft_recall_arr = np.array(soft_recall_list)
soft_f1_score_arr = np.array(soft_f1_score_list)
soft_auc_arr = np.array(soft_auc_list)
soft_aupr_arr = np.array(soft_aupr_list)

#Mean of every metric over the folds
soft_ave_acc = np.mean(soft_acc_arr)
soft_ave_prec = np.mean(soft_prec_arr)
soft_ave_recall = np.mean(soft_recall_arr)
soft_ave_f1_score = np.mean(soft_f1_score_arr)
soft_ave_auc = np.mean(soft_auc_arr)
soft_ave_aupr = np.mean(soft_aupr_arr)

#Standard deviation of every metric over the folds
soft_std_acc = np.std(soft_acc_arr)
soft_std_prec = np.std(soft_prec_arr)
soft_std_recall = np.std(soft_recall_arr)
soft_std_f1_score = np.std(soft_f1_score_arr)
soft_std_auc = np.std(soft_auc_arr)
soft_std_aupr = np.std(soft_aupr_arr)

print('CSMDA Final proformance: \n  Acc = \t', soft_ave_acc, 
      '\n  prec = \t', soft_ave_prec, 
      '\n  recall = \t', soft_ave_recall, 
      '\n  f1_score = \t', soft_ave_f1_score, 
      '\n  AUC = \t', soft_ave_auc, 
      '\n  AUPR = \t', soft_ave_aupr)

Round 1 cross validation

the 1 th individual learner training

Feature selection in progress





the 1 th individual learner proformance: 
  Acc = 	 0.962707182320442 
  prec = 	 0.96398891966759 
  recall = 	 0.9613259668508287 
  f1_score = 	 0.9626556016597511 
  AUC = 	 0.992465634952128 
  aupr = 	 0.9937499946378503 

Round 1 cross validation

the 2 th individual learner training

Feature selection in progress





the 2 th individual learner proformance: 
  Acc = 	 0.9636279926335175 
  prec = 	 0.9649122807017544 
  recall = 	 0.9622467771639043 
  f1_score = 	 0.9635776855693868 
  AUC = 	 0.9921586981811029 
  aupr = 	 0.9935656953271996 

Round 1 cross validation

the 3 th individual learner training

Feature selection in progress





the 3 th individual learner proformance: 
  Acc = 	 0.9585635359116023 
  prec = 	 0.9628252788104089 
  recall = 	 0.9539594843462247 
  f1_score = 	 0.9583718778908417 
  AUC = 	 0.9928505777533585 
  aupr = 	 0.9938855172941166 

Round 1 cross validation

the 4 th individual learner training

Feature selection in progress





the 4 th individual learner proformance: 
  Acc = 	 0.9636279926335175 
  prec = 	 0.9632014719411224 
  recall = 	 0.9640883977900553 
  f1_score = 	 0.9636447307869305 
  AUC = 	 0.9922112674623281 
  aupr = 	 0.9936562222184856 

Round 1 cross validation

the 5 th individual learner training

Feature selection in progress





the 5 th individual learner proformance: 
  Acc = 	 0.9594843462246777 
  prec = 	 0.956959706959707 
  recall = 	 0.9622467771639043 
  f1_score = 	 0.9595959595959596 
  AUC = 	 0.9926911741264173 
  aupr = 	 0.9938500599037708 

Round 1 cross validation

the 6 th individual learner training

Feature selection in progress





the 6 th individual learner proformance: 
  Acc = 	 0.9590239410681399 
  prec = 	 0.9628597957288765 
  recall = 	 0.9548802946593001 
  f1_score = 	 0.9588534442903375 
  AUC = 	 0.9924240882621272 
  aupr = 	 0.9936769987919324 

Round 1 cross validation

the 7 th individual learner training

Feature selection in progress





the 7 th individual learner proformance: 
  Acc = 	 0.9594843462246777 
  prec = 	 0.9594843462246777 
  recall = 	 0.9594843462246777 
  f1_score = 	 0.9594843462246777 
  AUC = 	 0.9924469813362093 
  aupr = 	 0.9936709111584046 

Round 1 cross validation

the 8 th individual learner training

Feature selection in progress





the 8 th individual learner proformance: 
  Acc = 	 0.9567219152854513 
  prec = 	 0.9542124542124543 
  recall = 	 0.9594843462246777 
  f1_score = 	 0.9568411386593205 
  AUC = 	 0.9924147614541682 
  aupr = 	 0.9936040024296637 

Round 1 cross validation

the 9 th individual learner training

Feature selection in progress





the 9 th individual learner proformance: 
  Acc = 	 0.9567219152854513 
  prec = 	 0.9618249534450651 
  recall = 	 0.9511970534069981 
  f1_score = 	 0.9564814814814815 
  AUC = 	 0.9923494737984527 
  aupr = 	 0.9934666255376695 

Round 1 cross validation

the 10 th individual learner training

Feature selection in progress





the 10 th individual learner proformance: 
  Acc = 	 0.9576427255985267 
  prec = 	 0.9568014705882353 
  recall = 	 0.9585635359116023 
  f1_score = 	 0.9576816927322906 
  AUC = 	 0.9925436409823333 
  aupr = 	 0.9935363965551359 


Soft voting ...

Round  0 cross validation proformance after soft voting: 
  Acc = 	 0.9622467771639043 
  prec = 	 0.966542750929368 
  recall = 	 0.9576427255985267 
  f1_score = 	 0.9620721554116558 
  AUC = 	 0.9936687931788815 
  AUPR = 	 0.9947509196520367 

Round 2 cross validation

the 1 th individual learner training

Feature selection in progress





the 1 th individual learner proformance: 
  Acc = 	 0.9548802946593001 
  prec = 	 0.960820895522388 
  recall = 	 0.9484346224677717 
  f1_score = 	 0.9545875810936052 
  AUC = 	 0.993108336809689 
  aupr = 	 0.9939129841899232 

Round 2 cross validation

the 2 th individual learner training

Feature selection in progress





the 2 th individual learner proformance: 
  Acc = 	 0.9599447513812155 
  prec = 	 0.9620721554116559 
  recall = 	 0.9576427255985267 
  f1_score = 	 0.9598523304107062 
  AUC = 	 0.9936357254052075 
  aupr = 	 0.9945548864989189 

Round 2 cross validation

the 3 th individual learner training

Feature selection in progress





the 3 th individual learner proformance: 
  Acc = 	 0.9608655616942909 
  prec = 	 0.9699530516431925 
  recall = 	 0.9511970534069981 
  f1_score = 	 0.9604834960483495 
  AUC = 	 0.9926606500276414 
  aupr = 	 0.993765299582878 

Round 2 cross validation

the 4 th individual learner training

Feature selection in progress





the 4 th individual learner proformance: 
  Acc = 	 0.9562615101289135 
  prec = 	 0.9643861293345829 
  recall = 	 0.9475138121546961 
  f1_score = 	 0.9558755225267068 
  AUC = 	 0.9928514256449911 
  aupr = 	 0.9938568842417442 

Round 2 cross validation

the 5 th individual learner training

Feature selection in progress





the 5 th individual learner proformance: 
  Acc = 	 0.9585635359116023 
  prec = 	 0.9671669793621013 
  recall = 	 0.9493554327808471 
  f1_score = 	 0.95817843866171 
  AUC = 	 0.9929904798727484 
  aupr = 	 0.9939370775938431 

Round 2 cross validation

the 6 th individual learner training

Feature selection in progress





the 6 th individual learner proformance: 
  Acc = 	 0.9548802946593001 
  prec = 	 0.9582560296846011 
  recall = 	 0.9511970534069981 
  f1_score = 	 0.9547134935304991 
  AUC = 	 0.9927344165996832 
  aupr = 	 0.993607851422988 

Round 2 cross validation

the 7 th individual learner training

Feature selection in progress





the 7 th individual learner proformance: 
  Acc = 	 0.9594843462246777 
  prec = 	 0.9689849624060151 
  recall = 	 0.9493554327808471 
  f1_score = 	 0.9590697674418606 
  AUC = 	 0.9931176636176483 
  aupr = 	 0.9941074320674828 

Round 2 cross validation

the 8 th individual learner training

Feature selection in progress





the 8 th individual learner proformance: 
  Acc = 	 0.9571823204419889 
  prec = 	 0.9653233364573571 
  recall = 	 0.9484346224677717 
  f1_score = 	 0.9568044588945657 
  AUC = 	 0.9939519889841919 
  aupr = 	 0.994724849394144 

Round 2 cross validation

the 9 th individual learner training

Feature selection in progress





the 9 th individual learner proformance: 
  Acc = 	 0.9567219152854513 
  prec = 	 0.9652908067542214 
  recall = 	 0.9475138121546961 
  f1_score = 	 0.9563197026022305 
  AUC = 	 0.9929124738425431 
  aupr = 	 0.9938998981402061 

Round 2 cross validation

the 10 th individual learner training

Feature selection in progress





the 10 th individual learner proformance: 
  Acc = 	 0.9604051565377533 
  prec = 	 0.9672897196261683 
  recall = 	 0.9530386740331491 
  f1_score = 	 0.9601113172541744 
  AUC = 	 0.993791737465618 
  aupr = 	 0.9945870219627587 


Soft voting ...

Round  1 cross validation proformance after soft voting: 
  Acc = 	 0.9604051565377533 
  prec = 	 0.9699248120300752 
  recall = 	 0.9502762430939227 
  f1_score = 	 0.96 
  AUC = 	 0.994200421232563 
  AUPR = 	 0.9949623582920759 

Round 3 cross validation

the 1 th individual learner training

Feature selection in progress





the 1 th individual learner proformance: 
  Acc = 	 0.9576427255985267 
  prec = 	 0.9644859813084112 
  recall = 	 0.9502762430939227 
  f1_score = 	 0.9573283858998145 
  AUC = 	 0.9913362432974164 
  aupr = 	 0.9927581812738054 

Round 3 cross validation

the 2 th individual learner training

Feature selection in progress





the 2 th individual learner proformance: 
  Acc = 	 0.9558011049723757 
  prec = 	 0.9558011049723757 
  recall = 	 0.9558011049723757 
  f1_score = 	 0.9558011049723757 
  AUC = 	 0.9911666649708833 
  aupr = 	 0.9925314722675324 

Round 3 cross validation

the 3 th individual learner training

Feature selection in progress





the 3 th individual learner proformance: 
  Acc = 	 0.9548802946593001 
  prec = 	 0.955719557195572 
  recall = 	 0.9539594843462247 
  f1_score = 	 0.9548387096774194 
  AUC = 	 0.9916770957337485 
  aupr = 	 0.9929105720694453 

Round 3 cross validation

the 4 th individual learner training

Feature selection in progress





the 4 th individual learner proformance: 
  Acc = 	 0.9585635359116023 
  prec = 	 0.9611111111111111 
  recall = 	 0.9558011049723757 
  f1_score = 	 0.9584487534626039 
  AUC = 	 0.9911937975031289 
  aupr = 	 0.992566843714421 

Round 3 cross validation

the 5 th individual learner training

Feature selection in progress





the 5 th individual learner proformance: 
  Acc = 	 0.9530386740331491 
  prec = 	 0.9555555555555556 
  recall = 	 0.9502762430939227 
  f1_score = 	 0.9529085872576178 
  AUC = 	 0.9906926935482231 
  aupr = 	 0.9921220016352316 

Round 3 cross validation

the 6 th individual learner training

Feature selection in progress





the 6 th individual learner proformance: 
  Acc = 	 0.9521178637200737 
  prec = 	 0.9537892791127541 
  recall = 	 0.9502762430939227 
  f1_score = 	 0.9520295202952029 
  AUC = 	 0.9904205203341372 
  aupr = 	 0.992078568769249 

Round 3 cross validation

the 7 th individual learner training

Feature selection in progress





the 7 th individual learner proformance: 
  Acc = 	 0.9553406998158379 
  prec = 	 0.9532538955087076 
  recall = 	 0.9576427255985267 
  f1_score = 	 0.9554432705558109 
  AUC = 	 0.9913820294455806 
  aupr = 	 0.9928539159493097 

Round 3 cross validation

the 8 th individual learner training

Feature selection in progress





the 8 th individual learner proformance: 
  Acc = 	 0.9544198895027625 
  prec = 	 0.9548387096774194 
  recall = 	 0.9539594843462247 
  f1_score = 	 0.954398894518655 
  AUC = 	 0.9913226770312941 
  aupr = 	 0.9928702998854557 

Round 3 cross validation

the 9 th individual learner training

Feature selection in progress





the 9 th individual learner proformance: 
  Acc = 	 0.9590239410681399 
  prec = 	 0.9611470860314524 
  recall = 	 0.9567219152854513 
  f1_score = 	 0.9589293954776189 
  AUC = 	 0.9913044473611916 
  aupr = 	 0.9928011039946012 

Round 3 cross validation

the 10 th individual learner training

Feature selection in progress





the 10 th individual learner proformance: 
  Acc = 	 0.9558011049723757 
  prec = 	 0.9634831460674157 
  recall = 	 0.9475138121546961 
  f1_score = 	 0.9554317548746517 
  AUC = 	 0.9905375293794451 
  aupr = 	 0.9920551426800286 


Soft voting ...

Round  2 cross validation proformance after soft voting: 
  Acc = 	 0.9599447513812155 
  prec = 	 0.9637883008356546 
  recall = 	 0.9558011049723757 
  f1_score = 	 0.9597780859916782 
  AUC = 	 0.9922672283100842 
  AUPR = 	 0.993627838186911 

Round 4 cross validation

the 1 th individual learner training

Feature selection in progress





the 1 th individual learner proformance: 
  Acc = 	 0.9594843462246777 
  prec = 	 0.970754716981132 
  recall = 	 0.9475138121546961 
  f1_score = 	 0.9589934762348555 
  AUC = 	 0.9914549481259899 
  aupr = 	 0.9926869661381762 

Round 4 cross validation

the 2 th individual learner training

Feature selection in progress





the 2 th individual learner proformance: 
  Acc = 	 0.9534990791896869 
  prec = 	 0.9624413145539906 
  recall = 	 0.9438305709023941 
  f1_score = 	 0.9530450953045095 
  AUC = 	 0.9903671031612792 
  aupr = 	 0.9918504484303247 

Round 4 cross validation

the 3 th individual learner training

Feature selection in progress





the 3 th individual learner proformance: 
  Acc = 	 0.9488950276243094 
  prec = 	 0.9577464788732394 
  recall = 	 0.9392265193370166 
  f1_score = 	 0.9483960948396094 
  AUC = 	 0.9891630970428932 
  aupr = 	 0.9909653289230775 

Round 4 cross validation

the 4 th individual learner training

Feature selection in progress





the 4 th individual learner proformance: 
  Acc = 	 0.9567219152854513 
  prec = 	 0.9679245283018868 
  recall = 	 0.9447513812154696 
  f1_score = 	 0.9561975768872321 
  AUC = 	 0.9906146875180176 
  aupr = 	 0.9921122267070746 

Round 4 cross validation

the 5 th individual learner training

Feature selection in progress





the 5 th individual learner proformance: 
  Acc = 	 0.9562615101289135 
  prec = 	 0.9661335841956726 
  recall = 	 0.9456721915285451 
  f1_score = 	 0.9557933922754769 
  AUC = 	 0.9904069540680145 
  aupr = 	 0.9920172777309104 

Round 4 cross validation

the 6 th individual learner training

Feature selection in progress





the 6 th individual learner proformance: 
  Acc = 	 0.9576427255985267 
  prec = 	 0.967984934086629 
  recall = 	 0.9465930018416207 
  f1_score = 	 0.957169459962756 
  AUC = 	 0.9902212658004605 
  aupr = 	 0.9918791554702294 

Round 4 cross validation

the 7 th individual learner training

Feature selection in progress





the 7 th individual learner proformance: 
  Acc = 	 0.9525782688766115 
  prec = 	 0.9606373008434864 
  recall = 	 0.9438305709023941 
  f1_score = 	 0.9521597770552717 
  AUC = 	 0.9905968817937317 
  aupr = 	 0.9918510634646125 

Round 4 cross validation

the 8 th individual learner training

Feature selection in progress





the 8 th individual learner proformance: 
  Acc = 	 0.9539594843462247 
  prec = 	 0.9650943396226415 
  recall = 	 0.9419889502762431 
  f1_score = 	 0.9534016775396086 
  AUC = 	 0.9904747853986278 
  aupr = 	 0.9919305647601392 

Round 4 cross validation

the 9 th individual learner training

Feature selection in progress





the 9 th individual learner proformance: 
  Acc = 	 0.9507366482504604 
  prec = 	 0.9631031220435194 
  recall = 	 0.9373848987108656 
  f1_score = 	 0.9500699953336444 
  AUC = 	 0.9904985263643424 
  aupr = 	 0.9918277126155538 

Round 4 cross validation

the 10 th individual learner training

Feature selection in progress





the 10 th individual learner proformance: 
  Acc = 	 0.9493554327808471 
  prec = 	 0.9535315985130112 
  recall = 	 0.9447513812154696 
  f1_score = 	 0.9491211840888067 
  AUC = 	 0.9903230127963806 
  aupr = 	 0.9916587468476883 


Soft voting ...

Round  3 cross validation proformance after soft voting: 
  Acc = 	 0.9571823204419889 
  prec = 	 0.9706161137440759 
  recall = 	 0.9429097605893186 
  f1_score = 	 0.9565623540401682 
  AUC = 	 0.9917118592906878 
  AUPR = 	 0.9929861274970534 

Round 5 cross validation

the 1 th individual learner training

Feature selection in progress





the 1 th individual learner proformance: 
  Acc = 	 0.9640883977900553 
  prec = 	 0.9666666666666667 
  recall = 	 0.9613259668508287 
  f1_score = 	 0.9639889196675899 
  AUC = 	 0.9922044843292669 
  aupr = 	 0.9935378713014859 

Round 5 cross validation

the 2 th individual learner training

Feature selection in progress





the 2 th individual learner proformance: 
  Acc = 	 0.9604051565377533 
  prec = 	 0.9655493482309124 
  recall = 	 0.9548802946593001 
  f1_score = 	 0.9601851851851851 
  AUC = 	 0.9921459798066128 
  aupr = 	 0.9936752353024809 

Round 5 cross validation

the 3 th individual learner training

Feature selection in progress





the 3 th individual learner proformance: 
  Acc = 	 0.9631675874769797 
  prec = 	 0.9718574108818011 
  recall = 	 0.9539594843462247 
  f1_score = 	 0.9628252788104089 
  AUC = 	 0.9912972402823139 
  aupr = 	 0.9932500347669975 

Round 5 cross validation

the 4 th individual learner training

Feature selection in progress





the 4 th individual learner proformance: 
  Acc = 	 0.964548802946593 
  prec = 	 0.9701770736253494 
  recall = 	 0.9585635359116023 
  f1_score = 	 0.9643353404353867 
  AUC = 	 0.9924842885680466 
  aupr = 	 0.9938399800499242 

Round 5 cross validation

the 5 th individual learner training

Feature selection in progress





the 5 th individual learner proformance: 
  Acc = 	 0.9613259668508287 
  prec = 	 0.9604779411764706 
  recall = 	 0.9622467771639043 
  f1_score = 	 0.9613615455381785 
  AUC = 	 0.9915940023537472 
  aupr = 	 0.9931618454243665 

Round 5 cross validation

the 6 th individual learner training

Feature selection in progress





the 6 th individual learner proformance: 
  Acc = 	 0.9631675874769797 
  prec = 	 0.9666048237476809 
  recall = 	 0.9594843462246777 
  f1_score = 	 0.9630314232902033 
  AUC = 	 0.9922146590288587 
  aupr = 	 0.9936924405296705 

Round 5 cross validation

the 7 th individual learner training

Feature selection in progress





the 7 th individual learner proformance: 
  Acc = 	 0.9594843462246777 
  prec = 	 0.9637546468401487 
  recall = 	 0.9548802946593001 
  f1_score = 	 0.9592969472710453 
  AUC = 	 0.9912387357596599 
  aupr = 	 0.9928544798180818 

Round 5 cross validation

the 8 th individual learner training

Feature selection in progress





the 8 th individual learner proformance: 
  Acc = 	 0.9631675874769797 
  prec = 	 0.9674721189591078 
  recall = 	 0.9585635359116023 
  f1_score = 	 0.9629972247918595 
  AUC = 	 0.9917237297735451 
  aupr = 	 0.9933202873931489 

Round 5 cross validation

the 9 th individual learner training

Feature selection in progress





the 9 th individual learner proformance: 
  Acc = 	 0.9567219152854513 
  prec = 	 0.9635514018691589 
  recall = 	 0.9493554327808471 
  f1_score = 	 0.9564007421150279 
  AUC = 	 0.9920120129286516 
  aupr = 	 0.9933730505030618 

Round 5 cross validation

the 10 th individual learner training

Feature selection in progress





the 10 th individual learner proformance: 
  Acc = 	 0.9571823204419889 
  prec = 	 0.9592969472710453 
  recall = 	 0.9548802946593001 
  f1_score = 	 0.9570835256114444 
  AUC = 	 0.9918805897255882 
  aupr = 	 0.9933289250616584 


Soft voting ...

Round  4 cross validation proformance after soft voting: 
  Acc = 	 0.9654696132596685 
  prec = 	 0.9693593314763231 
  recall = 	 0.9613259668508287 
  f1_score = 	 0.9653259361997226 
  AUC = 	 0.9930099813802997 
  AUPR = 	 0.9944126614932424 

CSMDA Final proformance: 
  Acc = 	 0.961049723756906 
  prec = 	 0.9680462618030994 
  recall = 	 0.9535911602209947 
  f1_score = 	 0.9607477063286449 
  AUC = 	 0.9929716566785032 
  AUPR = 	 0.9941479810242638


In [73]:
#Round every averaged metric to 4 decimal places (the names are rebound to the rounded values)
(soft_ave_acc, soft_ave_prec, soft_ave_recall,
 soft_ave_f1_score, soft_ave_auc, soft_ave_aupr) = [
    np.round(metric, 4)
    for metric in (soft_ave_acc, soft_ave_prec, soft_ave_recall,
                   soft_ave_f1_score, soft_ave_auc, soft_ave_aupr)
]
print('  Acc = \t', soft_ave_acc, 
      '\n  prec = \t', soft_ave_prec, 
      '\n  recall = \t', soft_ave_recall, 
      '\n  f1_score = \t', soft_ave_f1_score, 
      '\n  AUC = \t', soft_ave_auc, 
      '\n  aupr = \t', soft_ave_aupr)

  Acc = 	 0.961 
  prec = 	 0.968 
  recall = 	 0.9536 
  f1_score = 	 0.9607 
  AUC = 	 0.993 
  aupr = 	 0.9941


In [75]:
#Round every standard deviation to 4 decimal places (the names are rebound to the rounded values)
(soft_std_acc, soft_std_prec, soft_std_recall,
 soft_std_f1_score, soft_std_auc, soft_std_aupr) = [
    np.round(metric, 4)
    for metric in (soft_std_acc, soft_std_prec, soft_std_recall,
                   soft_std_f1_score, soft_std_auc, soft_std_aupr)
]
print('  Acc standard deviation= \t', soft_std_acc, 
      '\n  prec standard deviation= \t', soft_std_prec, 
      '\n  recall standard deviation= \t', soft_std_recall, 
      '\n  f1_score standard deviation= \t', soft_std_f1_score, 
      '\n  AUC standard deviation= \t', soft_std_auc, 
      '\n  aupr standard deviation= \t', soft_std_aupr)

  Acc standard deviation= 	 0.0027 
  prec standard deviation= 	 0.0025 
  recall standard deviation= 	 0.0064 
  f1_score standard deviation= 	 0.0029 
  AUC standard deviation= 	 0.0009 
  aupr standard deviation= 	 0.0007
