In [None]:
import xlrd
from sklearn import preprocessing
import numpy as np
import pandas as pd
from sklearn import metrics

from keras.models import Sequential  
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import MaxPooling1D, AveragePooling1D
from keras.layers.convolutional import Conv1D  
from keras.utils import np_utils

import pywt

# Set global constants
# When this flag equals 1 the samples are passed through sklearn's Normalizer
# after the wavelet transform; any other value skips that step.
_SCALE_FLAG_ = 1


# Read data and labels
# Feature matrix: tab-separated file whose "gene" column becomes the index;
# only the remaining columns end up in the NumPy array.
x = pd.read_csv('data/OV_finalMatrix.tsv', sep='\t', index_col="gene")
x = np.array(x)
print(x.shape)          #read datas

# Labels: the second comma-separated field of every line after the first.
# The file is read inside a `with` block so the handle is always closed
# (it was previously left open), and the first line (presumably the header)
# is skipped up front instead of being deleted from the list afterwards.
y = []
with open("data/OV_clinical.csv") as f:
    next(f, None)                      # discard the first line
    for line in f:
        temp = line.split(",")
        y.append(temp[1].strip())

y = np.array(y)                        # array of label strings
print(len(y))          #read labels
    


#Take a wavelet transform
waveletname='db3'                      #commonly used wavelet functions
                                       #db1, db3, db5
                                       #coif1, coif3, coif5
                                       #bior3.1, bior3.3, bior3.5
                                       #sym2, sym4, sym6

# Number of successive single-level SWT passes applied to each signal.
_SWT_LEVELS_ = 3

# Each column of x is treated as one signal (one sample).
signallen = x.shape[0]
samplenum = x.shape[1]
print("signallen："+str(signallen))
print("samplenum："+str(samplenum))

# Pad with one zero row when the signal length is odd.
# presumably because pywt.swt needs an even length for a level-1 transform — confirm
if signallen % 2 !=0:
    x = np.r_[x, np.zeros((1,samplenum))]

# Collect one transformed column per sample and stack them once at the end.
# Growing the matrix with np.c_ inside the loop re-copied everything on each
# iteration and left a 1-D array when there was only a single sample.
wavelet_columns = []
for i in range(samplenum):

    if i % 50 == 0:
        print ("Starting SWT transform: %d / %d"%(i, samplenum))

    data = x[:,i]

    # Repeatedly apply a level-1 SWT and keep only the first array of the
    # returned pair (the approximation coefficients in pywt's convention).
    # NOTE(review): this re-applies the same level-1 filter each pass, which
    # is presumably not identical to pywt.swt(..., level=3) — confirm intent.
    for j in range(_SWT_LEVELS_):
        coef = pywt.swt(data, waveletname, level=1)
        data = coef[0][0]

    wavelet_columns.append(data)

x_wavelet = np.column_stack(wavelet_columns)   # shape: (padded signal length, samplenum)

# Transpose so that every row is one sample.
x=x_wavelet.T
print(x.shape)
    
# Optionally rescale the samples with sklearn's Normalizer (enabled when the
# global flag equals 1). The fitted object stays available as `normalizer`.
if _SCALE_FLAG_ == 1:
    normalizer = preprocessing.Normalizer()
    x = normalizer.fit_transform(x)

In [None]:
# Set the constants
n_class = 2


_CUSTOM_FILTER_NUMBER_=[64, 64]
_CUSTOM_KERNEL_SIZE_=[4, 4]
_CUSTOM_POOL_SIZE_=[4, 4]
_CUSTOM_STRIDES_=2  
_CUSTOM_DROP_RATE_=0.5 
_CUSTOM_BATCH_SIZE_=80
_CUSTOM_EPOCHS_=200
_CUSTOM_SPLIT_RATE_=0.1
_CUSTOM_OPT_FUNCTION_='RMSprop'             # Usable functions:  SGD
#优化器                                      #                    RMSprop
                                            #                    Adagrad
                                            #                    Adadelta
                                            #                    Adam
                                            #                    Adamax
                                            #                    Nadam
_CUSTOM_LOSS_FUNCTION_='binary_crossentropy'    # Usable functions: mse / mean_squared_error
#目标函数                                        #                   mae / mean_absolute_error
                                                #                   mape / mean_absolute_percentage_error
                                                #                   msle / mean_squared_logarithmic_error
                                                #                   squared_hinge
                                                #                   hinge
                                                #                   categorical_hinge
                                                #                   binary_crossentropy
                                                #                   logcosh
                                                #                   categorical_crossentropy
                                                #                   cosine_proximity
_CUSTOM_ACT_FUNCTION_='relu'     # Usable functions:    softmax
#指定激活函数                     #                      elu
                                 #                      selu
                                 #                      softplus
                                 #                      softsign
                                 #                      relu
                                 #                      tanh
                                 #                      sigmoid
                                 #                      hard_sigmoid
                                 #                      linear
_CUSTOM_ACT_FUNCTION_OUTPUT_LAYER_='softmax'

In [None]:
from keras import optimizers
from keras.optimizers import SGD
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve
from sklearn.metrics import auc


# Running totals for every metric; each is increased once per training
# repetition and divided by the repetition count when the averages are printed.
sum_loss = 0
sum_train_accuracy = sum_test_accuracy = 0
sum_train_mcc = sum_test_mcc = 0
sum_precision_train = sum_precision_test = 0
sum_recall_train = sum_recall_test = 0
sum_f1_train = sum_f1_test = 0
sum_auc_train = sum_auc_test = 0
sum_se_train = sum_se_test = 0
sum_sp_train = sum_sp_test = 0


# Number of independent split / train / evaluate repetitions.
trainning_times = 100

from keras import backend as K   # needed only for clear_session() below

# Repeat: random stratified split -> build model -> train -> evaluate, and
# add every metric of the repetition to its running total.
for i in range(trainning_times):
    print("model training"+str(i+1)+":")

    # A brand-new model is built on every repetition. Release the backend
    # state left over from the previous one first; otherwise memory use
    # keeps growing over the repetitions.
    K.clear_session()

    #Split train set and test set (70/30, stratified by label, unseeded)
    train_x,test_x,train_y,test_y=train_test_split(x,y,test_size=0.3,random_state=None,stratify=y)


    #Reshape the training data and test data
    # A trailing axis of size 1 is appended at position 2 so that the arrays
    # match input_shape=(featureLen, input_dim) of the Conv1D layer.
    Mtraindata = np.expand_dims(train_x, axis = 2)
    Mtestdata = np.expand_dims(test_x, axis = 2)
    datashape = Mtraindata.shape
    featureLen = datashape[1]
    input_dim = 1

    # Recompile the Class Labels
    Ori_Vtrainlabel = train_y
    Ori_Vtestlabel = test_y
    # Integer labels, converted once and reused by every metric below.
    train_true = Ori_Vtrainlabel.astype(int)
    test_true = Ori_Vtestlabel.astype(int)

    # One-hot encoded targets for training / evaluation.
    Vtrainlabel = np_utils.to_categorical(train_true, n_class)
    Vtestlabel = np_utils.to_categorical(test_true, n_class)

    # Model construction
    model = Sequential()

    model.add(Conv1D(filters=_CUSTOM_FILTER_NUMBER_[0],
                 kernel_size=_CUSTOM_KERNEL_SIZE_[0],
                 padding='same',
                 activation=_CUSTOM_ACT_FUNCTION_,
                 strides=_CUSTOM_STRIDES_,
                 input_shape=(featureLen, input_dim)))
    model.add(MaxPooling1D(pool_size = _CUSTOM_POOL_SIZE_[0], strides=None, padding='same'))

    # Disabled second convolution stage, kept for reference:
    #model.add(Conv1D(filters=_CUSTOM_FILTER_NUMBER_[0],
    #             kernel_size=_CUSTOM_KERNEL_SIZE_[0],
    #             padding='same',
    #             activation=_CUSTOM_ACT_FUNCTION_,
    #             strides=_CUSTOM_STRIDES_))
    ##model.add(Dropout(_CUSTOM_DROP_RATE_))
    #model.add(MaxPooling1D(pool_size = _CUSTOM_POOL_SIZE_[0], strides=None, padding='same'))

    #model.add(BatchNormalization())
    model.add(Flatten())


    #model.add(Dense(n_class, kernel_regularizer=regularizers.l2(0.01),activity_regularizer=regularizers.l1(0.001)))
    model.add(Dense(n_class))

    # NOTE(review): dropout is applied to the n_class outputs right before the
    # final activation, with a hard-coded rate — confirm this is intended.
    model.add(Dropout(0.6))

    model.add(Activation(_CUSTOM_ACT_FUNCTION_OUTPUT_LAYER_))

    model.compile(loss=_CUSTOM_LOSS_FUNCTION_, optimizer=_CUSTOM_OPT_FUNCTION_, metrics=['binary_accuracy'])

    #model.summary()

    # Train the model
    hist = model.fit(Mtraindata, Vtrainlabel,
                 batch_size =_CUSTOM_BATCH_SIZE_,
                 epochs=_CUSTOM_EPOCHS_,
                 verbose=0,
                 shuffle=True,
                 validation_split=_CUSTOM_SPLIT_RATE_)

    # score[0] is the loss on the held-out test split.
    score = model.evaluate(Mtestdata, Vtestlabel, verbose=0)

    print('Test loss:', score[0])

    sum_loss+=score[0]

    tr_predict = model.predict(Mtraindata)
    test_predict = model.predict(Mtestdata)

    # Column 1 of the prediction output is used as the score of class 1.
    train_score_y=tr_predict[:,1]
    test_score_y=test_predict[:,1]


    # ROC / AUC from the class-1 scores.
    fpr1, tpr1, thresholds1 = roc_curve(train_true, train_score_y, pos_label=1)
    auc1= auc(fpr1,tpr1)
    sum_auc_train+=auc1
    print("AUC Score (Training):  %5.4f"%auc1)
    fpr2, tpr2, thresholds2 = roc_curve(test_true, test_score_y, pos_label=1)
    auc2= auc(fpr2,tpr2)
    sum_auc_test+=auc2
    print("AUC Score (Test):  %5.4f"%auc2)


    # Hard class predictions: index of the larger output per sample.
    tr_predict = tr_predict.argmax(1)
    test_predict = test_predict.argmax(1)

    acc_train=metrics.accuracy_score(train_true, tr_predict)
    sum_train_accuracy+=acc_train
    print("ACC Score (Training):  %5.4f"%acc_train)

    acc_test=metrics.accuracy_score(test_true, test_predict)
    sum_test_accuracy+=acc_test
    print("ACC Score (Test):  %5.4f"%acc_test)


    # Confusion-matrix cells with the label order fixed to [0, 1].
    tn1, fp1, fn1, tp1 = metrics.confusion_matrix(train_true, tr_predict, labels=[0, 1]).ravel()
    #print("Training: (TP, FP, TN, FN):  %d, %d, %d, %d"%(tp1, fp1, tn1, fn1))
    tn, fp, fn, tp = metrics.confusion_matrix(test_true, test_predict, labels=[0, 1]).ravel()
    #print("Test: (TP, FP, TN, FN):  %d, %d, %d, %d"%(tp, fp, tn, fn))

    # Sensitivity: TP / (TP + FN)
    SE_train=tp1/ float(tp1+ fn1)
    SE_test=tp/ float(tp+ fn)
    sum_se_train+=SE_train
    sum_se_test+=SE_test
    #print("SE (Training):  %5.4f"%SE_train)
    #print("SE (Test):  %5.4f"%SE_test)
    # Specificity: TN / (TN + FP)
    SP_train= tn1 / float(tn1 + fp1)
    SP_test= tn / float(tn + fp)
    sum_sp_train+=SP_train
    sum_sp_test+=SP_test
    #print("SP (Training):  %5.4f"%SP_train)
    #print("SP (Test):  %5.4f"%SP_test)

    # Matthews correlation coefficient
    mcc_train = metrics.matthews_corrcoef(train_true, tr_predict)
    sum_train_mcc+=mcc_train
    #print ("MCC Score (Training):  %5.4f"%mcc_train)
    mcc_test = metrics.matthews_corrcoef(test_true, test_predict)
    sum_test_mcc+=mcc_test
    #print ("MCC Score (Test):  %5.4f"%mcc_test)

    # Precision, recall and F1 all use average='weighted'.
    precision_train=precision_score(train_true, tr_predict, average='weighted')
    precision_test=precision_score(test_true, test_predict, average='weighted')
    sum_precision_train+=precision_train
    sum_precision_test+=precision_test

    #print("precision_train:",precision_train)
    #print("precision_test:",precision_test)

    recall_train=recall_score(train_true, tr_predict, average='weighted')
    recall_test=recall_score(test_true, test_predict, average='weighted')
    sum_recall_train+=recall_train
    sum_recall_test+=recall_test
    #print("recall_train:",recall_train)
    #print("recall_test:",recall_test)

    # Compute the F1 score
    f1_train=f1_score(train_true, tr_predict, average='weighted')
    f1_test=f1_score(test_true, test_predict, average='weighted')
    sum_f1_train+=f1_train
    sum_f1_test+=f1_test
    #print("f1_train:",f1_train)
    #print("f1_test:",f1_test)
    
# Report every metric averaged over the training repetitions.
# The mean test loss is included as well: sum_loss is accumulated on every
# repetition but was previously never reported.
print('Average test loss:', sum_loss/trainning_times)

# (label, running total) pairs, printed in a fixed order.
_metric_totals = (
    ('Average train accuracy:', sum_train_accuracy),
    ('Average test accuracy:', sum_test_accuracy),
    ('Average train auc:', sum_auc_train),
    ('Average test auc:', sum_auc_test),
    ('Average train SE:', sum_se_train),
    ('Average test SE:', sum_se_test),
    ('Average train SP:', sum_sp_train),
    ('Average test SP:', sum_sp_test),
    ('Average mcc_train:', sum_train_mcc),
    ('Average mcc_test:', sum_test_mcc),
    ('Average precision_train:', sum_precision_train),
    ('Average precision_test:', sum_precision_test),
    ('Average recall_train:', sum_recall_train),
    ('Average recall_test:', sum_recall_test),
    ('Average f1_train:', sum_f1_train),
    ('Average f1_test:', sum_f1_test),
)
for _label, _total in _metric_totals:
    print(_label, _total/trainning_times)