In [1]:
from resnet_ecg.utils import one_hot,get_batches
from resnet_ecg.ecg_preprocess import ecg_preprocessing
from resnet_ecg.densemodel import Net
from resnet_ecg.ecg import resnetmodel

Using TensorFlow backend.


In [2]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from utils import extract_basic_features

import wfdb
import os
import wfdb.processing as wp
import matplotlib.pyplot as plt
from scipy import signal
from utils import find_noise_features, extract_basic_features
import shutil
import json
import time
import random as rn
#from lightgbm import LGBMClassifier
from scipy import sparse
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold,StratifiedKFold
#from xgboost import XGBClassifier


In [3]:
from keras.utils import to_categorical
from keras.optimizers import SGD,Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler,EarlyStopping,ReduceLROnPlateau
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
from sklearn.model_selection import StratifiedKFold

config = tf.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
config.gpu_options.per_process_gpu_memory_fraction = 0.8

session = tf.Session(config=config)

KTF.set_session(session )

In [4]:
os.environ['PYTHONHASHSEED'] = '0'

np.random.seed(42)
rn.seed(12345)

tf.set_random_seed(1234)

In [5]:
import os
import warnings
warnings.filterwarnings("ignore")
import scipy.io as sio
train_dataset_path = os.getcwd()+"/Train/"
val_dataset_path = os.getcwd()+"/Val/"

In [6]:
train_files = os.listdir(train_dataset_path)
train_files.sort()
val_files = os.listdir(val_dataset_path)
val_files.sort()

In [7]:
labels = pd.read_csv("reference.csv")
labels.head()

Unnamed: 0,File_name,label1,label2,label3,label4,label5,label6,label7,label8
0,TRAIN0001,8,,,,,,,
1,TRAIN0002,8,,,,,,,
2,TRAIN0003,8,,,,,,,
3,TRAIN0004,8,,,,,,,
4,TRAIN0005,8,,,,,,,


In [8]:
from keras.layers import Input
from keras.models import Model,load_model

config_file = './resnet_ecg/examples/cinc17/config.json'
params = json.load(open(config_file,'r'))
params.update({
        'input_shape':[2560,12],
        'num_categories':9
    })

inputs0 = Input(shape=(2560,12))
inputs1 = Input(shape=(2560,12))
inputs2 = Input(shape=(2560,12))
inputs3 = Input(shape=(2560,12))
inputs4 = Input(shape=(2560,12))
inputs5 = Input(shape=(2560,12))
inputs6 = Input(shape=(2560,12))
inputs7 = Input(shape=(2560,12))
inputs8 = Input(shape=(2560,12))
inputs9 = Input(shape=(2560,12))

inputs_list = [inputs0,inputs1,inputs2,inputs3,inputs4,inputs5,inputs6,inputs7,inputs8,inputs9]

outputs = resnetmodel.build_network(inputs_list,0.5,num_classes=9,**params)
model = Model(inputs =inputs_list,outputs=outputs)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_5 (I

In [10]:
def read_data_seg(data_path, split = "Train",preprocess=False,fs=500,newFs=256,winSecond=10,winNum=10,n_index=0):
    """ Read data """

    # Fixed params
    #n_index = 0
    n_class = 9
    winSize = winSecond*fs
    new_winSize = winSecond*newFs
    # Paths
    path_signals = os.path.join(data_path, split)

    # Read labels and one-hot encode
    #label_path = os.path.join(data_path, "reference.txt")
    #labels = pd.read_csv(label_path, sep='\t',header = None)
    #labels = pd.read_csv("reference.csv")

    # Read time-series data
    channel_files = os.listdir(path_signals)
    #print(channel_files)
    channel_files.sort()
    n_channels = 12#len(channel_files)
    #posix = len(split) + 5

    # Initiate array
    list_of_channels = []
    
    X = np.zeros((len(channel_files), new_winSize, n_channels))
    i_ch = 0
    
    channel_name = ['V6', 'aVF', 'I', 'V4', 'V2', 'aVL', 'V1','II', 'aVR', 'V3', 'III', 'V5']
    channel_mid_name = ['II','aVR','V2','V5']
    channel_post_name = ['III','aVF','V3','V6']
    
    for i_ch,fil_ch in enumerate(channel_files[:]):#tqdm
        #print(fil_ch)
        ecg = sio.loadmat(os.path.join(path_signals,fil_ch))
        ecg_length = ecg["I"].shape[1]
        
        if ecg_length > fs*winNum*winSecond:
            print(" too long !!!",ecg_length)
            ecg_length = fs*winNum*winSecond
        if ecg_length < 4500:
            print(" too short !!!",ecg_length)
            break
        
        slide_steps = int((ecg_length- winSize)/winSecond)
        
        if ecg_length <= 4500:
            slide_steps = 0
            
        ecg_channels = np.zeros((new_winSize, n_channels))
        
        for i_n,ch_name in enumerate(channel_name):

            ecg_channels[:,i_n] = signal.resample(ecg[ch_name]
                                                  [:,n_index*slide_steps:n_index*slide_steps+winSize].T
                                                  ,new_winSize).T
            if preprocess:
                data = ecg_preprocessing(ecg_channels[:,i_n].reshape(1,new_winSize), 'sym8', 8, 3, newFs)
                ecg_channels[:,i_n] = data[0]
            else:
                pass
                ecg_channels[:,i_n] = ecg_channels[:,i_n]
                
        X[i_ch,:,:] = ecg_channels

    return X

In [11]:
ecg12_seg0 = read_data_seg(os.getcwd(),n_index=0)
ecg12_seg1 = read_data_seg(os.getcwd(),n_index=1)
ecg12_seg2 = read_data_seg(os.getcwd(),n_index=2)
ecg12_seg3 = read_data_seg(os.getcwd(),n_index=3)
ecg12_seg4 = read_data_seg(os.getcwd(),n_index=4)

In [12]:
ecg12_seg0.shape

(6500, 2560, 12)

In [13]:
ecg12_seg5 = read_data_seg(os.getcwd(),n_index=5)
ecg12_seg6 = read_data_seg(os.getcwd(),n_index=6)
ecg12_seg7 = read_data_seg(os.getcwd(),n_index=7)
ecg12_seg8 = read_data_seg(os.getcwd(),n_index=8)
ecg12_seg9 = read_data_seg(os.getcwd(),n_index=9)

In [14]:
ecg12_seg9.shape

(6500, 2560, 12)

In [15]:
class Config(object):
    def __init__(self):
        self.conv_subsample_lengths = [1, 2, 1, 2, 1, 2, 1, 2]
        self.conv_filter_length = 32
        self.conv_num_filters_start = 12
        self.conv_init = "he_normal"
        self.conv_activation = "relu"
        self.conv_dropout = 0.5
        self.conv_num_skip = 2
        self.conv_increase_channels_at = 2
        self.batch_size = 32#128
        self.input_shape = [2560, 12]#[1280, 1]
        self.num_categories = 2

    @staticmethod
    def lr_schedule(epoch):
        lr = 0.1
        if epoch >= 20 and epoch < 40:
            lr = 0.01
        if epoch >= 40:
            lr = 0.001
        print('Learning rate: ', lr)
        return lr

In [16]:
import keras.backend as K

def precision(y_true, y_pred):
    # Calculates the precision
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def recall(y_true, y_pred):
    # Calculates the recall
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def fbeta_score(y_true, y_pred, beta=1):
    # Calculates the F score, the weighted harmonic mean of precision and recall.
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
    
    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score

def fmeasure(y_true, y_pred):
    # Calculates the f-measure, the harmonic mean of precision and recall.
    return fbeta_score(y_true, y_pred, beta=1)


In [17]:
bin_label = np.zeros((6500,9))
for i in range(labels.shape[0]):
    label_nona = labels.loc[i].dropna()
    for j in range(1,label_nona.shape[0]):
        bin_label[i,int(label_nona[j])]=1

In [18]:
bin_label.shape

(6500, 9)

In [19]:
ecg12_seg0.shape

(6500, 2560, 12)

In [20]:
train_index = np.arange(6500)

In [21]:
train_index

array([   0,    1,    2, ..., 6497, 6498, 6499])

In [22]:
from sklearn.model_selection import train_test_split
index_tr, index_vld, lab_tr, lab_vld = train_test_split(train_index, labels['label1'], 
                                        stratify = labels['label1'], #test_size=0.1,
                                        shuffle=True, random_state = 123)

In [23]:
index_tr.shape

(4875,)

In [24]:
X_tr = [ecg12_seg0[index_tr],ecg12_seg1[index_tr],ecg12_seg2[index_tr],ecg12_seg3[index_tr],
        ecg12_seg4[index_tr],ecg12_seg5[index_tr],ecg12_seg6[index_tr],ecg12_seg7[index_tr],
        ecg12_seg8[index_tr],ecg12_seg9[index_tr],
       ]

In [25]:
index_vld.shape

(1625,)

In [26]:
X_vld = [ecg12_seg0[index_vld],ecg12_seg1[index_vld],ecg12_seg2[index_vld],ecg12_seg3[index_vld],
         ecg12_seg4[index_vld],ecg12_seg5[index_vld],ecg12_seg6[index_vld],ecg12_seg7[index_vld],
         ecg12_seg8[index_vld],ecg12_seg9[index_vld],
        ]

In [27]:
ecg12_seg0[index_vld].shape

(1625, 2560, 12)

In [28]:
def preprocess_y(y,num_class=9):
    bin_label = np.zeros((y.shape[0],num_class))
    for i in range(y.shape[0]):
        label_nona = labels.loc[lab_tr.index.tolist()[i]].dropna()
        for j in range(1,label_nona.shape[0]):
            bin_label[i,int(label_nona[j])]=1
    return bin_label

In [29]:
y_tr = preprocess_y(lab_tr)
y_vld = preprocess_y(lab_vld) 

In [30]:
y_vld.shape

(1625, 9)

In [31]:
model_name = 'weights_best_resnet_model_23'

checkpointer = ModelCheckpoint(filepath=model_name+'.hdf5', 
                            monitor='val_fmeasure',verbose=1, save_weights_only=True,
                            save_best_only=True, mode='max')
reduce = ReduceLROnPlateau(monitor='val_fmeasure',factor=0.5,patience=2,verbose=1,min_delta=1e-4,mode='max')
''' 
model.compile(optimizer = 'adam',
           loss='binary_crossentropy',
           metrics=['accuracy',fmeasure,recall,precision])

epochs = 20

history = model.fit_generator(train_generator,
       validation_data = val_generator,
       epochs=epochs,
       callbacks=[checkpointer,reduce],
       verbose=1)
'''
pass

In [32]:
def add_compile(model, config):
    
    optimizer = SGD(lr=config.lr_schedule(0), momentum=0.9)#Adam()#
    model.compile(loss='binary_crossentropy',#weighted_loss,#'binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy',fmeasure,recall,precision])   

In [33]:
import gc
if False:# True  False
    K.clear_session()
    gc.collect()
    config = tf.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    session = tf.Session(config=config)

    KTF.set_session(session )

In [34]:
config = Config()
add_compile(model, config)

earlystop = EarlyStopping(
            monitor='val_fmeasure',#'val_categorical_accuracy',
            patience=10,
            )
checkpoint = ModelCheckpoint(filepath=model_name,
                             monitor='val_categorical_accuracy', mode='max',
                             save_best_only='True')

lr_scheduler = LearningRateScheduler(config.lr_schedule)

callback_lists = [checkpointer,reduce]#[checkpointer,earlystop,lr_scheduler]
#[checkpoint, earlystop,lr_scheduler] 

history = model.fit(x=X_tr, y=y_tr, batch_size=32, epochs=20,  #class_weight=cw,#'auto',
          verbose=1, validation_data=(X_vld, y_vld), callbacks=callback_lists )

Learning rate:  0.1
Instructions for updating:
Use tf.cast instead.
Train on 4875 samples, validate on 1625 samples
Epoch 1/20

Epoch 00001: val_fmeasure improved from -inf to 0.23007, saving model to weights_best_resnet_model_23.hdf5
Epoch 2/20

Epoch 00002: val_fmeasure did not improve from 0.23007
Epoch 3/20

Epoch 00003: val_fmeasure did not improve from 0.23007

Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 4/20

Epoch 00004: val_fmeasure did not improve from 0.23007
Epoch 5/20

Epoch 00005: val_fmeasure did not improve from 0.23007

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6/20

Epoch 00006: val_fmeasure did not improve from 0.23007
Epoch 7/20

Epoch 00007: val_fmeasure did not improve from 0.23007

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 8/20

Epoch 00008: val_fmeasure did not improve from 0.23007
Epoch 9/20

Epoch 00009: val_fmeasure improved from 0

In [35]:
#save model
model_json = model.to_json()
with open(model_name+'.json','w') as json_file:
    json_file.write(model_json)
json_file.close()

In [36]:
from keras.models import model_from_json
with open(model_name+'.json','r') as json_file:
    load_model_json = json_file.read()
json_file.close()

model = model_from_json(load_model_json)
model.load_weights(model_name+'.hdf5')

In [37]:
test_x_seg0 = read_data_seg(os.getcwd(),split='Val',n_index=0)
test_x_seg1 = read_data_seg(os.getcwd(),split='Val',n_index=1)
test_x_seg2 = read_data_seg(os.getcwd(),split='Val',n_index=2)
test_x_seg3 = read_data_seg(os.getcwd(),split='Val',n_index=3)
test_x_seg4 = read_data_seg(os.getcwd(),split='Val',n_index=4)

In [38]:
test_x_seg5 = read_data_seg(os.getcwd(),split='Val',n_index=5)
test_x_seg6 = read_data_seg(os.getcwd(),split='Val',n_index=6)
test_x_seg7 = read_data_seg(os.getcwd(),split='Val',n_index=7)
test_x_seg8 = read_data_seg(os.getcwd(),split='Val',n_index=8)
test_x_seg9 = read_data_seg(os.getcwd(),split='Val',n_index=9)

In [39]:
test_x = [test_x_seg0,test_x_seg1,test_x_seg2,test_x_seg3,test_x_seg4,
          test_x_seg5,test_x_seg6,test_x_seg7,test_x_seg8,test_x_seg9,
         ]

In [40]:
test_y = model.predict(test_x)

In [41]:
test_y

array([[9.4034564e-01, 3.3974648e-06, 7.0512295e-05, ..., 6.2715411e-03,
        1.9216359e-02, 3.3553839e-03],
       [2.0529687e-02, 2.1192431e-04, 8.7180734e-04, ..., 1.1174917e-02,
        4.7087669e-05, 1.3118297e-02],
       [1.9669533e-04, 8.1126344e-01, 1.0361940e-02, ..., 4.9620777e-02,
        4.6044588e-05, 8.0512315e-01],
       ...,
       [5.9604645e-08, 8.8751316e-04, 1.9134283e-03, ..., 3.0768812e-02,
        9.2387199e-07, 1.8979907e-03],
       [1.6311765e-02, 7.2371960e-04, 6.6626751e-01, ..., 1.5119076e-02,
        7.0303679e-05, 7.2297513e-01],
       [9.4874060e-01, 3.8146973e-05, 2.7394295e-04, ..., 2.7952641e-02,
        1.3104081e-04, 5.5548251e-03]], dtype=float32)

In [42]:
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import hamming_loss
import csv

In [43]:
train_x = [ecg12_seg0[:],ecg12_seg1[:],ecg12_seg2[:],ecg12_seg3[:],
           ecg12_seg4[:],ecg12_seg5[:],ecg12_seg6[:],ecg12_seg7[:],
           ecg12_seg8[:],ecg12_seg9[:],
          ]

In [44]:
x_tr_y = model.predict(train_x)#X_tr

threshold = np.arange(0.1,0.9,0.1)

out = x_tr_y
y_test = bin_label#y_tr

acc = []
accuracies = []
best_threshold = np.zeros(out.shape[1])
for i in range(out.shape[1]):
    y_prob = np.array(out[:,i])
    for j in threshold:
        y_pred = [1 if prob>=j else 0 for prob in y_prob]
        acc.append( matthews_corrcoef(y_test[:,i],y_pred))
    acc   = np.array(acc)
    index = np.where(acc==acc.max()) 
    accuracies.append(acc.max()) 
    best_threshold[i] = threshold[index[0][0]]
    acc = []

print("best_threshold: ",best_threshold)

y_pred = np.array([[1 if out[i,j]>=best_threshold[j] else 0 for j in range(y_test.shape[1])] for i in range(len(y_test))])

y_pred 

y_test

#best_threshold:  [0.7 0.4 0.5 0.4 0.3 0.2 0.3 0.4 0.4]
#0.022393162393162393

hamming_loss(y_test,y_pred)

best_threshold:  [0.5 0.3 0.4 0.1 0.2 0.2 0.2 0.1 0.7]


0.02634188034188034

In [45]:
for i,v in enumerate(best_threshold):
    if v > 0.1:
        best_threshold[i] =  v# - 0.1
best_threshold#array([0.8, 0.6, 0.5, 0.1, 0.3, 0.3, 0.4, 0.3, 0.4])

array([0.5, 0.3, 0.4, 0.1, 0.2, 0.2, 0.2, 0.1, 0.7])

In [46]:
classes = [0,1,2,3,4,5,6,7,8]

y_pred = [[1 if test_y[i,j]>=best_threshold[j] else 0 for j in range(test_y.shape[1])] 
          for i in range(len(test_y))]
pred=[]
for j in range(test_y.shape[0]):
    pred.append([classes[i] for i in range(9) if y_pred[j][i] == 1])

with open('answers116.csv','w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                    'label3', 'label4', 'label5', 'label6', 'label7', 'label8'])
    count = 0
    for file_name in val_files:
        if file_name.endswith('.mat'):
            
            record_name = file_name.strip('.mat')
            answer = []
            answer.append(record_name)
            
            result = pred[count]
            
            answer.extend(result)
            for i in range(8-len(result)):
                answer.append('')
                
            #print(answer)
            count += 1
            writer.writerow(answer)
    csvfile.close()

In [47]:
import csv
thred = 0.15
''' 
pred = []
for i in range(test_y.shape[0]):
    
    try:
        pred_list = list(np.hstack(np.argwhere(test_y[i]>thred)))
    except ValueError:
        print(" ValueError !!! ")
        pred_list = ['']
        
    pred.append(pred_list)
    
with open('answers66.csv','w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                    'label3', 'label4', 'label5', 'label6', 'label7', 'label8'])
    count = 0
    for file_name in val_files:
        if file_name.endswith('.mat'):
            
            record_name = file_name.strip('.mat')
            answer = []
            answer.append(record_name)
            
            result = pred[count]
            
            answer.extend(result)
            for i in range(8-len(result)):
                answer.append('')
                
            #print(answer)
            count += 1
            writer.writerow(answer)
    csvfile.close()
'''
pass