In [1]:
from resnet_ecg.utils import one_hot,get_batches
from resnet_ecg.ecg_preprocess import ecg_preprocessing
from resnet_ecg.densemodel import Net
from resnet_ecg.ecg import resnetmodel

Using TensorFlow backend.


In [2]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from utils import extract_basic_features

import wfdb
import os
import wfdb.processing as wp
import matplotlib.pyplot as plt
from scipy import signal
from utils import find_noise_features, extract_basic_features
import shutil
import json
import time
import random as rn
#from lightgbm import LGBMClassifier
from scipy import sparse
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold,StratifiedKFold
#from xgboost import XGBClassifier


In [3]:
from keras.utils import to_categorical
from keras.optimizers import SGD,Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler,EarlyStopping,ReduceLROnPlateau
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
from sklearn.model_selection import StratifiedKFold

# Create the TensorFlow session used by Keras: both op thread pools are set
# to 1 and the per-process GPU memory fraction is set to 0.8.
config = tf.ConfigProto(
    inter_op_parallelism_threads=1,
    intra_op_parallelism_threads=1,
)
config.gpu_options.per_process_gpu_memory_fraction = 0.8
session = tf.Session(config=config)
KTF.set_session(session)

In [4]:
# Fix the seeds of every random source used in this notebook.
# NOTE(review): assigning PYTHONHASHSEED from inside a running interpreter is
# only inherited by child processes; the current kernel's hash seed was chosen
# at start-up — confirm whether it should be exported before launching Jupyter.
os.environ['PYTHONHASHSEED'] = '0'
rn.seed(12345)           # Python's `random` module
np.random.seed(42)       # NumPy global generator
tf.set_random_seed(1234) # TensorFlow graph-level seed

In [5]:
import os
import warnings
warnings.filterwarnings("ignore")
import scipy.io as sio
# The Train/ and Val/ folders are looked up directly under the current working directory.
train_dataset_path, val_dataset_path = (
    os.getcwd() + folder for folder in ("/Train/", "/Val/")
)

In [6]:
# Alphabetically sorted directory listings of both splits.
train_files = sorted(os.listdir(train_dataset_path))
val_files = sorted(os.listdir(val_dataset_path))

In [7]:
# Load the reference label table from the working directory and preview it.
# The preview shows a File_name column followed by label1..label8.
labels = pd.read_csv("reference.csv")
labels.head()

Unnamed: 0,File_name,label1,label2,label3,label4,label5,label6,label7,label8
0,TRAIN0001,8,,,,,,,
1,TRAIN0002,8,,,,,,,
2,TRAIN0003,8,,,,,,,
3,TRAIN0004,8,,,,,,,
4,TRAIN0005,8,,,,,,,


In [8]:
import keras.backend as K

def precision(y_true, y_pred):
    """Precision computed over every element of the given tensors.

    `y_true * y_pred` and `y_pred` are clipped to [0, 1] and rounded before
    being summed; K.epsilon() keeps the denominator non-zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(flagged_mask) + K.epsilon())

def recall(y_true, y_pred):
    """Recall computed over every element of the given tensors.

    `y_true * y_pred` and `y_true` are clipped to [0, 1] and rounded before
    being summed; K.epsilon() keeps the denominator non-zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def fbeta_score(y_true, y_pred, beta=1):
    """F-beta score: the weighted harmonic mean of precision and recall.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predictions, same shape as `y_true`.
        beta: non-negative weight; 1 gives the usual F1 score.

    Returns:
        A backend tensor holding the score.

    Raises:
        ValueError: if `beta` is negative.
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # No explicit "no positive labels" branch: a Python `if` on a symbolic
    # tensor comparison does not evaluate the tensor's value, and when there
    # are no positives the true-positive count is 0, so precision and recall
    # are both 0 and the epsilon-guarded ratio below is 0 as well. The result
    # is therefore always a tensor.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    return (1 + bb) * (p * r) / (bb * p + r + K.epsilon())

def fmeasure(y_true, y_pred):
    """F1 score: delegates to fbeta_score with beta fixed at 1."""
    f1 = fbeta_score(y_true, y_pred, beta=1)
    return f1


In [None]:
from keras.models import load_model
# Restore the first saved network; the custom metric functions are passed in
# under the names 'precision', 'recall' and 'fmeasure'.
model_densenet1 = load_model(
    "weights_best_simple_model_10_837.hdf5",
    custom_objects={
        'precision': precision,
        'recall': recall,
        'fmeasure': fmeasure,
    },
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [None]:
# Restore the second saved network with the same custom metric functions.
model_densenet2 = load_model(
    "weights_best_simple_model_10_833.hdf5",
    custom_objects={
        'precision': precision,
        'recall': recall,
        'fmeasure': fmeasure,
    },
)

In [None]:
def read_data_seg(data_path, split = "Train",preprocess=False,fs=500,newFs=256,winSecond=10,winNum=10,n_index=0):
    """Cut one window out of every .mat record of a split and resample it.

    Args:
        data_path: directory that contains the `split` sub-folder.
        split: name of the sub-folder holding the .mat records.
        preprocess: when True, every resampled channel is passed through
            `ecg_preprocessing(..., 'sym8', 8, 3, newFs)`.
        fs: sampling rate used to size the window in the raw signal.
        newFs: sampling rate used to size the resampled window.
        winSecond: window length in seconds.
        winNum: together with `fs` and `winSecond` it caps the record length
            considered (`fs * winNum * winSecond` samples).
        n_index: which window position to extract; the window starts at
            `n_index * slide_steps` samples.

    Returns:
        Array of shape (number of .mat files, winSecond * newFs, 12), rows in
        sorted file-name order. The row of a record shorter than 4500 samples
        is left as zeros.
    """
    win_size = winSecond * fs
    new_win_size = winSecond * newFs
    max_length = fs * winNum * winSecond
    min_length = 4500
    n_channels = 12
    # Order in which the leads are stacked along the last axis.
    channel_name = ['V6', 'aVF', 'I', 'V4', 'V2', 'aVL', 'V1','II', 'aVR', 'V3', 'III', 'V5']

    path_signals = os.path.join(data_path, split)

    # Only .mat entries are records; anything else in the folder (notes,
    # checkpoint folders, ...) cannot be loaded by loadmat and is ignored.
    channel_files = sorted(
        entry for entry in os.listdir(path_signals) if entry.endswith('.mat')
    )

    X = np.zeros((len(channel_files), new_win_size, n_channels))

    for i_ch, fil_ch in enumerate(channel_files):
        ecg = sio.loadmat(os.path.join(path_signals, fil_ch))
        ecg_length = ecg["I"].shape[1]

        if ecg_length > max_length:
            print(" too long !!!",ecg_length)
            ecg_length = max_length
        if ecg_length < min_length:
            print(" too short !!!",ecg_length)
            # Skip just this record; its row stays zero and later records are
            # still processed.
            continue

        # Records shorter than one window would give a negative step, which
        # turns into a negative slice start for n_index > 0; clamp to 0 so
        # every n_index reads from the beginning of such a record.
        # NOTE(review): the step divides by winSecond, not winNum; both
        # default to 10 — confirm which one is intended.
        slide_steps = max(0, int((ecg_length - win_size) / winSecond))
        start = n_index * slide_steps

        ecg_channels = np.zeros((new_win_size, n_channels))
        for i_n, ch_name in enumerate(channel_name):
            window = ecg[ch_name][:, start:start + win_size].T
            ecg_channels[:, i_n] = signal.resample(window, new_win_size).T
            if preprocess:
                data = ecg_preprocessing(ecg_channels[:, i_n].reshape(1, new_win_size), 'sym8', 8, 3, newFs)
                ecg_channels[:, i_n] = data[0]

        X[i_ch, :, :] = ecg_channels

    return X

In [None]:
# Window positions 0-4 of the default ("Train") split.
ecg12_seg0, ecg12_seg1, ecg12_seg2, ecg12_seg3, ecg12_seg4 = (
    read_data_seg(os.getcwd(), n_index=position) for position in range(0, 5)
)

In [None]:
# Window positions 5-9 of the default ("Train") split.
ecg12_seg5, ecg12_seg6, ecg12_seg7, ecg12_seg8, ecg12_seg9 = (
    read_data_seg(os.getcwd(), n_index=position) for position in range(5, 10)
)

In [None]:
# Window positions 0-4 of the "Val" split.
test_x_seg0, test_x_seg1, test_x_seg2, test_x_seg3, test_x_seg4 = (
    read_data_seg(os.getcwd(), split='Val', n_index=position) for position in range(0, 5)
)

In [None]:
# Window positions 5-9 of the "Val" split.
test_x_seg5, test_x_seg6, test_x_seg7, test_x_seg8, test_x_seg9 = (
    read_data_seg(os.getcwd(), split='Val', n_index=position) for position in range(5, 10)
)

In [None]:
# The ten "Val" window arrays, in n_index order, gathered into one list.
test_x = [
    test_x_seg0,
    test_x_seg1,
    test_x_seg2,
    test_x_seg3,
    test_x_seg4,
    test_x_seg5,
    test_x_seg6,
    test_x_seg7,
    test_x_seg8,
    test_x_seg9,
]

In [None]:
# Predict on the list of "Val" window arrays.
# NOTE(review): `model` is not assigned in any cell visible above (only
# model_densenet1 and model_densenet2 are loaded) — confirm which model is
# meant, otherwise this cell fails on a fresh kernel.
test_y = model.predict(test_x)

In [None]:
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import hamming_loss
import csv

In [None]:
# The ten training window arrays, in n_index order, each taken through a full slice.
train_x = [
    segment[:]
    for segment in (
        ecg12_seg0, ecg12_seg1, ecg12_seg2, ecg12_seg3, ecg12_seg4,
        ecg12_seg5, ecg12_seg6, ecg12_seg7, ecg12_seg8, ecg12_seg9,
    )
]

In [None]:
# Pick, for every output column, the decision threshold that maximises the
# Matthews correlation coefficient on the training predictions, then report
# the Hamming loss obtained with those thresholds.
# NOTE(review): `model` and `bin_label` are not assigned in any cell visible
# here — confirm where they come from before a fresh-kernel run.
x_tr_y = model.predict(train_x)  # X_tr

threshold = np.arange(0.1,0.9,0.1)

out = x_tr_y
y_test = bin_label  # y_tr

n_outputs = out.shape[1]
accuracies = []
best_threshold = np.zeros(n_outputs)
for i in range(n_outputs):
    y_prob = np.array(out[:, i])
    # One MCC value per candidate threshold for this column.
    acc = np.array([
        matthews_corrcoef(y_test[:, i], [1 if prob >= j else 0 for prob in y_prob])
        for j in threshold
    ])
    peak = acc.max()
    accuracies.append(peak)
    # First threshold that reaches the best score.
    best_threshold[i] = threshold[np.where(acc == peak)[0][0]]
acc = []

print("best_threshold: ",best_threshold)

# Binarise the training predictions column by column with the chosen thresholds.
y_pred = np.array([
    [1 if out[i, j] >= best_threshold[j] else 0 for j in range(y_test.shape[1])]
    for i in range(len(y_test))
])

# Values noted in the original cell, presumably from an earlier run:
#best_threshold:  [0.7 0.4 0.5 0.4 0.3 0.2 0.3 0.4 0.4]
#0.022393162393162393

hamming_loss(y_test,y_pred)

In [None]:
# Turn the "Val" predictions into label lists and write them to answers116.csv.
classes = [0,1,2,3,4,5,6,7,8]

# Binarise every output column with its own threshold.
y_pred = [[1 if test_y[i,j]>=best_threshold[j] else 0 for j in range(test_y.shape[1])]
          for i in range(len(test_y))]

# For every record, the list of class ids whose column was switched on.
pred = [[classes[i] for i in range(9) if row[i] == 1] for row in y_pred]

# newline='' is what the csv module expects for files it writes to.
with open('answers116.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                    'label3', 'label4', 'label5', 'label6', 'label7', 'label8'])
    # `count` walks through `pred` in step with the .mat entries of val_files.
    count = 0
    for file_name in val_files:
        if not file_name.endswith('.mat'):
            continue

        # splitext removes exactly the extension; str.strip('.mat') would
        # instead strip any leading/trailing '.', 'm', 'a' or 't' characters
        # and could mangle record names.
        record_name = os.path.splitext(file_name)[0]
        result = pred[count]

        # Record name, predicted labels, then blanks up to eight label columns.
        answer = [record_name] + result + [''] * (8 - len(result))
        writer.writerow(answer)
        count += 1