In [2]:
import os
import shutil
import gc
import time
import random as rn
import numpy as np
import pandas as pd
import warnings
import csv

import scipy.io as sio
from scipy import signal
from tqdm import tqdm
import matplotlib.pyplot as plt

from scipy import sparse
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, StratifiedKFold

# import wfdb
# import wfdb.processing as wp
# from utils import extract_basic_features
# from utils import find_noise_features, extract_basic_features
# from lightgbm import LGBMClassifier
# from xgboost import XGBClassifier

''' '''
# from resnet_ecg.utils import one_hot,get_batches
from resnet_ecg.ecg_preprocess import ecg_preprocessing
from resnet_ecg.densemodel import Net
from keras.utils import to_categorical
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF
import keras.backend as K
from keras.layers import Input
from keras.models import Model, load_model
import keras
import pywt


warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [3]:
# Seed every random source used below so data shuffling and weight
# initialisation are repeatable.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes - confirm.
os.environ['PYTHONHASHSEED'] = '0'
np.random.seed(42)
rn.seed(12345)
tf.set_random_seed(1234)


# path of training data
# Root folder; later cells append "/Train/", "/Val/" and CSV names to it.
path = '/media/jdcloud/'

class Config(object):
    """Hyper-parameter holder for the convolutional network and its LR schedule."""

    def __init__(self):
        # Convolution stack.
        self.conv_subsample_lengths = [1, 2] * 4
        self.conv_filter_length = 32
        self.conv_num_filters_start = 12
        self.conv_init = "he_normal"
        self.conv_activation = "relu"
        self.conv_dropout = 0.5
        self.conv_num_skip = 2
        self.conv_increase_channels_at = 2
        # Batch and tensor geometry.
        self.batch_size = 32  # 128
        self.input_shape = [2560, 12]  # [1280, 1]
        self.num_categories = 2

    @staticmethod
    def lr_schedule(epoch):
        """Return the step-wise learning rate for `epoch` and print it.

        Epochs below 10 use 0.1, epochs 10-19 use 0.01, and epoch 20 onwards
        uses 0.001.
        """
        if epoch >= 20:
            lr = 0.001
        elif epoch >= 10:
            lr = 0.01
        else:
            lr = 0.1
        print('Learning rate: ', lr)
        return lr

In [4]:
def wavelet(ecg, wavefunc, lv, m, n):
    """Denoise a 1-D signal by soft-thresholding selected wavelet bands.

    The signal is decomposed with ``pywt.wavedec``; the coefficient arrays at
    list positions ``m`` .. ``n`` (inclusive) are soft-thresholded with the
    threshold ``sqrt(2 * ln(len(band)))``; the signal is then rebuilt with
    ``pywt.waverec``.

    param ecg: 1-D numpy array
    param wavefunc: wavelet name passed to pywt (e.g. 'db4')
    param lv: decomposition level
    param m: first index into the coefficient list to threshold
    param n: last index into the coefficient list to threshold (inclusive)
    return: 1-D numpy array, the reconstructed signal

    NOTE: the earlier implementation only kept coefficients ``>= Tr`` (so
    large negative coefficients were zeroed) and replaced them with
    ``sign(c) - Tr`` rather than ``sign(c) * (|c| - Tr)``. Data produced by
    that version differs from what this one returns; regenerate any cached
    "_db4" arrays before mixing them with fresh output.
    """
    coeff = pywt.wavedec(ecg, wavefunc, mode='sym', level=lv)

    for i in range(m, n + 1):
        cD = np.asarray(coeff[i])
        if cD.size == 0:
            # Nothing to threshold; log(0) below would only produce warnings.
            continue
        # The threshold depends only on the band length, so compute it once
        # per band instead of once per coefficient.
        Tr = np.sqrt(2 * np.log(len(cD)))
        # Soft threshold: shrink magnitudes by Tr, zero anything below it,
        # and keep the sign of the original coefficient.
        coeff[i] = np.sign(cD) * np.maximum(np.abs(cD) - Tr, 0)

    denoised_ecg = pywt.waverec(coeff, wavefunc)
    return denoised_ecg


def wavelet_db6(sig):
    """
    Filter a signal by zeroing selected bands of a 9-level 'db6' decomposition.

    The two last detail bands of the coefficient list and its first
    (approximation) entry are replaced by zeros before reconstruction.

    Reference: R J, Acharya U R, Min L C. ECG beat classification using PCA,
    LDA, ICA and discrete wavelet transform[J]. Biomedical Signal Processing
    and Control, 2013, 8(5): 437-448.

    param sig: 1-D numpy Array
    return: 1-D numpy Array
    """
    bands = pywt.wavedec(sig, 'db6', level=9)
    # Same bands, same order as before: last, second to last, then first.
    for band_idx in (-1, -2, 0):
        bands[band_idx] = np.zeros(len(bands[band_idx]))
    return pywt.waverec(bands, 'db6')


In [5]:
def precision(y_true, y_pred):
    """Keras metric: rounded true positives over rounded predicted positives.

    Both tensors are clipped to [0, 1] and rounded before counting;
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(flagged_mask) + K.epsilon())


def recall(y_true, y_pred):
    """Keras metric: rounded true positives over rounded actual positives.

    Both tensors are clipped to [0, 1] and rounded before counting;
    ``K.epsilon()`` is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())


def fbeta_score(y_true, y_pred, beta=1):
    """F-beta score: the weighted harmonic mean of precision and recall.

    Raises ValueError when ``beta`` is negative. Returns the plain number 0
    when the count of positives in ``y_true`` compares equal to 0.
    """
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # Mirrors sklearn: with no positives in the truth, the score is fixed at 0.
    # NOTE(review): on a symbolic backend tensor this comparison may never be
    # true - confirm it behaves as intended.
    truth_positive_count = K.sum(K.round(K.clip(y_true, 0, 1)))
    if truth_positive_count == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    beta_sq = beta ** 2
    return (1 + beta_sq) * (p * r) / (beta_sq * p + r + K.epsilon())


def fmeasure(y_true, y_pred):
    """F1 metric: ``fbeta_score`` with ``beta=1`` (precision and recall weighted equally)."""
    f1 = fbeta_score(y_true, y_pred, beta=1)
    return f1

In [6]:
def preprocess_y(labels, y, num_class=10):
    """Build a multi-hot label matrix from a label table.

    param labels: pandas DataFrame; each row holds one record. The first
        non-null field of a row is skipped (presumably the file name) and
        every remaining non-null field is read as an integer class id.
    param y: sequence of index labels into ``labels`` (looked up with ``.loc``)
    param num_class: number of columns in the output
    return: int8 numpy array of shape (len(y), num_class) with 1 at each
        class id present in the corresponding row
    """
    bin_label = np.zeros((len(y), num_class), dtype='int8')
    for row, key in enumerate(y):
        present = labels.loc[key].dropna()
        # Read by position with .iloc: the row Series is labelled by column
        # name, and integer keys on such a Series rely on a deprecated
        # positional fallback in pandas.
        for class_id in present.iloc[1:]:
            bin_label[row, int(class_id)] = 1
    return bin_label

In [7]:
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence that loads saved ``.npy`` samples and multi-hot labels per batch."""

    def __init__(self, list_IDs, labels, batch_size=32, dim=(32,32,32), n_channels=1,
                 n_classes=10, shuffle=True):
        """Store the batch configuration and build the first epoch's ordering."""
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(len(self.list_IDs) // self.batch_size)

    def __getitem__(self, index):
        """Return ``(X_list, y)`` for batch number ``index``."""
        first = index * self.batch_size
        batch_positions = self.indexes[first:first + self.batch_size]
        batch_ids = [self.list_IDs[pos] for pos in batch_positions]
        return self.__data_generation(batch_ids)

    def on_epoch_end(self):
        """Rebuild the sample ordering, shuffling it when ``shuffle`` is set."""
        order = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(order)
        self.indexes = order

    def __data_generation(self, list_IDs_temp):
        """Load the arrays and labels for the given sample IDs.

        Each ID is read from ``training_data/<ID>.npy``. The label row is the
        one whose ``File_name`` equals the ID text before its first
        underscore, so suffixed variants of an ID share the base record's
        labels. Returns the first ten slices along axis 1 of the batch array
        as a list, together with the label matrix.
        """
        batch_x = np.empty((self.batch_size, *self.dim, self.n_channels))
        batch_y = np.empty((self.batch_size, self.n_classes), dtype=int)

        for slot, sample_id in enumerate(list_IDs_temp):
            batch_x[slot,] = np.load("training_data/" + sample_id + ".npy")
            record_name = sample_id.split("_")[0]
            row_index = self.labels[self.labels["File_name"] == record_name].index
            batch_y[slot, :] = preprocess_y(self.labels, row_index)

        # One list entry per model input.
        return [batch_x[:, k] for k in range(10)], batch_y

In [8]:
def add_compile(model, config):
    """Compile ``model`` in place for multi-label training.

    Uses binary cross-entropy, Adam with a learning rate of 1e-4, and the
    accuracy / fmeasure / precision metrics.

    param model: Keras model to compile
    param config: kept for call compatibility; currently not read
    """
    # An SGD optimizer used to be built here from config.lr_schedule(0) but
    # was never passed to compile(); its only visible effect was printing
    # "Learning rate:  0.1", which contradicted the Adam rate used below.
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=0.0001),
                  metrics=['accuracy', fmeasure, precision])

In [8]:
# Locate the raw recordings and list them in sorted (reproducible) order.
# `path` already ends with "/", so these strings contain a double slash.
train_dataset_path = path + "/Train/"
val_dataset_path = path + "/Val/"

train_files = os.listdir(train_dataset_path)
train_files.sort()
val_files = os.listdir(val_dataset_path)
val_files.sort()

# `labels` is the reference label table; `labels_en` is a second label table
# (the counts printed further down show it has more rows than `labels`).
labels = pd.read_csv(path + "REFERENCE.csv")
labels_en = pd.read_csv(path + "kfold_labels_en.csv")
#data_info = pd.read_csv(path + "data_info.csv")

In [9]:
labels_en["label1"].values.shape

(7939,)

In [10]:
from collections import Counter
Counter(labels_en["label1"].values)

Counter({0.0: 1953,
         1.0: 478,
         2.0: 492,
         3.0: 926,
         4.0: 164,
         5.0: 627,
         6.0: 628,
         7.0: 222,
         8.0: 2049,
         9.0: 400})

In [11]:
Counter(labels["label1"].values)

Counter({0: 1953,
         1: 406,
         2: 414,
         3: 704,
         4: 101,
         5: 502,
         6: 532,
         7: 204,
         8: 1473,
         9: 400})

In [12]:
7939*3*0.7

16671.899999999998

In [13]:
6689*0.7

4682.299999999999

In [14]:
# Record IDs come from `labels` (REFERENCE.csv).
raw_IDs = labels["File_name"].values.tolist()#labels_en

# IDs of the wavelet-filtered copies of each record.
extend_db4_IDs = [i + "_db4" for i in raw_IDs]
extend_db6_IDs = [i + "_db6" for i in raw_IDs]
all_IDs = raw_IDs + extend_db4_IDs + extend_db6_IDs

# NOTE(review): these labels come from `labels_en`, while the IDs above come
# from `labels`; the two tables have different lengths, so `all_train_labels`
# does not line up with `all_IDs` - confirm before using them together.
train_labels = labels_en["label1"].values
all_train_labels = np.hstack((train_labels, train_labels, train_labels))

# Parameters
# Keyword arguments forwarded to DataGenerator.
params = {'dim': (10, 2560),
          'batch_size': 64,#20
          'n_classes': 10,
          'n_channels': 12,
          'shuffle': True}

en_amount = 1
model_path = './official_densenet_c_model/'
# 6689 equals len(raw_IDs) in the recorded run; 4682 is about 70% of it.
index = np.arange(6689)#23817
np.random.shuffle(index)

index_train = index[:4682]#16672
index_valid = index[4682:]

# Random 70/30 split; a later cell overwrites tr_IDs / val_IDs with a
# stratified split.
tr_IDs = np.array(raw_IDs)[index_train]#all_IDs
val_IDs = np.array(raw_IDs)[index_valid]

print(tr_IDs.shape)
print(val_IDs.shape)

(4682,)
(2007,)


In [15]:
len(raw_IDs)

6689

In [16]:
labels_en["File_name"].values.shape

(7939,)

In [17]:
4682+71*4+143*2+280

5532

In [18]:
from sklearn.model_selection import train_test_split
# Stratified 70/30 split on the first label column. This replaces the
# tr_IDs / val_IDs produced by the earlier random split; tr_IDs and val_IDs
# are plain lists here because the first argument is a list.
tr_IDs,val_IDs,tr_y,val_y = train_test_split(labels["File_name"].values.tolist(),labels["label1"].values,test_size=0.3,
                 random_state=2019,shuffle=True,stratify=labels["label1"].values)

In [19]:
# Oversample rare first-label classes in the training split by appending
# extra copies of their IDs: class 4 gets 4 extra copies, class 7 gets 2 and
# class 9 gets 1. Only records already in the training split are duplicated.
# NOTE: this cell mutates tr_IDs, so running it again appends the copies
# again; re-run the split cell first.
_train_id_set = set(tr_IDs)  # snapshot for O(1) membership tests
for _cls, _extra_copies in ((4, 4), (7, 2), (9, 1)):
    _members = [name for name in labels.loc[labels.label1 == _cls, "File_name"]
                if name in _train_id_set]
    # Repeating the list keeps the same append order as the nested loops.
    tr_IDs.extend(_members * _extra_copies)

In [20]:
len(tr_IDs)

5532

In [21]:
# Add the "_db4" and "_db6" variants of every ID, which triples both splits.
# NOTE: tr_IDs / val_IDs are reassigned from themselves, so re-running this
# cell keeps growing them (and produces names like "X_db4_db4").
tr_IDs_db4 = [ids+"_db4" for ids in tr_IDs]
tr_IDs_db6 = [ids+"_db6" for ids in tr_IDs]

val_IDs_db4 = [ids+"_db4" for ids in val_IDs]
val_IDs_db6 = [ids+"_db6" for ids in val_IDs]

tr_IDs = tr_IDs + tr_IDs_db4 + tr_IDs_db6
val_IDs = val_IDs + val_IDs_db4 + val_IDs_db6

In [22]:
tr_IDs[:5]

['TRAIN1129', 'TRAIN3193', 'TRAIN3673', 'TRAIN4712', 'TRAIN0997']

In [23]:
len(tr_IDs)

16596

In [24]:
Counter(tr_y)

Counter({0: 1367,
         1: 284,
         2: 290,
         3: 493,
         4: 71,
         5: 351,
         6: 372,
         7: 143,
         8: 1031,
         9: 280})

In [25]:
Counter(val_y)

Counter({0: 586,
         1: 122,
         2: 124,
         3: 211,
         4: 30,
         5: 151,
         6: 160,
         7: 61,
         8: 442,
         9: 120})

In [26]:
# Generators
training_generator = DataGenerator(tr_IDs, labels, **params)
validation_generator = DataGenerator(val_IDs, labels, **params)
for i in validation_generator:
    #print(i)
    break

In [27]:
from keras.models import Model
from keras.layers import Dropout,Reshape,Dense, Input, BatchNormalization, Activation, add, Permute,  CuDNNGRU
from keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import LSTM, GRU, TimeDistributed, Bidirectional, LeakyReLU,Concatenate,Flatten
len_seg = 2560 #625
num_classes = 10

def add_model(x):
    """Build one feature-extraction branch on top of tensor ``x``.

    The branch applies three SeparableConv2D / BatchNormalization /
    MaxPooling2D stages, reshapes to a sequence, runs two bidirectional
    CuDNNGRU layers, and ends with Dense(512, relu) followed by Dropout(0.5).
    New layers are created on every call, so branches built by separate calls
    do not share weights.

    param x: input tensor (the caller in this notebook passes a tensor
        reshaped to (1, 2560, 12))
    return: the tensor produced by the final Dropout layer
    """
#     main_input = Input(shape=(len_seg,12), name='main_input')
#     x = Reshape((len_seg,12,1))(main_input)
    #main_input = Input(shape=(12,len_seg,1), name='main_input')
    #x = Permute((3,2,1))(main_input)
    x = SeparableConv2D(64, (1, 5), padding='valid')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((1,3), strides=(1,3), padding='same')(x)
    x = SeparableConv2D(128, (1,3), padding='valid')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((1,2), strides=(1,2), padding='valid')(x)
    x = SeparableConv2D(256, (1,3), padding='valid')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((1,2), strides=(1,2), padding='same')(x)
    # The hard-coded 212 assumes an input width of 2560 going through the
    # conv/pool stack above - TODO confirm whenever len_seg changes.
    x = Reshape((212, 256))(x)
    x = Bidirectional(CuDNNGRU(256, return_sequences=True,return_state=False))(x)
    x = Bidirectional(CuDNNGRU(512))(x)
    
    x = Dense(512,activation='relu')(x)
    x = Dropout(0.5)(x)
    
    #main_output = Dense(num_classes,activation='sigmoid')(x)
    #model = Model(inputs=main_input, outputs=main_output)
    #model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy',f1])
    return x

In [28]:
def nnet(inputs, keep_prob, num_classes):
    """Build the multi-branch classifier head.

    Each input tensor is reshaped to (1, 2560, 12) and passed through its own
    ``add_model`` branch; the branch outputs are concatenated along axis 1 and
    fed to a sigmoid Dense layer with ``num_classes`` units.

    param inputs: list of Keras input tensors, one per branch
    param keep_prob: not used by the active code (only referenced in
        commented-out lines)
    param num_classes: number of sigmoid output units
    return: output tensor of the final Dense layer
    """

    branches = []
    for i in range(int(len(inputs))):
        ld = inputs[i]
        ld = Reshape((1, int(2560), 12))(ld)
        bch = add_model(ld)
        branches.append(bch)
    features = Concatenate(axis=1)(branches)
    # The two string literals below hold disabled alternative heads; they are
    # evaluated as no-op expressions.
    '''
    out1 = GlobalMaxPooling1D()(features)
    out2 = GlobalAveragePooling1D()(features)
    out3 = Flatten()(features)
    out = Concatenate(axis=-1)([out1, out2, out3])
    out = Dropout(0.5)(out)
    out = Dense(num_classes, activation="sigmoid", name="3")(out)
    net1 = out
    '''
    # ************************add attention*****************************
    ''' 
    cnnout = Dropout(0.2)(features)
    x1 = Bidirectional(CuDNNLSTM(60, input_shape=(10,60),return_sequences=True,return_state=False))(cnnout)#CuDNNGRU  CuDNNLSTM
    x1 = Activation('relu')(x1)

    x1 = Dropout(0.2)(x1)
    x1 = AttentionWithContext()(x1)
    x1 = Reshape((1, -1))(x1)
    x1 = BatchNormalization()(x1)
    x1 = Activation('relu')(x1)
    features = Dropout(0.2)(x1)
    '''
    #print(features)
    #features = Reshape((1, -1))(features)
    #features = Dropout(keep_prob, [1,len(inputs),1])(features)

    #features = Reshape((120,1))(features)
    #features = Dropout(keep_prob, [1, int(inputs.shape[-1]), 1])(features)
    #features = Bidirectional(CuDNNLSTM(10, return_sequences=True), merge_mode='concat')(features)

    #features = Flatten()(features)
    # Sigmoid outputs: one independent unit per class (multi-label setup).
    net = Dense(units=num_classes, activation='sigmoid')(features)
    return net

In [29]:
# Build the model from the notebook-local nnet(): ten inputs of shape
# (2560, 12), one per branch.
input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
outputs = nnet(inputs_list, 0.5, num_classes=10)
model = Model(inputs=inputs_list, outputs=outputs)
#print(model.summary())

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [25]:

# Build the model from the imported resnet_ecg.densemodel.Net instead.
# NOTE: this rebinds `model`, so whichever build cell ran last decides which
# network the training cell below uses.
input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=10)
model = Model(inputs=inputs_list, outputs=outputs)
print(model.summary())
''' 
from resnet_ecg.ecg import resnetmodel
import json
config_file = './resnet_ecg/examples/cinc17/config.json'
params = json.load(open(config_file,'r'))
params.update({
        'input_shape':[2560,12],
        'num_categories':10
    })

input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]

outputs = resnetmodel.build_network(inputs_list,0.5,num_classes=10,**params)
model = Model(inputs=inputs_list, outputs=outputs)
print(model.summary())
'''
pass

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 2560, 12)     0                                            
__________________________________________________________________________________________________
input_4 (InputLayer) 

In [None]:
# Make sure the checkpoint directory exists before training starts; otherwise
# the first checkpoint write fails only after a full epoch has been spent.
os.makedirs(model_path, exist_ok=True)

# Keep only the weights of the epoch with the highest validation F-measure.
checkpointer = ModelCheckpoint(filepath=model_path + 'jupyter_wcl_extend_weights-best_one_fold_0801.hdf5',
                               monitor='val_fmeasure', verbose=1, save_best_only=True,
                               save_weights_only=True,
                               mode='max')  # val_fmeasure
# Halve the learning rate after 2 epochs without a validation F-measure gain.
reduce = ReduceLROnPlateau(monitor='val_fmeasure', factor=0.5, patience=2, verbose=1, min_delta=1e-6,
                           mode='max')

# mode='max' is required: with the default 'auto', a monitor name without
# "acc" in it is treated as a quantity to minimise, which would stop training
# while the F-measure is still improving. The callback is defined for
# convenience but intentionally not added to callback_lists below.
earlystop = EarlyStopping(monitor='val_fmeasure', patience=5, mode='max')

config = Config()
add_compile(model, config)

callback_lists = [checkpointer, reduce]

history = model.fit_generator(generator=training_generator,
                              validation_data=validation_generator,
                              use_multiprocessing=False,
                              epochs=30,
                              verbose=1,
                              callbacks=callback_lists)

Learning rate:  0.1
Instructions for updating:
Use tf.cast instead.
Epoch 1/30

Epoch 00001: val_fmeasure improved from -inf to 0.01110, saving model to ./official_densenet_c_model/jupyter_wcl_extend_weights-best_one_fold_0801.hdf5
Epoch 2/30

Epoch 00002: val_fmeasure improved from 0.01110 to 0.04118, saving model to ./official_densenet_c_model/jupyter_wcl_extend_weights-best_one_fold_0801.hdf5
Epoch 3/30

Epoch 00003: val_fmeasure improved from 0.04118 to 0.11908, saving model to ./official_densenet_c_model/jupyter_wcl_extend_weights-best_one_fold_0801.hdf5
Epoch 4/30

Epoch 00004: val_fmeasure did not improve from 0.11908
Epoch 5/30

Epoch 00005: val_fmeasure improved from 0.11908 to 0.12072, saving model to ./official_densenet_c_model/jupyter_wcl_extend_weights-best_one_fold_0801.hdf5
Epoch 6/30

Epoch 00006: val_fmeasure did not improve from 0.12072
Epoch 7/30

Epoch 00007: val_fmeasure improved from 0.12072 to 0.14018, saving model to ./official_densenet_c_model/jupyter_wcl_exten

In [9]:
def read_data_seg(data_path, split="Train", preprocess=False, fs=500, newFs=256, winSecond=10, winNum=10, n_index=0,pre_type="sym"):
    """Read one resampled window per record from ``<data_path>/<split>``.

    Every file in the folder is loaded with ``scipy.io.loadmat``. For each of
    the 12 leads, a window of ``winSecond * fs`` samples is cut starting at
    ``n_index * slide_steps`` and resampled to ``winSecond * newFs`` samples.
    ``slide_steps`` spreads ``winNum`` window starts evenly over the record.

    param data_path: root data folder
    param split: sub-folder name holding the .mat files
    param preprocess: when True, filter each resampled lead according to
        ``pre_type``; when False, a notice is printed once
    param fs: sampling rate of the stored signals
    param newFs: sampling rate after resampling
    param winSecond: window length in seconds
    param winNum: number of window positions per record
    param n_index: which window position to read (0 .. winNum - 1)
    param pre_type: "sym", "db4" or "db6"; any other value leaves the
        resampled signal unfiltered
    return: float32 array of shape (n_files, winSecond * newFs, 12). Rows of
        records shorter than 4500 samples are left as zeros.
    """
    min_length = 4500  # records with fewer samples than this are skipped
    winSize = winSecond * fs
    new_winSize = winSecond * newFs
    max_length = fs * winNum * winSecond

    path_signals = os.path.join(data_path, split)

    # Sorted so that row i always corresponds to the same file.
    channel_files = os.listdir(path_signals)
    channel_files.sort()

    # Lead order of the last axis of the returned array.
    channel_name = ['V6', 'aVF', 'I', 'V4', 'V2', 'aVL', 'V1', 'II', 'aVR', 'V3', 'III', 'V5']
    n_channels = len(channel_name)

    X = np.zeros((len(channel_files), new_winSize, n_channels)).astype('float32')

    if not preprocess:
        # Printed once per call rather than once per lead of every record.
        print(" no preprocess !!! ")

    for i_ch, fil_ch in enumerate(channel_files):

        if i_ch % 1000 == 0:
            print(i_ch)

        ecg = sio.loadmat(os.path.join(path_signals, fil_ch))
        ecg_length = ecg["I"].shape[1]

        if ecg_length > max_length:
            print(" too long !!!", ecg_length)
            ecg_length = max_length
        if ecg_length < min_length:
            print(" too short !!!", ecg_length)
            # Skip only this record. The previous `break` abandoned every
            # remaining file and silently left their rows as zeros.
            continue

        # Distance between consecutive window starts. Dividing by winNum (not
        # winSecond) keeps the last window inside the record, and clamping at
        # zero covers records shorter than one full window.
        slide_steps = max(0, int((ecg_length - winSize) / winNum))
        start = n_index * slide_steps

        ecg_channels = np.zeros((new_winSize, n_channels)).astype('float32')

        for i_n, ch_name in enumerate(channel_name):
            window = ecg[ch_name][:, start:start + winSize]
            ecg_channels[:, i_n] = signal.resample(window.T, new_winSize)[:, 0]

            if preprocess:
                if pre_type == "sym":
                    ecg_channels[:, i_n] = ecg_preprocessing(ecg_channels[:, i_n].reshape(1, new_winSize), 'sym8', 8, 3,
                                                             newFs, removebaseline=False, normalize=False)[0]
                elif pre_type == "db4":
                    ecg_channels[:, i_n] = wavelet(ecg_channels[:, i_n], 'db4', 4, 2, 4)
                elif pre_type == "db6":
                    ecg_channels[:, i_n] = wavelet_db6(ecg_channels[:, i_n])

        X[i_ch, :, :] = ecg_channels

    return X

In [8]:
'''   
pre_type = "sym"#"db6"#"sym"

labels = pd.read_csv(path + "REFERENCE.csv")
raw_IDs = labels["File_name"].values.tolist()

IDs = {}
IDs["sym"] = raw_IDs
IDs["db4"] = [i + "_db4" for i in raw_IDs]
IDs["db6"] = [i + "_db6" for i in raw_IDs]

#######################
input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=10)
model = Model(inputs=inputs_list, outputs=outputs) # without attention and maxpooling and separableconv1d   f=0.819

model_path = './official_densenet_model/'
model_name = "densenet_extend_weights-best_k0_r0_0730_f0819.hdf5"#'densenet_extend_weights-best_one_fold_0730.hdf5'

model.load_weights(model_path + model_name)
##########################

X = np.empty((6689, 10, 2560, 12))
for i, ID in enumerate(IDs[pre_type]):
    if i % 1000 == 0:
        print(i)
    X[i,] = np.load("training_data/" + ID + ".npy")
#train_x = [(X[:, i]-np.mean(X[:, i]))/np.std(X[:, i]) for i in range(10)]
train_x = [X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4], X[:, 5], X[:, 6], X[:, 7], X[:, 8], X[:, 9]]

index = np.arange(6689)
y_train = preprocess_y(labels, index)

blend_train = model.predict(train_x)


threshold = np.arange(0.1, 0.9, 0.1)
acc = []
accuracies = []
best_threshold = np.zeros(blend_train.shape[1])

for i in range(blend_train.shape[1]):
    y_prob = np.array(blend_train[:, i])
    for j in threshold:
        y_pred = [1 if prob >= j else 0 for prob in y_prob]
        acc.append(f1_score(y_train[:, i], y_pred, average='macro'))
    acc = np.array(acc)
    index = np.where(acc == acc.max())
    accuracies.append(acc.max())
    best_threshold[i] = threshold[index[0][0]]
    acc = []

print("best_threshold :", best_threshold)

y_pred = np.array([[1 if blend_train[i, j] >= best_threshold[j] else 0 for j in range(blend_train.shape[1])]
          for i in range(len(blend_train))])
print(" train data f1_score  :", f1_score(y_train, y_pred, average='macro'))

for i in range(10):
    print("f1 score of ab {} is {}".format(i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))

net_num = 10
test_x = [read_data_seg(path, split='Val', preprocess=True, n_index=i, pre_type=pre_type) for i in range(net_num)]

out = model.predict(test_x)
y_pred_test = np.array(
    [[1 if out[i, j] >= best_threshold[j] else 0 for j in range(out.shape[1])] for i in range(len(out))])

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

test_y = y_pred_test

y_pred = [[1 if test_y[i, j] >= best_threshold[j] else 0 for j in range(test_y.shape[1])]
          for i in range(len(test_y))]
pred = []
for j in range(test_y.shape[0]):
    pred.append([classes[i] for i in range(10) if y_pred[j][i] == 1])

val_dataset_path = path + "/Val/"
val_files = os.listdir(val_dataset_path)
val_files.sort()

with open('jupyter_answers_densenet_{}_0730.csv'.format(pre_type), 'w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                     'label3', 'label4', 'label5', 'label6', 'label7', 'label8', 'label9', 'label10'])
    count = 0
    for file_name in val_files:
        if file_name.endswith('.mat'):

            record_name = file_name.strip('.mat')
            answer = []
            answer.append(record_name)

            result = pred[count]

            answer.extend(result)
            for i in range(10 - len(result)):
                answer.append('')
            count += 1
            writer.writerow(answer)
    csvfile.close()
'''
pass

0
1000
2000
3000
4000
5000
6000
best_threshold : [0.6 0.4 0.5 0.3 0.8 0.3 0.2 0.6 0.5 0.4]
 train data f1_score  : 0.9170350887582916
f1 score of ab 0 is 0.957481079747925
f1 score of ab 1 is 0.9795340664065972
f1 score of ab 2 is 0.9485398876720832
f1 score of ab 3 is 0.981503460013572
f1 score of ab 4 is 0.9459028856107661
f1 score of ab 5 is 0.9612389571778641
f1 score of ab 6 is 0.9565726928187226
f1 score of ab 7 is 0.9380835132410156
f1 score of ab 8 is 0.9408324140945324
f1 score of ab 9 is 0.9214892280967387
0
0
0
0
0
0
0
0
0
0


In [7]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
# Base estimator for the per-class stacking layer fitted further down.
LR = LogisticRegression(penalty="l2",C=1.0)


# Which preprocessed variant of the data to load ("sym", "db4" or "db6").
pre_type = "sym"#"db6"#"sym"

labels = pd.read_csv(path + "REFERENCE.csv")
raw_IDs = labels["File_name"].values.tolist()

IDs = {}
IDs["sym"] = raw_IDs
IDs["db4"] = [i + "_db4" for i in raw_IDs]
IDs["db6"] = [i + "_db6" for i in raw_IDs]

''' '''
# Rebuild the network and load previously trained weights.
input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=10)
model = Model(inputs=inputs_list, outputs=outputs)

model_path = './official_densenet_model/'
model_name = "densenet_extend_weights-best_k0_r0_0730_f0819.hdf5"#'densenet_extend_weights-best_one_fold_0730.hdf5'

model.load_weights(model_path + model_name)


# Load every training sample into memory. 6689 is a hard-coded record count;
# np.empty leaves rows uninitialised if fewer IDs are available.
X = np.empty((6689, 10, 2560, 12))
for i, ID in enumerate(IDs[pre_type]):
    if i % 1000 == 0:
        print(i)
    X[i,] = np.load("training_data/" + ID + ".npy")
#train_x = [(X[:, i]-np.mean(X[:, i]))/np.std(X[:, i]) for i in range(10)]
# One array per model input.
train_x = [X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4], X[:, 5], X[:, 6], X[:, 7], X[:, 8], X[:, 9]]

index = np.arange(6689)
y_train = preprocess_y(labels, index)

blend_train = model.predict(train_x)


# Fit one logistic regression per class on the network's output scores.
clf = OneVsRestClassifier(LR)
clf.fit(blend_train,y_train)

# NOTE: these scores are computed on the same data the classifier was just
# fitted on, so they are not a held-out estimate.
y_pred = clf.predict(blend_train)

print(" train data f1_score  :", f1_score(y_train, y_pred, average='macro'))
for i in range(10):
    print("f1 score of ab {} is {}".format(i, f1_score(y_train[:, i], y_pred[:, i], average='macro')))
    
# Read the ten window positions of every file in the "Val" folder.
net_num = 10
test_x = [read_data_seg(path, split='Val', preprocess=True, n_index=i, pre_type=pre_type) for i in range(net_num)]
out = model.predict(test_x)

y_pred = clf.predict(out)

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Convert each multi-hot prediction row into the list of predicted class ids.
pred = []
for j in range(y_pred.shape[0]):
    pred.append([classes[i] for i in range(10) if y_pred[j][i] == 1])
    
# Sorted file list, used when writing the answer file.
val_dataset_path = path + "/Val/"
val_files = os.listdir(val_dataset_path)
val_files.sort()

# Write one row per .mat file: the record name followed by its predicted
# class ids, padded with empty strings to ten label columns.
# `pred` is consumed in the order of the sorted .mat files in `val_files`.
# newline='' is what the csv module expects for files it writes to.
with open('jupyter_answers_resnet_{}_0801.csv'.format(pre_type), 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                     'label3', 'label4', 'label5', 'label6', 'label7', 'label8', 'label9', 'label10'])
    count = 0
    for file_name in val_files:
        if file_name.endswith('.mat'):
            # splitext removes only the extension. str.strip('.mat') removes
            # any leading or trailing '.', 'm', 'a' and 't' characters, which
            # corrupts names that begin or end with those letters.
            record_name = os.path.splitext(file_name)[0]

            result = pred[count]
            answer = [record_name] + list(result) + [''] * (10 - len(result))

            count += 1
            writer.writerow(answer)
# The with-block closes the file; no explicit close() is needed.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
0
1000
2000
3000
4000
5000
6000
 train data f1_score  : 0.9125444090482804
f1 score of ab 0 is 0.9595746632606921
f1 score of ab 1 is 0.9775561013582433
f1 score of ab 2 is 0.9468489571244378
f1 score of ab 3 is 0.9800131614987035
f1 score of ab 4 is 0.9420075605534346
f1 score of ab 5 is 0.9593534469662444
f1 score of ab 6 is 0.9478716357260153
f1 score of ab 7 is 0.9339349808100037
f1 score of ab 8 is 0.9419145061506937
f1 score of ab 9 is 0.9196882148829897
0
0
0
0
0
0
0
0
0
0


# Predict quarter-final data
## DenseNet: 10 sub-networks, 4 blocks
### Model weights: densenet_extend_weights-best_k{}_r{}_0806_30.hdf5

In [10]:
# Which preprocessed variant of the data to load in this section.
pre_type = "db6"

#labels = pd.read_csv(path + "REFERENCE.csv")
labels = pd.read_csv("/media/uuser/data/final_run/reference.csv")
raw_IDs = labels["File_name"].values.tolist()

IDs = {}
IDs["sym"] = raw_IDs
IDs["db4"] = [i + "_db4" for i in raw_IDs]
IDs["db6"] = [i + "_db6" for i in raw_IDs]

# 6500 is a hard-coded record count; np.empty leaves rows uninitialised if
# the label file lists fewer IDs, and indexing fails if it lists more.
X = np.empty((6500, 10, 2560, 12))
for i, ID in enumerate(IDs[pre_type]):
    X[i,] = np.load("/media/uuser/data/final_run/training_data/" + ID + ".npy")
#train_x = [(X[:, i]-np.mean(X[:, i]))/np.std(X[:, i]) for i in range(10)]
# One array per model input. Later cells reuse this `train_x` as-is.
train_x = [X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4], X[:, 5], X[:, 6], X[:, 7], X[:, 8], X[:, 9]]

def preprocess_y(labels, y, num_class=10):
    """Build a multi-hot label matrix from a label table.

    param labels: pandas DataFrame; each row holds one record. The first
        non-null field of a row is skipped (presumably the file name) and
        every remaining non-null field is read as an integer class id.
    param y: sequence of index labels into ``labels`` (looked up with ``.loc``)
    param num_class: number of columns in the output
    return: int8 numpy array of shape (len(y), num_class) with 1 at each
        class id present in the corresponding row
    """
    bin_label = np.zeros((len(y), num_class), dtype='int8')
    for row, key in enumerate(y):
        present = labels.loc[key].dropna()
        # Read by position with .iloc: the row Series is labelled by column
        # name, and integer keys on such a Series rely on a deprecated
        # positional fallback in pandas.
        for class_id in present.iloc[1:]:
            bin_label[row, int(class_id)] = 1
    return bin_label

NameError: name 'net_num' is not defined

In [11]:
# Read the ten window positions of every file in the "Val" folder.
net_num = 10
test_x = [read_data_seg(path, split='Val', preprocess=True, n_index=i, pre_type=pre_type) for i in range(net_num)]

# 6500 is the hard-coded record count used throughout this section.
index = np.arange(6500)
y_train = preprocess_y(labels, index)

input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=10, attention=False)
model = Model(inputs=inputs_list, outputs=outputs)
# print(model.summary())
n_classes = 10
n_fold = 3
model_path = './official_densenet_model/'
#'densenet_extend_weights-best_one_fold_0607.hdf5'
# One slice of predictions per fold model: (records, folds, classes).
blend_train = np.zeros((6500, n_fold, n_classes)).astype('float32')
en_amount = 1
# NOTE: results are stored by fold index only, so with en_amount > 1 each
# seed would overwrite the previous seed's predictions.
for seed in range(en_amount):
    for i in range(n_fold):
        model_name = "densenet_extend_weights-best_k{}_r{}_0806_30.hdf5".format(seed, i)
        model.load_weights(model_path + model_name)
        blend_train[:,i,:] = model.predict(train_x)

0
0
0
0
0
0
0
0
0
0
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [12]:
blend_train.shape

(6500, 3, 10)

In [13]:
# Save each fold's prediction matrix (one row per record, one column per
# class) as its own CSV file without an index column.
train_pd0 = pd.DataFrame(blend_train[:,0,:])
train_pd1 = pd.DataFrame(blend_train[:,1,:])
train_pd2 = pd.DataFrame(blend_train[:,2,:])
csv_path = "/media/jdcloud/quarter_final/"
train_pd0.to_csv(csv_path+"densenet_4block_10net_fold0.csv",index=None)
train_pd1.to_csv(csv_path+"densenet_4block_10net_fold1.csv",index=None)
train_pd2.to_csv(csv_path+"densenet_4block_10net_fold2.csv",index=None)


# Predict quarter-final data
## Attention model: 10 sub-networks
### Model weights: attention_extend_weights-best_k{}_r{}_0802_30.hdf5

In [14]:
from resnet_ecg import attentionmodel

# NOTE(review): pre_type is set to "sym" here, but the loading code below is
# commented out, so `train_x` is whatever an earlier cell left in memory
# (the previous section loaded the "db6" variant) - confirm this is intended.
pre_type = "sym"

#labels = pd.read_csv(path + "REFERENCE.csv")
labels = pd.read_csv("/media/uuser/data/final_run/reference.csv")
raw_IDs = labels["File_name"].values.tolist()

IDs = {}
IDs["sym"] = raw_IDs
IDs["db4"] = [i + "_db4" for i in raw_IDs]
IDs["db6"] = [i + "_db6" for i in raw_IDs]

# X = np.empty((6500, 10, 2560, 12))
# for i, ID in enumerate(IDs[pre_type]):
#     X[i,] = np.load("/media/uuser/data/final_run/training_data/" + ID + ".npy")
# #train_x = [(X[:, i]-np.mean(X[:, i]))/np.std(X[:, i]) for i in range(10)]
# train_x = [X[:, 0], X[:, 1], X[:, 2], X[:, 3], X[:, 4], X[:, 5], X[:, 6], X[:, 7], X[:, 8], X[:, 9]]

# def preprocess_y(labels, y, num_class=10):
#     bin_label = np.zeros((len(y), num_class)).astype('int8')
#     for i in range(len(y)):
#         label_nona = labels.loc[y[i]].dropna()
#         for j in range(1, label_nona.shape[0]):
#             bin_label[i, int(label_nona[j])] = 1
#     return bin_label

# net_num = 10
# test_x = [read_data_seg(path, split='Val', preprocess=True, n_index=i, pre_type=pre_type) for i in range(net_num)]

# 6500 is the hard-coded record count used throughout this section.
index = np.arange(6500)
y_train = preprocess_y(labels, index)

# Build the attention network with ten inputs of shape (2560, 12).
input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
outputs = attentionmodel.build_network(inputs_list, 0.5, num_classes=10, block_size=4, relu=False)
model = Model(inputs=inputs_list, outputs=outputs)

# print(model.summary())
n_classes = 10
n_fold = 3
model_path = './official_attention_model/'
#'densenet_extend_weights-best_one_fold_0607.hdf5'
# One slice of predictions per fold model: (records, folds, classes).
blend_train = np.zeros((6500, n_fold, n_classes)).astype('float32')
en_amount = 1
# NOTE: results are stored by fold index only, so with en_amount > 1 each
# seed would overwrite the previous seed's predictions.
for seed in range(en_amount):
    for i in range(n_fold):
        model_name = "attention_extend_weights-best_k{}_r{}_0802_30.hdf5".format(seed, i)
        model.load_weights(model_path + model_name)
        blend_train[:,i,:] = model.predict(train_x)

In [15]:
# Sanity check on the stacked predictions: the array is allocated above with
# 6500 rows, n_fold slots and n_classes columns.
blend_train.shape

(6500, 3, 10)

In [16]:
# Persist each fold's prediction matrix as its own CSV file (no index column).
csv_path = "/media/jdcloud/quarter_final/"
fold_files = (
    "attention_10net_fold0.csv",
    "attention_10net_fold1.csv",
    "attention_10net_fold2.csv",
)
# One DataFrame per slot of the fold axis (axis 1) of blend_train.
train_pd0, train_pd1, train_pd2 = (
    pd.DataFrame(blend_train[:, fold_no, :]) for fold_no in range(3)
)
for fold_frame, fold_file in zip((train_pd0, train_pd1, train_pd2), fold_files):
    fold_frame.to_csv(csv_path + fold_file, index=None)

In [6]:
# Predict the training set and the 'Val' split with every fold checkpoint and
# stack the results along a fold axis (blend_train / blend_test).
# NOTE(review): `path` and `read_data_seg` are not defined in this cell; they
# must already exist in the kernel when it runs.
pre_type = "db6"# "sym"

labels = pd.read_csv("/media/uuser/data/final_run/reference.csv")
raw_IDs = labels["File_name"].values.tolist()

# Record IDs per preprocessing variant.
IDs = {}
IDs["sym"] = raw_IDs
IDs["db4"] = [i + "_db4" for i in raw_IDs]
IDs["db6"] = [i + "_db6" for i in raw_IDs]

input_size = (2560, 12)
net_num = 10
inputs_list = [Input(shape=input_size) for _ in range(net_num)]
net = Net()
outputs = net.nnet(inputs_list, 0.5, num_classes=10)
model = Model(inputs=inputs_list, outputs=outputs)

#net_num = 10
test_x = [read_data_seg(path, split='Val', preprocess=True, n_index=i, pre_type=pre_type) for i in range(net_num)]

model_path = './official_densenet_model/'
# NOTE(review): model_name is not used below; the weights are loaded from the
# per-fold file name built inside the loop.
model_name = 'densenet_extend_weights-best_one_fold.hdf5'

# Every fold checkpoint is evaluated on the complete ID list (the
# `[index_train]` selection is disabled), so the training tensor is the same
# for all folds.  Load it once here instead of re-reading it per fold.
tr_IDs = np.array(IDs[pre_type]) # [index_train]
# val_IDs = np.array(IDs[pre_type])[index_valid]
print(tr_IDs.shape)

X = np.empty((tr_IDs.shape[0], net_num) + input_size)
for j, ID in enumerate(tr_IDs):
    X[j, ] = np.load("training_data/" + ID + ".npy")
# X_tr = [(X[:, i] - np.mean(X[:, i])) / np.std(X[:, i]) for i in range(10)]
# One slice per network input, taken along axis 1 of X.
X_tr = [X[:, k] for k in range(net_num)]
del X

# Size the output buffers from the data rather than hard-coding row counts, so
# the per-fold assignments below cannot hit a shape mismatch.
n_train = tr_IDs.shape[0]
n_test = len(test_x[0])  # assumes every test_x entry has one row per record -- TODO confirm

en_amount = 1
for seed in range(en_amount):
    print("************************")
    n_fold = 3  # 3
    n_classes = 10

    # The split is only used to enumerate the folds; index_train and
    # index_valid are not used to select data.
    kfold = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=2019)
    kf = kfold.split(IDs[pre_type], labels['label1'])

    blend_train = np.zeros((n_train, n_fold, n_classes), dtype='float32')
    blend_test = np.zeros((n_test, n_fold, n_classes), dtype='float32')

    count = 0

    for i, (index_train, index_valid) in enumerate(kf):
        print('fold: ', i + 1, ' training')
        t = time.time()

        # Evaluate best trained model
        model.load_weights(model_path + 'densenet_extend_weights-best_k{}_r{}_0801_30_attention_maxpooling.hdf5'.format(seed, i))

        blend_train[:, i, :] = model.predict(X_tr)
        blend_test[:, i, :] = model.predict(test_x)

        gc.collect()
        count += 1

# Release the training tensor once all folds have been scored.
del X_tr
gc.collect()

************************
fold:  1  training
(6500,)


InternalError: cuDNN launch failure : input shape([32,12,1,2561]) filter shape([1,32,12,16])
	 [[{{node conv1d_295/convolution/Conv2D}}]]
	 [[{{node dense_3/Sigmoid}}]]

In [6]:
# NOTE(review): the output recorded for this cell is 2-D, while the cells above
# allocate blend_train with three axes -- it presumably reflects a different
# kernel state; confirm before relying on it.
blend_train.shape

(6500, 10)

In [7]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression

# Second-stage classifier: one L2-regularised logistic regression per label,
# fitted on the first-stage predictions held in `blend_train`.
# NOTE(review): `blend_train` and `preprocess_y` are not defined in this cell;
# they must already exist in the kernel when it runs.
LR = LogisticRegression(penalty="l2",C=1.0)

pre_type = "sym"#"db6"#"sym"
#labels = pd.read_csv(path + "REFERENCE.csv")
labels = pd.read_csv("/media/uuser/data/final_run/reference.csv")

index = np.arange(6500)
y_train = preprocess_y(labels, index)

# Flatten any fold axis into feature columns.  For an (n, n_fold, n_classes)
# array each row becomes [fold0 scores, fold1 scores, ...], the same layout as
# np.hstack over the per-fold slices; an already 2-D array is left unchanged.
# The previous np.hstack([blend_train]) passed a 3-D array through unflattened.
x_train = blend_train.reshape(len(blend_train), -1)

clf = OneVsRestClassifier(LR)
clf.fit(x_train,y_train)

OneVsRestClassifier(estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
          n_jobs=None)

In [9]:
# Report how well the stacking classifier fits its own training data.
y_pred = clf.predict(x_train)

overall_f1 = f1_score(y_train, y_pred, average='macro')
print(" train data f1_score  :", overall_f1)

# NOTE(review): with average='macro' on a single 0/1 column the score averages
# the F1 of the positive and the negative class, so these per-label numbers are
# not directly comparable to the multilabel macro score printed above.
for i in range(10):
    label_f1 = f1_score(y_train[:, i], y_pred[:, i], average='macro')
    print("f1 score of ab {} is {}".format(i, label_f1))

 train data f1_score  : 0.8272480277114355
f1 score of ab 0 is 0.9637256297957846
f1 score of ab 1 is 0.9807119372336763
f1 score of ab 2 is 0.9432881279278955
f1 score of ab 3 is 0.9712819792054501
f1 score of ab 4 is 0.9236742866093681
f1 score of ab 5 is 0.96047140497336
f1 score of ab 6 is 0.9503812738049132
f1 score of ab 7 is 0.9464851736487506
f1 score of ab 8 is 0.9446709240836247
f1 score of ab 9 is 1.0


In [18]:
# Predict labels for the 'Val' split with the stacking classifier and write
# them to a submission CSV, one row per .mat record.
# NOTE(review): `blend_test`, `clf`, `path` and `pre_type` are not defined in
# this cell; they must already exist in the kernel when it runs.

# Flatten the fold axis into feature columns: each row becomes
# [fold0 scores, fold1 scores, ...], the same layout np.hstack over the
# per-fold slices produces, without hard-coding three folds.
out = blend_test.reshape(len(blend_test), -1)

y_pred = clf.predict(out)

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# For every record keep the class ids whose indicator equals 1.
pred = [[classes[i] for i in range(10) if row[i] == 1] for row in y_pred]

val_dataset_path = path + "/Val/"
val_files = os.listdir(val_dataset_path)
val_files.sort()

# Predictions are matched to files purely by sorted position, so the two
# sequences must have the same length.
mat_files = [file_name for file_name in val_files if file_name.endswith('.mat')]
if len(mat_files) != len(pred):
    raise ValueError("found {} .mat files but {} predictions".format(len(mat_files), len(pred)))

# newline='' lets the csv module control line endings itself.
with open('jupyter_answers_densenet_{}_0801_attention_maxpooling.csv'.format(pre_type), 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['File_name', 'label1', 'label2',
                     'label3', 'label4', 'label5', 'label6', 'label7', 'label8', 'label9', 'label10'])
    for file_name, result in zip(mat_files, pred):
        # splitext removes only the extension; str.strip('.mat') would strip
        # any leading/trailing '.', 'm', 'a' or 't' characters from the name.
        record_name = os.path.splitext(file_name)[0]
        # Pad with empty cells so every row has the ten label columns.
        writer.writerow([record_name] + result + [''] * (10 - len(result)))