### Original Model Structure with Class Weight Processing

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, Dropout
from keras.layers import Activation, BatchNormalization
from keras.optimizers import Adam
from keras.utils import np_utils
import tensorflow as tf
from toolkit import plot_confusion_matrix
from sklearn.utils import class_weight
from keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
np.set_printoptions(suppress=True)

trainD = np.load("/home/hsiehch/30s/train_data.npy")
trainL = np.load("/home/hsiehch/30s/train_label.npy")
validationD = np.load("/home/hsiehch/30s/validation_data.npy")
validationL = np.load("/home/hsiehch/30s/validation_label.npy")
testD = np.load("/home/hsiehch/30s/test_data.npy")
testL = np.load("/home/hsiehch/30s/test_label.npy")

trainD = np.append(trainD, validationD, axis=0)
trainL = np.append(trainL, validationL, axis=0)
trainD = np.append(trainD, testD, axis=0)
trainL = np.append(trainL, testL, axis=0)

trainData = trainD.reshape((trainD.shape[0], trainD.shape[1], 1))
trainLabel = np_utils.to_categorical(trainL, 4)
print('Train Data:', trainData.shape)
print('Train Label: ', trainLabel.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Train Data: (10151, 9000, 1)
Train Label:  (10151, 4)


In [2]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True)
print(kf)

for train_index, test_index in kf.split(trainData):
    print('trian:', train_index, 'len', len(train_index), 'test:', test_index, 'len', len(test_index))
#     print(trainData[train_index])
#     print(trainData[test_index])

KFold(n_splits=5, random_state=None, shuffle=True)
trian: [    0     1     3 ... 10147 10149 10150] len 8120 test: [    2     4     7 ... 10133 10141 10148] len 2031
trian: [    0     1     2 ... 10148 10149 10150] len 8121 test: [   18    19    30 ... 10135 10138 10145] len 2030
trian: [    0     1     2 ... 10148 10149 10150] len 8121 test: [   13    17    20 ... 10143 10144 10146] len 2030
trian: [    0     1     2 ... 10148 10149 10150] len 8121 test: [    5     6     8 ... 10126 10139 10142] len 2030
trian: [    2     4     5 ... 10145 10146 10148] len 8121 test: [    0     1     3 ... 10147 10149 10150] len 2030


In [3]:
from sklearn.metrics import f1_score

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(4, activation = "softmax"))
    
    model.compile(optimizer = 'adam', loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1
cw = {0: 2.80862832,
      1: 8.46333333,
      2: 0.42600671,
      3: 0.84859626}
for train_index, test_index in kf.split(trainData):
    
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=25)
    history = model.fit(x = trainData[train_index],
                        y = trainLabel[train_index],
                        epochs=200,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        class_weight=cw,
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 4)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model

1-Fold


KeyboardInterrupt: 

### Fixed Model Structure with Class Weight Processing

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, Dropout
from keras.layers import Activation, BatchNormalization
from keras.optimizers import Adam
from keras.utils import np_utils
import tensorflow as tf
from toolkit import plot_confusion_matrix
from sklearn.utils import class_weight
from keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
np.set_printoptions(suppress=True)

trainD = np.load("/home/hsiehch/30s/train_data.npy")
trainL = np.load("/home/hsiehch/30s/train_label.npy")
validationD = np.load("/home/hsiehch/30s/validation_data.npy")
validationL = np.load("/home/hsiehch/30s/validation_label.npy")
testD = np.load("/home/hsiehch/30s/test_data.npy")
testL = np.load("/home/hsiehch/30s/test_label.npy")

trainD = np.append(trainD, validationD, axis=0)
trainL = np.append(trainL, validationL, axis=0)
trainD = np.append(trainD, testD, axis=0)
trainL = np.append(trainL, testL, axis=0)

trainData = trainD.reshape((trainD.shape[0], trainD.shape[1], 1))
trainLabel = np_utils.to_categorical(trainL, 4)
print('Train Data:', trainData.shape)
print('Train Label: ', trainLabel.shape)

In [None]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True)
print(kf)

for train_index, test_index in kf.split(trainData):
    print('trian:', train_index, 'len', len(train_index), 'test:', test_index, 'len', len(test_index))
#     print(trainData[train_index])
#     print(trainData[test_index])

In [None]:
from sklearn.metrics import f1_score

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(512, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation = 'relu'))
    model.add(Dense(4, activation = "softmax"))
    
    model.compile(optimizer = 'adam', loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1
cw = {0: 2.80862832,
      1: 8.46333333,
      2: 0.42600671,
      3: 0.84859626}
for train_index, test_index in kf.split(trainData):
    
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=20)
    history = model.fit(x = trainData[train_index],
                        y = trainLabel[train_index],
                        epochs=200,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        class_weight=cw,
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 4)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model

### Normalized data with its model

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, Dropout
from keras.layers import BatchNormalization, Activation
from keras.optimizers import Adam
from keras.utils import np_utils
import tensorflow as tf
from keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from toolkit import normalization
from sklearn.metrics import confusion_matrix
from toolkit import plot_confusion_matrix
np.set_printoptions(suppress=True)

trainD = np.load("/home/hsiehch/30s/train_data.npy")
trainL = np.load("/home/hsiehch/30s/train_label.npy")
validationD = np.load("/home/hsiehch/30s/validation_data.npy")
validationL = np.load("/home/hsiehch/30s/validation_label.npy")
testD = np.load("/home/hsiehch/30s/test_data.npy")
testL = np.load("/home/hsiehch/30s/test_label.npy")

trainD = normalization.normalize_arr(trainD)
validationD = normalization.normalize_arr(validationD)
testD = normalization.normalize_arr(testD)

trainData = trainD.reshape((trainD.shape[0], trainD.shape[1], 1))
trainLabel = np_utils.to_categorical(trainL, 4)
validationData = validationD.reshape((validationD.shape[0], validationD.shape[1], 1))
validationLabel = np_utils.to_categorical(validationL, 4)
testData = testD.reshape((testD.shape[0], testD.shape[1], 1))
testLabel = np_utils.to_categorical(testL, 4)

cw = class_weight.compute_sample_weight('balanced', [0,1,2,3], trainL)

print('Train Data:', trainData.shape)
print('Train Label: ', trainLabel.shape)
print('Vali Data: ', validationData.shape)
print('Vali Label: ', validationLabel.shape)
print('Test Data: ', testData.shape)
print('Test Label: ', testLabel.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Normalization done!
Normalization done!
Normalization done!
Train Data: (5078, 9000, 1)
Train Label:  (5078, 4)
Vali Data:  (2032, 9000, 1)
Vali Label:  (2032, 4)
Test Data:  (3041, 9000, 1)
Test Label:  (3041, 4)


In [2]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True)
print(kf)

for train_index, test_index in kf.split(trainData):
    print('trian:', train_index, 'len', len(train_index), 'test:', test_index, 'len', len(test_index))

KFold(n_splits=5, random_state=None, shuffle=True)
trian: [   0    1    2 ... 5074 5076 5077] len 4062 test: [   3   13   14 ... 5059 5067 5075] len 1016
trian: [   0    1    2 ... 5075 5076 5077] len 4062 test: [   7   10   11 ... 5063 5071 5072] len 1016
trian: [   0    1    3 ... 5075 5076 5077] len 4062 test: [   2   16   17 ... 5066 5068 5069] len 1016
trian: [   0    1    2 ... 5072 5073 5075] len 4063 test: [   6    9   25 ... 5074 5076 5077] len 1015
trian: [   2    3    6 ... 5075 5076 5077] len 4063 test: [   0    1    4 ... 5065 5070 5073] len 1015


In [3]:
from sklearn.metrics import f1_score

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(4, activation = "softmax"))
    
    adam = Adam(lr = 0.0005)
    model.compile(optimizer = adam, loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1
cw = {0: cw[0],
      1: cw[1],
      2: cw[2],
      3: cw[3]}
for train_index, test_index in kf.split(trainData):
    
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=20)
    history = model.fit(x = trainData[train_index],
                        y = trainLabel[train_index],
                        epochs=200,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        class_weight=cw,
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 4)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model

1-Fold
Training:
Loss: 0.420, Accuracy: 0.831
Testing:
Loss: 0.703, Accuracy: 0.751
Confusion matrix, without normalization
F1-score:
[0.68965517 0.43333333 0.84834123 0.56858847]
2-Fold
Training:
Loss: 0.769, Accuracy: 0.675
Testing:
Loss: 0.793, Accuracy: 0.676
Confusion matrix, without normalization
F1-score:
[0.66968326 0.53913043 0.77117117 0.52559727]
3-Fold
Training:
Loss: 0.612, Accuracy: 0.740
Testing:
Loss: 0.710, Accuracy: 0.700
Confusion matrix, without normalization
F1-score:
[0.63716814 0.4        0.82334869 0.41089109]
4-Fold
Training:
Loss: 0.457, Accuracy: 0.828
Testing:
Loss: 0.597, Accuracy: 0.782
Confusion matrix, without normalization
F1-score:
[0.69822485 0.56521739 0.86737185 0.65185185]
5-Fold
Training:
Loss: 0.775, Accuracy: 0.532
Testing:
Loss: 0.875, Accuracy: 0.506
Confusion matrix, without normalization
F1-score:
[0.69747899 0.48780488 0.47394541 0.48677249]


====================================================

In [3]:
## second model ##

from sklearn.metrics import f1_score

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(4, activation = "softmax"))
    
    adam = Adam(lr = 0.0005)
    model.compile(optimizer = adam, loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1
cw = {0: cw[0],
      1: cw[1],
      2: cw[2],
      3: cw[3]}
for train_index, test_index in kf.split(trainData):
    
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=20)
    history = model.fit(x = trainData[train_index],
                        y = trainLabel[train_index],
                        epochs=200,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        class_weight=cw,
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 4)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model

1-Fold
Training:
Loss: 0.380, Accuracy: 0.859
Testing:
Loss: 0.549, Accuracy: 0.806
F1-score:
[0.70157068 0.46666667 0.88733489 0.67611336]
2-Fold
Training:
Loss: 0.441, Accuracy: 0.829
Testing:
Loss: 0.636, Accuracy: 0.755
F1-score:
[0.70351759 0.52272727 0.85578447 0.55486542]
3-Fold
Training:
Loss: 0.460, Accuracy: 0.813
Testing:
Loss: 0.706, Accuracy: 0.761
F1-score:
[0.64516129 0.58333333 0.85536547 0.58196721]
4-Fold
Training:
Loss: 0.782, Accuracy: 0.662
Testing:
Loss: 0.847, Accuracy: 0.668
F1-score:
[0.66666667 0.41509434 0.77680141 0.48587571]
5-Fold
Training:
Loss: 1.403, Accuracy: 0.305
Testing:
Loss: 1.492, Accuracy: 0.273
F1-score:
[0.21925134 0.48648649 0.39429313 0.11441648]


### Oversampling

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, Dropout
from keras.layers import BatchNormalization, Activation
from keras.optimizers import Adam
from keras.utils import np_utils
import tensorflow as tf
from keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from toolkit import normalization
from sklearn.metrics import confusion_matrix
from toolkit import plot_confusion_matrix
np.set_printoptions(suppress=True)

trainD = np.load("/home/hsiehch/30s/train_data.npy")
trainL = np.load("/home/hsiehch/30s/train_label.npy")
validationD = np.load("/home/hsiehch/30s/validation_data.npy")
validationL = np.load("/home/hsiehch/30s/validation_label.npy")
testD = np.load("/home/hsiehch/30s/test_data.npy")
testL = np.load("/home/hsiehch/30s/test_label.npy")

# trainD = normalization.normalize_arr(trainD)
# validationD = normalization.normalize_arr(validationD)
# testD = normalization.normalize_arr(testD)

trainData = trainD.reshape((trainD.shape[0], trainD.shape[1], 1))
trainLabel = np_utils.to_categorical(trainL, 4)
validationData = validationD.reshape((validationD.shape[0], validationD.shape[1], 1))
validationLabel = np_utils.to_categorical(validationL, 4)
testData = testD.reshape((testD.shape[0], testD.shape[1], 1))
testLabel = np_utils.to_categorical(testL, 4)

cw = class_weight.compute_sample_weight('balanced', [0,1,2,3], trainL)

print('Train Data:', trainData.shape)
print('Train Label: ', trainLabel.shape)
print('Vali Data: ', validationData.shape)
print('Vali Label: ', validationLabel.shape)
print('Test Data: ', testData.shape)
print('Test Label: ', testLabel.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Train Data: (5078, 9000, 1)
Train Label:  (5078, 4)
Vali Data:  (2032, 9000, 1)
Vali Label:  (2032, 4)
Test Data:  (3041, 9000, 1)
Test Label:  (3041, 4)


In [2]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True)
print(kf)

for train_index, test_index in kf.split(trainData):
    print('trian:', train_index, 'len', len(train_index), 'test:', test_index, 'len', len(test_index))

KFold(n_splits=5, random_state=None, shuffle=True)
trian: [   0    1    2 ... 5074 5075 5077] len 4062 test: [   9   15   21 ... 5058 5067 5076] len 1016
trian: [   0    1    2 ... 5072 5076 5077] len 4062 test: [   4    8   11 ... 5073 5074 5075] len 1016
trian: [   1    2    3 ... 5075 5076 5077] len 4062 test: [   0   12   14 ... 5052 5054 5065] len 1016
trian: [   0    1    2 ... 5074 5075 5076] len 4063 test: [   7   30   33 ... 5071 5072 5077] len 1015
trian: [   0    4    7 ... 5075 5076 5077] len 4063 test: [   1    2    3 ... 5062 5064 5068] len 1015


In [3]:
from sklearn.metrics import f1_score
from imblearn.over_sampling import SMOTE

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 5))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(128, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation = 'relu'))
    model.add(Dense(4, activation = "softmax"))
    
    adam = Adam(lr = 0.001)
    model.compile(optimizer = adam, loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1

sm = SMOTE(sampling_strategy = 'auto')

for train_index, test_index in kf.split(trainData):
    
    x_res, y_res = sm.fit_sample(trainD[train_index], trainL[train_index])
    x_res = x_res.reshape((x_res.shape[0], x_res.shape[1], 1))
    y_res = np_utils.to_categorical(y_res, 4)
    
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=20)
    history = model.fit(x = x_res,
                        y = y_res,
                        epochs=200,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 4)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model
    del x_res, y_res

1-Fold
Training:
Loss: 0.391, Accuracy: 0.873
Testing:
Loss: 0.649, Accuracy: 0.818
F1-score:
[0.74418605 0.5        0.88368336 0.72631579]
2-Fold
Training:
Loss: 0.298, Accuracy: 0.900
Testing:
Loss: 0.604, Accuracy: 0.794
F1-score:
[0.75531915 0.57692308 0.8645054  0.68251273]
3-Fold
Training:
Loss: 0.271, Accuracy: 0.907
Testing:
Loss: 0.703, Accuracy: 0.795
F1-score:
[0.71502591 0.57575758 0.872103   0.69736842]
4-Fold
Training:
Loss: 0.337, Accuracy: 0.872
Testing:
Loss: 0.619, Accuracy: 0.786
F1-score:
[0.70833333 0.45       0.86902928 0.628     ]
5-Fold
Training:
Loss: 0.278, Accuracy: 0.906
Testing:
Loss: 0.611, Accuracy: 0.800
F1-score:
[0.72222222 0.55319149 0.87146322 0.6910299 ]


### 3 classes

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, AveragePooling1D, Dropout
from keras.layers import Activation, BatchNormalization
from keras.optimizers import Adam
from keras.utils import np_utils
import tensorflow as tf
from toolkit import plot_confusion_matrix
from sklearn.utils import class_weight
from keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from toolkit import to_3_classes
np.set_printoptions(suppress=True)

trainD = np.load("/home/hsiehch/30s/train_data.npy")
trainL = np.load("/home/hsiehch/30s/train_label.npy")
validationD = np.load("/home/hsiehch/30s/validation_data.npy")
validationL = np.load("/home/hsiehch/30s/validation_label.npy")
testD = np.load("/home/hsiehch/30s/test_data.npy")
testL = np.load("/home/hsiehch/30s/test_label.npy")

trainD = np.append(trainD, validationD, axis=0)
trainL = np.append(trainL, validationL, axis=0)
trainD = np.append(trainD, testD, axis=0)
trainL = np.append(trainL, testL, axis=0)

trainL = to_3_classes.to_3_classes(trainL)

trainData = trainD.reshape((trainD.shape[0], trainD.shape[1], 1))
trainLabel = np_utils.to_categorical(trainL, 3)

print('Train Data:', trainData.shape)
print('Train Label: ', trainLabel.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Train Data: (10151, 9000, 1)
Train Label:  (10151, 3)


In [2]:
from sklearn.model_selection import StratifiedKFold

kf = StratifiedKFold(n_splits=5, shuffle=False)
print(kf)

for train_index, test_index in kf.split(trainD, trainL):
    print('trian:', train_index, 'len', len(train_index), 'test:', test_index, 'len', len(test_index))

StratifiedKFold(n_splits=5, random_state=None, shuffle=False)
trian: [  181   182   183 ... 10148 10149 10150] len 8120 test: [   0    1    2 ... 3287 3288 3289] len 2031
trian: [    0     1     2 ... 10148 10149 10150] len 8120 test: [ 181  182  183 ... 4479 4480 4481] len 2031
trian: [    0     1     2 ... 10148 10149 10150] len 8120 test: [ 362  363  364 ... 6511 6512 6513] len 2031
trian: [    0     1     2 ... 10148 10149 10150] len 8121 test: [5169 5170 5171 ... 8957 8958 8959] len 2030
trian: [   0    1    2 ... 8957 8958 8959] len 8123 test: [ 7200  7201  7202 ... 10148 10149 10150] len 2028


In [3]:
from sklearn.metrics import f1_score

def create_model():
    model = Sequential()
    model.add(Conv1D(filters = 32, kernel_size = 7, input_shape = (trainData.shape[1], 1)))
    model.add(Activation('relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 32, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 7))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 64, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 128, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))

    model.add(Conv1D(filters = 256, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size = 2))
    model.add(Dropout(0.5))

    model.add(Conv1D(filters = 512, kernel_size = 3))
    model.add(Activation('relu'))

    model.add(Flatten())
    model.add(Dense(512, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation = 'relu'))
    model.add(Dense(3, activation = "softmax"))
    
    adam = Adam(lr = 0.0005)
    model.compile(optimizer = adam, loss = "categorical_crossentropy", metrics=['accuracy'])

    return model

fold = 1

for train_index, test_index in kf.split(trainD, trainL):
    cw = class_weight.compute_sample_weight('balanced', [0,1,2], trainL[train_index])
    cw = {0: cw[0],
          1: cw[1],
          2: cw[2]}
    print('{}-Fold'.format(fold))
    model = create_model()
    early_stop = EarlyStopping(patience=20)
    history = model.fit(x = trainData[train_index],
                        y = trainLabel[train_index],
                        epochs=150,
                        validation_data=(trainData[test_index], trainLabel[test_index]),
                        callbacks=[early_stop],
                        class_weight=cw,
                        batch_size=70, 
                        verbose=0)
    
    evaluation = model.evaluate(x = trainData[train_index], y = trainLabel[train_index])
    print('Training:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    
    evaluation = model.evaluate(x = trainData[test_index], y = trainLabel[test_index])
    print('Testing:')
    print('Loss: {:.3f}, Accuracy: {:.3f}'.format(evaluation[0], evaluation[1]))
    test_prediction = model.predict_classes(trainData[test_index], batch_size=1)
    cnf_matrix = confusion_matrix(trainLabel[test_index].argmax(axis=1), test_prediction)
    plot_confusion_matrix.plot_confusion_matrix(cnf_matrix, classes=['AF','Noise','Normal','Other'],
                      index=fold, save_png=True)
    
    validation_prediction = model.predict_classes(trainData[test_index], batch_size=100)
    validation_prediction = np_utils.to_categorical(validation_prediction, 3)
    result = f1_score(trainLabel[test_index], validation_prediction, average=None)
    print('F1-score:')
    print(result)
    
    fold = fold + 1
    del model

1-Fold
Training:
Loss: 1.100, Accuracy: 0.089
Testing:
Loss: 1.100, Accuracy: 0.089


  'precision', 'predicted', average, warn_for)


F1-score:
[0.1636528 0.        0.       ]
2-Fold
Training:
Loss: 0.180, Accuracy: 0.936
Testing:
Loss: 0.512, Accuracy: 0.850
F1-score:
[0.79807692 0.76862124 0.90184564]
3-Fold
Training:
Loss: 0.371, Accuracy: 0.855
Testing:
Loss: 0.513, Accuracy: 0.801
F1-score:
[0.67080745 0.68398994 0.88516345]
4-Fold
Training:
Loss: 1.100, Accuracy: 0.324
Testing:
Loss: 1.100, Accuracy: 0.324
F1-score:
[0.         0.48958333 0.        ]
5-Fold
Training:
Loss: 1.098, Accuracy: 0.587
Testing:
Loss: 1.098, Accuracy: 0.587
F1-score:
[0.         0.         0.73998136]
