[CHB-Mit Seizure Dataset.](https://www.kaggle.com/datasets/adibadea/chbmitseizuredataset/data)

In [3]:

import numpy as np
import pandas as pd
import os
import importlib.util as util


from matplotlib import pyplot as plt

In [4]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_validate, GridSearchCV, train_test_split


In [5]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, LSTM, Dense, MaxPooling1D, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

In [12]:
for root, dirs, files in os.walk('data'):
    for file in files:
        print(os.path.join(root, file))

# Data sampled at frequency 256 Hz from 23 channels , n samples.
# Each sample represents 1 second.
# Training, Testing , Validation data have shape (n,23,256).
# npz file may be saved with pickle if strings or complex objects are present

npz_train1 = np.load('data/eeg-seizure_train.npz', allow_pickle=True)
train_signals = npz_train1['train_signals']
train_labels = npz_train1['train_labels']
print(f"Training data (signals) have shape {train_signals.shape}")
print(f"Training labels have shape {train_labels.shape}")

npz_val1 = np.load('data/eeg-seizure_val.npz', allow_pickle=True)
val_signals = npz_val1['val_signals']
val_labels = npz_val1['val_labels']
print(f"Validation data (signals) have shape {val_signals.shape}")
print(f"Validation labels have shape {val_labels.shape}")

npz_test1 = np.load('data/eeg-seizure_test.npz', allow_pickle=True)
test_signals = npz_test1['test_signals']
print(f"Test data (signals) have shape {test_signals.shape}")
# no test label file is available as this dataset if from a kaggle competition
print(npz_test1.keys())

npz_balanced1 = np.load('data/eeg-seizure_val_balanced.npz', allow_pickle=True)
npz_balanced1.keys()
val_balanced_signals = npz_balanced1['val_signals']
val_balanced_labels = npz_balanced1['val_labels']



data\eeg-predictive_train.npz
data\eeg-predictive_val.npz
data\eeg-predictive_val_balanced.npz
data\eeg-seizure_test.npz
data\eeg-seizure_train.npz
data\eeg-seizure_val.npz
data\eeg-seizure_val_balanced.npz
Training data (signals) have shape (37666, 23, 256)
Training labels have shape (37666,)
Validation data (signals) have shape (8071, 23, 256)
Validation labels have shape (8071,)
Test data (signals) have shape (8072, 23, 256)
KeysView(NpzFile 'data/eeg-seizure_test.npz' with keys: test_signals)


In [15]:
train_signals.shape, train_labels.shape


((37666, 23, 256), (37666,))

In [22]:

def dense_baseline(train_data, train_label, validation_data, validation_label):
    inputs = Input(shape=train_data.shape[1:])
    x = Flatten()(inputs)
    x = Dense(16, activation='relu')(x)
    outputs = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=outputs)

    callbacks_ = [ModelCheckpoint("dense_baseline.keras",
                                 save_best_only = True)]
    model.compile(optimizer='adam',loss='binary_crossentropy', metrics =['mae'])
    history = model.fit(train_data, train_label,
                        epochs=10,
                        validation_data=(validation_data, validation_label),
                        callbacks=callbacks_,
                        verbose=1)
    model = load_model('dense_baseline.keras')
    print(f"Test MAE: {model.evaluate(val_signals, val_labels)}")


# 2 minutes to run
dense_baseline(train_signals, train_labels, val_signals, val_labels)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test MAE: [0.49401986598968506, 0.252445787191391]


In [11]:
class Tools():
    def __init__(self):
        pass

    @staticmethod
    def train_svm(data, labels):
        if data.shape == 2:
            X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)
        elif len(data.shape) == 3:
            n_samples, timesteps, features = data.shape
            data_reshaped = np.reshape(data, (n_samples,timesteps*features))
            X_train, X_test, y_train, y_test = train_test_split(data_reshaped, labels, test_size=0.2)
        else:
            print("Incompatible data shape")
            return

        params = [
            {
                "kernel":["linear"],
                "C":[1,10,100]
            }
            # ,
            # {
            #     "kernel":["rbf"],
            #     "C":[1,10,100] ,
            #     "gamma":[0.001,0.01,0.1,1,10,100]
            # }
        ]

        svm = SVC(probability=True, verbose=3)

        # 10 fold cross-validation
        clf = GridSearchCV(svm, params,cv=2, n_jobs=1, verbose=1)

        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)

        print("Best parameters set: {clf.best_params_}")

        matrix_labels = sorted(list(set(y_test.flatten())))
        print("Confusion matrix")
        print(f"Labels: {','.join(matrix_labels)}")

        print("\nConfusion matrix:")
        print("Labels: {0}\n".format(",".join(labels)))
        print(confusion_matrix(y_test, y_predict, labels=labels))
        print(f"\nClassification report: {classification_report(y_test, y_predict)}")

In [42]:
class Tools2():
    def __init__(self):
        pass

    @staticmethod
    def train_svm(data, labels):
        if data.shape == 2:
            X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)
        elif len(data.shape) == 3:
            n_samples, timesteps, features = data.shape
            data_reshaped = np.reshape(data, (n_samples,timesteps*features))
            X_train, X_test, y_train, y_test = train_test_split(data_reshaped, labels, test_size=0.2)
        else:
            print("Incompatible data shape")
            return

        params = [
            {
                "kernel":["linear"],
                "C":[1,10,100]
            }
            ,
            {
                "kernel":["rbf"],
                "C":[1,10,100] ,
                "gamma":[0.001,0.01,0.1,1,10,100]
            }
        ]

        #svm = SVC(probability=True, verbose=3)
        svm = SVC(probability=True, verbose=1)

        # 10 fold cross-validation
        clf = GridSearchCV(svm, params,cv=2, n_jobs=1, verbose=1)

        clf.fit(X_train, y_train)
        y_predict = clf.predict(X_test)

        #print("Best parameters set: {clf.best_params_}")


        print("\nConfusion matrix:")
        print(confusion_matrix(y_test, y_predict, labels=labels))
        print("\nClassification report:")
        print(classification_report(y_test, y_predict, labels=labels))

In [43]:
Tools2.train_svm(train_signals[:1000], train_labels[:1000])
#len(train_signals.shape)


Fitting 2 folds for each of 21 candidates, totalling 42 fits
[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]
Confusion matrix:
[[  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0 161   0]
 [  0   0   0 ...   0  39   0]]

Classification report:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00        39
           0       0.81      1.00      0.89       161
           0       0.81      1.00      0.89       161
           1       0.00      0.00      0.00        39
           0       0.81      1.00      0.89       161
           0       0.81      1.00  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
class PrototypeModel():
    def __init__(self, shape):
        self.shape = shape

    def hybrid(self):
        model = Sequential()
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=self.shape))
        model.add(MaxPooling1D(pool_size=2))

        model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
        model.add(MaxPooling1D(pool_size=2))

        #model.add(Flatten())
        model.add(LSTM(128, activation='relu', return_sequences=False))

        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['Recall','Precision'])
        model.summary()
        return model


In [11]:
p_model = PrototypeModel(train_signals.shape[1:])
cnn_lstm = p_model.hybrid()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 21, 64)            49216     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 10, 64)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 8, 128)            24704     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 4, 128)           0         
 1D)                                                             
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 1

In [12]:
callbacks = [ModelCheckpoint('seizure_CNN_LSTM.keras', save_best_only=True)]
cnn_lstm_history = cnn_lstm.fit(train_signals, train_labels, epochs=10, batch_size=32,
            validation_data=(val_signals,val_labels), callbacks=callbacks)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
if util.find_spec('matplotlib') is None:
    print("matplotlib module not available")

In [34]:
import matplotlib.pyplot as plt
def plot_summary(history):
    epochs = range(1,len(history.history['loss'])+1)
    recall = history.history['recall']
    precision = history.history['precision']
    plt.figure()
    plt.plot(epochs, recall, 'bo', label='Recall')
    plt.plot(epochs, precision, 'g+', label='Precision')
    plt.title('Recall and Precision')
    plt.legend()
    plt.show()

r_= cnn_lstm_history.history['recall']
p_ = cnn_lstm_history.history['precision']
list(range(4,7))


[4, 5, 6]

In [35]:

plot_summary(cnn_lstm.history)

<IPython.core.display.Javascript object>

In [14]:
cnn_lstm_model = load_model('seizure_CNN_LSTM.keras')


In [23]:
print(f"Test accuracy {cnn_lstm_model.evaluate(val_balanced_signals, val_balanced_labels, verbose=1)}")

Test accuracy [0.66585373878479, 0.6672306656837463]
