In [19]:
import numpy as np
import tensorflow as tf
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt


In [4]:
DATA_PATH = 'C:\\Users\\Sean\\Desktop\\AI4E_A3\\train' # Directory containing all the data
DATA_FREQUENCY = 500 # The dataset is sampled at 500 Hz

# Take the first 6 series of each subject to be the training set
train_data_files = glob(os.path.join(DATA_PATH, 'subj*_series[1-6]_data.csv'))
train_event_files = glob(os.path.join(DATA_PATH, 'subj*_series[1-6]_events.csv'))

# Take the 7th and 8th series to be the test set
test_data_files = glob(os.path.join(DATA_PATH, 'subj*_series[7-8]_data.csv'))
test_event_files = glob(os.path.join(DATA_PATH, 'subj*_series[7-8]_events.csv'))



# Remove the first column and convert to NUMPY array, each row is a sample and each column is a feature.
x_train_data_series = [pd.read_csv(file, index_col=None, header=0).iloc[:,1:].to_numpy() for file in train_data_files] # List of the training time series' from each subj_series
y_train_data_series = [pd.read_csv(file, index_col=None, header=0).iloc[:,1:].to_numpy() for file in train_event_files]

x_test_data_series = [pd.read_csv(file, index_col=None, header=0).iloc[:,1:].to_numpy() for file in test_data_files]
y_test_data_series = [pd.read_csv(file, index_col=None, header=0).iloc[:,1:].to_numpy() for file in test_event_files]

In [5]:
# Need to go through each of the x_train_data_series and pick out the actual train data which will be 1 second before the event 
def generate_input_dataset(x_train_data_series, y_train_data_series, t_lookback=2, sample_reduction=50):
    # t_lookback specifies how many seconds to look back for LSTM
    # sample_reduction specifies how much to downsample the data by (e.g. for 10 means 500/10 = 50 Hz, 50 menans 500/50 = 10 Hz). This mimics a reduced sampling rate of EEG data by taking every nth reading
    n_frames = t_lookback * DATA_FREQUENCY # Number of data frames to look back NOT REDUCED
    x = [] # Train data
    y = [] # Train labels

    # Go through each series/events
    for series_data, series_events in zip(x_train_data_series, y_train_data_series):
        
        for i in range(series_data.shape[0]-n_frames):
            
            if np.sum(series_events[i+n_frames]) > 0: # If there is an event in the frame n_frames ahead
                x.append(series_data[i:i+n_frames:sample_reduction]) # Append the data

                y.append(series_events[i+n_frames]) # Append the label for the event with the historicla data in mind
                

    return np.array(x), np.array(y)

x_train, y_train = generate_input_dataset(x_train_data_series, y_train_data_series)

In [10]:
print(x_train.shape)
print(y_train.shape)


(1717359, 20, 32)
(1717359, 6)


In [8]:
model = tf.keras.models.Sequential(layers=[

    tf.keras.layers.Input((x_train.shape[1], x_train.shape[2])), # Should be frames*features
    tf.compat.v1.keras.layers.CuDNNLSTM(units=32), # Need 32 units to match the number of features
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(6, activation='sigmoid') # 6 possible events from 0-1

    ]
)



model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cu_dnnlstm_1 (CuDNNLSTM)    (None, 32)                8448      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 6)                 198       
                                                                 
Total params: 8,646
Trainable params: 8,646
Non-trainable params: 0
_________________________________________________________________


In [12]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [33]:
# model.evaluate(x_train, y_train)
# We want to evaluate the whole time series and potentially get a plot to compare the predicted events with the actual events (noting the different time scales)

# plt.figure(figsize=(15,10))
# plt.legend([1,2,3,4,5,6], loc='upper left')
# plt.plot(y_test_data_series[0])


In [39]:
def prediction_test_time_series(x_test_data_series, y_test_data_series, t_lookback=2, sample_reduction=50):

    n_frames = t_lookback * DATA_FREQUENCY # Number of data frames to look back NOT REDUCED

    for x_test in x_test_data_series:
        print(x_test.shape)

        # for i in range(x_test.shape[0]):


# TODO: Need to do a time series prediction for each of the test data series
# NOTE: Need to downsample the test data series by the same amount as the training data series
# TODO: Need to actually train a model as well to see how it goes on the train data series
        