In [1]:
# Install these packages after setting up TensorFlow (see https://www.tensorflow.org/install/pip)

# !pip install pandas
# !pip install mne
# !pip install scikit-learn

In [2]:
import os, glob, platform
import numpy as np
import numpy.matlib
import pickle
import pandas as pd
import pathlib
import matplotlib
import matplotlib.pyplot as plt
import mne
mne.__version__
from mne.viz import plot_alignment, snapshot_brain_montage
import shutil
from mne.datasets import eegbci
from sklearn.model_selection import train_test_split

# from mne_bids import write_raw_bids, BIDSPath, print_dir_tree, make_dataset_description
# from mne_bids.stats import count_events
import sys


In [3]:
# Directory added to the module search path. Presumably it holds the
# decoding-toolbox helper functions; nothing in the visible cells imports
# from it -- TODO confirm it is still needed.
path_utils = '/decoding_toolbox_py/helper_funcs'
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if path_utils not in sys.path:
    sys.path.append(path_utils)

In [4]:
''' VARIABLES '''

dataset = 'eeg'

# Upper bound applied to amount_of_subjects; larger requests are clamped.
MAX_SUBJECTS = 26
# Subject number that is skipped when building the subject list
# (the reason is not recorded in this notebook).
EXCLUDED_SUBJECT = 6

amount_of_subjects = 4 # Lower this number to make the whole run faster
amount_of_subjects = min(amount_of_subjects, MAX_SUBJECTS)
# Subject ids look like 's01', 's02', ...
subjs_list = [
    's{:02d}'.format(i)
    for i in range(1, amount_of_subjects + 1)
    if i != EXCLUDED_SUBJECT
]
print(subjs_list)
nSubj = len(subjs_list)

numC = 8

# numC evenly spaced angles covering [0, 180) degrees.
angles = [i * 180./numC for i in range(numC)]

x_labels = np.array(angles)

resample = True # speeds up the process but gives worse results overall
# Target sampling rate in Hz (original freq is 500Hz). Defined unconditionally
# so the name exists even when `resample` is switched off.
resample_frequency = 20

# Stimulus / tuning-curve settings.
cfg_stim = {
    'kappa': 4,
    'NumC': numC,
    'Tuning': 'vonmises',  # alternative: 'halfRectCos'
    'offset': 0,
}

# Training settings.
cfg_train = {
    'gamma': 0.1,
    'demean': True,
    'returnPattern': True,
}

# Test settings.
cfg_test = {
    'demean': 'traindata',
}

['s01', 's02', 's03', 's04']


In [5]:
'''EEG Dataset'''
def read_data(
        number_of_repetition=3,
        resample=False,
        resample_frequency = 20,
        subjs_list = subjs_list,
        task = 'main',
        data_dir = 'Cond_CJ_EEG'
        ):
    """Read one epochs file per subject and return its data and metadata.

    For every subject id the file ``<data_dir>/<subject_id>/main_epo.fif``
    (``task='main'``) or ``<data_dir>/<subject_id>/mainstim_epo.fif``
    (``task='stim'``) is read with ``mne.read_epochs``.

    Parameters
    ----------
    number_of_repetition : int
        Only used when ``task='stim'``: epochs are kept only where the
        metadata column ``'nrep'`` equals this value.
    resample : bool
        If True, each subject's epochs are resampled to ``resample_frequency``
        and the sampling rate is printed before and after.
    resample_frequency : float
        Target sampling rate passed to ``Epochs.resample``.
    subjs_list : list of str
        Subject ids, used as sub-folder names of ``data_dir``. The default is
        the module-level ``subjs_list`` as it was when this cell was executed;
        re-run this cell after changing that list.
    task : {'main', 'stim'}
        Which epochs file to load.
    data_dir : str
        Folder that contains one sub-folder per subject.

    Returns
    -------
    list of dict
        One dict per subject, in the order of ``subjs_list``, with the keys
        ``'epoch_dat'`` (array returned by ``Epochs.get_data()``) and
        ``'metadata'`` (the epochs' metadata). For ``task='stim'`` both are
        restricted to the selected repetition.

    Raises
    ------
    ValueError
        If ``task`` is neither ``'main'`` nor ``'stim'``. Previously such a
        value silently produced an empty list.
    """
    epoch_files = {'main': 'main_epo.fif', 'stim': 'mainstim_epo.fif'}
    if task not in epoch_files:
        raise ValueError(
            "task must be one of {}, got {!r}".format(sorted(epoch_files), task)
        )

    # Only the extracted arrays and metadata are collected. The Epochs objects
    # and per-subject averages that used to be accumulated here were never
    # used or returned.
    records = []
    for subject_id in subjs_list:
        epoch_path = os.path.join(data_dir, subject_id, epoch_files[task])
        epoch = mne.read_epochs(epoch_path, verbose=False)

        if resample:
            print('Frequency before:', epoch.info['sfreq'])
            epoch = epoch.resample(resample_frequency)
            print('Frequency after:' ,epoch.info['sfreq'])

        data = epoch.get_data()
        metadata = epoch.metadata

        if task == 'stim':
            # Keep only the epochs that belong to the requested repetition;
            # the same mask is applied to the data and to the metadata rows.
            keep = metadata['nrep'] == number_of_repetition
            data = data[keep.to_numpy(), :, :]
            metadata = metadata[keep]

        records.append({'epoch_dat': data, 'metadata': metadata})

    return records


In [6]:
# Load the 'main' epochs for every subject in read_data's default subject list.
# Resampling is switched off explicitly in this call, so the `resample` flag
# set in the VARIABLES cell has no effect here.
all_rawdata = read_data(
    task='main',
    resample=False,
    resample_frequency=20,
)

NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).
NOTE: pick_channels() is a legacy function. New code should use inst.pick(...).


In [7]:
# Column names of the metadata belonging to the first loaded subject.
all_rawdata[0]["metadata"].columns

Index(['index', 'subj', 'nblock', 'ntrial', 'nrep', 'trial_type', 'cond-1',
       'cond', 'rDV', 'DV', 'resp', 'deci-2', 'deci-1', 'deci', 'corr-1',
       'r_map', 'correct', 'confi', 'RT', 'd1', 'conf_lvl', 'correct-1', 'd2',
       'd3', 'd4', 'd5', 'd6', 'o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'confi-1',
       'conf_lvl-1'],
      dtype='object')

In [8]:
# Scratch check: merging the first two axes of a (3, 2, 5) array gives a
# (6, 5) array whose rows keep their original order.
A = np.random.random(size=(3, 2, 5))
print(A)
A = np.reshape(A, (6, 5))
print(A.shape)
A

[[[0.43317591 0.39689854 0.2512219  0.37900989 0.52127578]
  [0.43033983 0.28654573 0.19453379 0.33942963 0.55372774]]

 [[0.84489442 0.5221797  0.55998961 0.5201057  0.05592732]
  [0.70733644 0.00306953 0.59388274 0.29201387 0.0041646 ]]

 [[0.32625931 0.80469591 0.17487919 0.21001503 0.92391696]
  [0.22113803 0.5236896  0.42206746 0.20366072 0.67937401]]]
(6, 5)


array([[0.43317591, 0.39689854, 0.2512219 , 0.37900989, 0.52127578],
       [0.43033983, 0.28654573, 0.19453379, 0.33942963, 0.55372774],
       [0.84489442, 0.5221797 , 0.55998961, 0.5201057 , 0.05592732],
       [0.70733644, 0.00306953, 0.59388274, 0.29201387, 0.0041646 ],
       [0.32625931, 0.80469591, 0.17487919, 0.21001503, 0.92391696],
       [0.22113803, 0.5236896 , 0.42206746, 0.20366072, 0.67937401]])

In [9]:
# Scratch check: np.tile repeats a length-5 vector three times back to back.
b = np.tile(np.random.random(5), 3)
b

array([0.8703574 , 0.70913709, 0.46110587, 0.43927577, 0.30340531,
       0.8703574 , 0.70913709, 0.46110587, 0.43927577, 0.30340531,
       0.8703574 , 0.70913709, 0.46110587, 0.43927577, 0.30340531])

In [10]:
neural_network = True
if neural_network:
    # Pool the trials of the first nSubj subjects into one feature array and
    # one label vector. Everything is concatenated in a single call rather
    # than growing X / y inside a loop, which re-copied the accumulated data
    # on every iteration.
    subject_records = [all_rawdata[i] for i in range(nSubj)]
    X = np.concatenate([rec['epoch_dat'] for rec in subject_records], axis=0)
    # Labels come from the 'deci' metadata column; the result is always a
    # NumPy array, including the single-subject case.
    y = np.concatenate([rec['metadata']['deci'] for rec in subject_records])
    print(X.shape, y.shape)
    # NOTE: this rebinds `numC`, which the VARIABLES cell set to 8, to the
    # size of X's second axis. Re-run the VARIABLES cell before reusing the
    # original value. The axes are presumably (trials, channels, time
    # samples), as returned by MNE's Epochs.get_data() -- TODO confirm.
    numF, numC, numT = X.shape


(1003, 32, 2876) (1003,)


In [11]:
# !pip install tensorflow

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the callbacks: stop once val_loss has not improved for 5 epochs, and
# keep the weights of the epoch with the lowest val_loss on disk.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# MODEL
# NOTE(review): with input_shape=(numC, numT) the LSTM iterates over the second
# axis of X as the sequence and sees the third axis as per-step features.
# Confirm that this is the intended orientation of the data.
# NOTE(review): binary_crossentropy with a sigmoid output assumes the labels in
# y are 0/1 -- verify the coding of the 'deci' column.
model = keras.Sequential()
model.add(layers.LSTM(units=128, return_sequences=True, input_shape=(numC, numT)))
model.add(layers.Dropout(0.2))
model.add(layers.LSTM(units=32))
model.add(layers.Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with callbacks.
# The callbacks have to be handed to fit(); otherwise neither early stopping
# nor the checkpoint runs and 'best_model.h5' is never written.
# Validation uses a slice of the training data, so the test set stays unseen
# until the final evaluation and does not influence which epoch is kept.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stopping, model_checkpoint],
)

# Load the best saved model (written by model_checkpoint during training)
best_model = keras.models.load_model('best_model.h5')


# Evaluate the model on the held-out test set
loss, accuracy = best_model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)


: 

: 

RF all subjects 80-20 = 0.556809024979855

RF, Logistic, SVC = 1 sub 0.66

XGB 1 sub 0.62

XGB 26 0.5495568090249798

Ada 0.6

GB 0.62

0.66


CatBoost and RF, both trained on more data with stacked data, are performing the same.

Next, I'm going to add labels for the subjects.


In [None]:
# X = all_rawdata[0]['epoch_dat']
# print(X.shape)
# # X = X.reshape(X.shape[0],-1)
# y = all_rawdata[0]['metadata']['deci']
# 
# X2 = all_rawdata[2]['epoch_dat' ]
# y2 = all_rawdata[2]['metadata']['deci']
# 
# X = np.concatenate((X,X2), axis = 0)
# y = np.concatenate((y,y2))
# 
# print(X.shape, y.shape)
# for i in range(1, nSubj):
# 
#     X_temp = all_rawdata[i]['epoch_dat']
#     X_temp = X_temp.reshape((X_temp.shape[0]*X_temp.shape[1], X_temp.shape[2]))
#     shapes[i+1] = shapes[i] + X_temp.shape[0]
#     y_temp = all_rawdata[i]['metadata']['deci']
#     y_temp = np.tile(y_temp, numC)
# 
#     X = np.vstack((X, X_temp))
#     y = np.concatenate((y, y_temp))
# 
# print(shapes)
# 
# print(X.shape, y.shape)
# if add_labels:
#     labels = np.zeros((nSubj, X.shape[0]))
#     for i in range (nSubj):
#         for j in range(shapes[i], shapes[i+1]):
#             labels[i,j] = 1
#     print(labels.shape)
#     
#     labels = np.transpose(labels)
#     X = np.hstack((X, labels))
#     print(X.shape, labels.shape)