In [2]:
import os, glob, platform
import numpy as np
import numpy.matlib
import pickle
import pandas as pd
import pathlib
import matplotlib
import matplotlib.pyplot as plt
import mne
mne.__version__
from mne.viz import plot_alignment, snapshot_brain_montage
import shutil
from mne.datasets import eegbci
from sklearn.model_selection import train_test_split

# from mne_bids import write_raw_bids, BIDSPath, print_dir_tree, make_dataset_description
# from mne_bids.stats import count_events
import sys


In [3]:
path_utils = '/decoding_toolbox_py/helper_funcs' 
sys.path.append(path_utils)

In [4]:
''' VARIABLES '''

dataset = 'eeg'

amount_of_subjects = 4 # Change the range so the process is faster
if amount_of_subjects > 26: amount_of_subjects = 26
subjs_list = ['s{:02d}'.format(i) for i in range(1, amount_of_subjects+1) if i != 6 ] 
print(subjs_list)
nSubj = len(subjs_list)

numC = 8

angles = [i * 180./numC for i in range(numC)]

x_labels = np.array(angles)

resample = True # speeds up the procees but showing worse results overall
if resample: resample_frequency = 20 # in Hz, original freq is 500Hz

cfg_stim = dict()
cfg_stim['kappa'] = 4
cfg_stim['NumC'] = numC
cfg_stim['Tuning'] = 'vonmises'
# cfg_stim['Tuning'] = 'halfRectCos'
cfg_stim['offset'] = 0

cfg_train = dict()
cfg_train['gamma'] = 0.1
cfg_train['demean'] = True
cfg_train['returnPattern'] = True

cfg_test = dict()
cfg_test['demean'] = 'traindata'

['s01', 's02', 's03', 's04']


In [6]:
from methods import read_data
all_rawdata = read_data (task = 'main', resample=False,subjs_list=subjs_list, resample_frequency=20)

In [7]:
all_rawdata[0]['metadata'].columns

Index(['index', 'subj', 'nblock', 'ntrial', 'nrep', 'trial_type', 'cond-1',
       'cond', 'rDV', 'DV', 'resp', 'deci-2', 'deci-1', 'deci', 'corr-1',
       'r_map', 'correct', 'confi', 'RT', 'd1', 'conf_lvl', 'correct-1', 'd2',
       'd3', 'd4', 'd5', 'd6', 'o1', 'o2', 'o3', 'o4', 'o5', 'o6', 'confi-1',
       'conf_lvl-1'],
      dtype='object')

In [8]:
A = np.random.random((3,2,5))
print(A)
A = A.reshape((6,5))
print(A.shape)
A

[[[0.43184895 0.45195743 0.95569793 0.04368472 0.17922004]
  [0.74466998 0.03886003 0.95273872 0.99519334 0.63155751]]

 [[0.5955432  0.7683401  0.28255609 0.38890707 0.87248319]
  [0.23176965 0.32154674 0.67778989 0.54797933 0.5050915 ]]

 [[0.26912909 0.62502693 0.96091254 0.28693689 0.69712973]
  [0.72806582 0.66321277 0.23828041 0.67868125 0.8657708 ]]]
(6, 5)


array([[0.43184895, 0.45195743, 0.95569793, 0.04368472, 0.17922004],
       [0.74466998, 0.03886003, 0.95273872, 0.99519334, 0.63155751],
       [0.5955432 , 0.7683401 , 0.28255609, 0.38890707, 0.87248319],
       [0.23176965, 0.32154674, 0.67778989, 0.54797933, 0.5050915 ],
       [0.26912909, 0.62502693, 0.96091254, 0.28693689, 0.69712973],
       [0.72806582, 0.66321277, 0.23828041, 0.67868125, 0.8657708 ]])

In [9]:
b = np.random.random((5))
b = np.tile(b,3)
b

array([0.89693542, 0.43572326, 0.70526   , 0.3384594 , 0.05880538,
       0.89693542, 0.43572326, 0.70526   , 0.3384594 , 0.05880538,
       0.89693542, 0.43572326, 0.70526   , 0.3384594 , 0.05880538])

In [10]:
nSubj = 4
shapes = [None] * (nSubj+1)
add_labels = True

X = all_rawdata[0]['epoch_dat']
print(X.shape)
# X = X.reshape(X.shape[0],-1)
y = all_rawdata[0]['metadata']['deci']
y = np.array(y)
print(y.shape)

for i in range(1, nSubj):

    X_temp = all_rawdata[i]['epoch_dat']
    # X_temp = X_temp.reshape((X_temp.shape[0]*X_temp.shape[1], X_temp.shape[2]))
    # shapes[i+1] = shapes[i] + X_temp.shape[0]
    y_temp = all_rawdata[i]['metadata']['deci']
    # y_temp = np.tile(y_temp, numC)

    X = np.vstack((X, X_temp))
    y = np.concatenate((y, y_temp))

print(X.shape, y.shape)

X = X.reshape(X.shape[0], -1)
print(X.shape, y.shape)

(250, 32, 2876)
(250,)
(1003, 32, 2876) (1003,)
(1003, 92032) (1003,)


In [10]:
from sklearn.svm import SVC
# from pyrcn.echo_state_network import ESNClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
print(X_train.shape, y_train.shape)

# scaler = StandardScaler()

clf = CatBoostClassifier(task_type = 'GPU')
clf = RandomForestClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on test set:", accuracy)



(802, 92032) (802,)
Accuracy on test set: 0.6019900497512438


RF all subjects 80-20 = 0.556809024979855

RF, Logistic, SVC = 1 sub 0.66

XGB 1 sub 0.62

XGB 26 0.5495568090249798

Ada 0.6

GB 0.62

0.66


catboost and RF both on more data with stacked are perfoming the same

next im gonna add labels for the subjects


In [11]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Perform standard scaling on the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Print the shapes of the train set
print(X_train_scaled.shape, y_train.shape)

# Create a random forest classifier
clf = LogisticRegression()

# Fit the classifier on the scaled training data
clf.fit(X_train_scaled, y_train)

# Predict on the scaled test data
y_pred = clf.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on test set:", accuracy)


(802, 92032) (802,)
Accuracy on test set: 0.5771144278606966


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
