<a href="https://colab.research.google.com/github/ysaidcan/federated-biomedical-informatics/blob/master/CmpE_492.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [None]:
# Mount Google Drive in the Colab runtime; the data files read and written by
# the cells below live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy
import tensorflow as tf
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras import backend as K
from keras import optimizers

import sklearn
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from imblearn.under_sampling import RandomUnderSampler


from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler



### Prepare and Save Data (Run only once!)

In [None]:
# Read the per-row sensor table and replace the SessionLabel column with the
# integer codes produced by LabelEncoder (the column keeps its name/position).
X = pd.read_csv(
    '/content/drive/MyDrive/empatica_data_new/one_sec_sorted.csv'
).assign(
    SessionLabel=lambda frame: LabelEncoder().fit_transform(frame['SessionLabel'])
)

# Target vector: the encoded label of every row.
y = X['SessionLabel']

In [None]:
# Rescale every sensor channel, one column at a time, into the [0, 1] range.
# NOTE(review): the scaler is fit on the complete table, i.e. before any
# train/test split happens further down - confirm this is intended.
sensor_columns = ['ACC_mean', 'BVP_mean', 'EDA_mean', 'TEMP_mean']
scaler = MinMaxScaler(feature_range=(0, 1))

for col in sensor_columns:
    # The scaler is handed a single-feature column vector of shape (n_rows, 1).
    X[col] = scaler.fit_transform(X[col].to_numpy().reshape(-1, 1))

In [None]:
# create and save overlapping windows with labels

# Build one window per row (from row TIME_STEPS-1 onwards) inside every
# session and save the stacked windows and their labels to Drive.
#
# Each window holds the TIME_STEPS consecutive rows ending at row i of the
# session; its label is the SessionLabel of that last row. Sessions with fewer
# than TIME_STEPS rows contribute no windows.

# This cell is placed before the "Constants" cell, so the window length must
# be defined here for the cell to run on a fresh kernel. Keep the value in
# sync with TIME_STEPS in the "Constants" cell.
TIME_STEPS = 60

feature_columns = ['ACC_mean', 'BVP_mean', 'EDA_mean', 'TEMP_mean']

X_windows = [] # n x TIME_STEPS x 4
y_windows = [] # n x 1

# groupby(sort=False) visits sessions in order of first appearance in X, so
# the saved arrays have the same row order on every run (iterating a set of
# strings does not guarantee that).
for sess, X_sess in X.groupby('SessionID', sort=False):
    print(sess)

    # Pull the data out of pandas once per session; slicing plain arrays is
    # far cheaper than a positional DataFrame lookup for every single window.
    features = X_sess[feature_columns].to_numpy()
    labels = X_sess['SessionLabel'].to_numpy()

    for i in range(TIME_STEPS - 1, len(X_sess)):
        y_windows.append(labels[i])
        X_windows.append(features[i - TIME_STEPS + 1: i + 1])

X_numpy = numpy.array(X_windows)
y_numpy = numpy.array(y_windows)
numpy.save('/content/drive/MyDrive/empatica_data_new/X_windows.npy', X_numpy)
numpy.save('/content/drive/MyDrive/empatica_data_new/y_windows.npy', y_numpy)

A0176C
A01B22
A0120B
A019A8
A01644
A02214
A01035
A012A4
A010BE
A021AD
A020B9
A0133E
A012B4


### Constants

In [None]:
# Hyper-parameters read by other cells of this notebook.
BATCH_SIZE = 500  # passed as batch_size to lstm_model.fit
EPOCHS = 90  # passed as epochs to lstm_model.fit
TIME_STEPS = 60  # number of consecutive rows per window in the window-building cell

### Load Data and Apply Random Under-Sampling (RUS)

In [None]:
# Reload the window and label arrays that the preparation cell saved to Drive,
# then report their shapes (one per line).
X_windows = numpy.load(
    '/content/drive/MyDrive/empatica_data_new/X_windows.npy'
)
y_windows = numpy.load(
    '/content/drive/MyDrive/empatica_data_new/y_windows.npy'
)
print(X_windows.shape, y_windows.shape, sep='\n')

(2171233, 60, 4)
(2171233,)


In [None]:
# Shapes before under-sampling.
print(X_windows.shape, y_windows.shape)

# Remember the per-window layout so it can be restored after resampling.
dim2, dim3 = X_windows.shape[1:]

rus = RandomUnderSampler(random_state=13)

# The windows are flattened to one row per sample for the sampler and folded
# back into (samples, dim2, dim3) afterwards.
X_windows, y_windows = rus.fit_resample(
    X_windows.reshape(-1, dim2 * dim3),
    y_windows,
)
X_windows = X_windows.reshape(-1, dim2, dim3)

# Shapes after under-sampling.
print(X_windows.shape, y_windows.shape)

(2171233, 60, 4) (2171233,)




(1078446, 60, 4) (1078446,)


### Run Model

In [None]:
# Train a two-layer LSTM binary classifier on one cross-validation fold.
#
# The split is stratified and shuffled. With the previous unshuffled KFold the
# recorded run printed "1s in test 1.0": the fifth test fold contained only
# label 1, so validation said nothing about class 0 and the training fold was
# left with a skewed class balance. (Presumably the under-sampler returns its
# rows grouped by class - verify against the imbalanced-learn docs.)
#
# NOTE(review): the windows overlap within a session, so a random split can
# put near-identical windows into both train and test. A session-wise split
# would need the session ids, which are not saved with the windows - confirm
# whether this matters for the reported numbers.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=13)

lstm_model = None

for k, (train, test) in enumerate(kfold.split(X_windows, y_windows), start=1):
    # Only the fifth fold is trained; the other folds are skipped.
    if k != 5:
        continue
    print('FOLD', k)

    X_train = X_windows[train]
    y_train = y_windows[train]

    X_test = X_windows[test]
    y_test = y_windows[test]

    # Seeded so that the sample order is the same on every run.
    X_train, y_train = sklearn.utils.shuffle(X_train, y_train, random_state=13)
    X_test, y_test = sklearn.utils.shuffle(X_test, y_test, random_state=13)

    print("X_train and y_train size:", len(X_train), len(y_train))
    print("X_test and y_test size:", len(X_test), len(y_test))

    print(X_train.shape, y_train.shape)
    print(X_test.shape, y_test.shape)

    # Each class is weighted by the share of the *other* class in the training
    # fold, so the rarer class gets the larger weight. Labels are taken to be
    # 0/1, which makes the mean the fraction of 1s.
    positive_fraction = float(y_train.mean())
    train_class_weight = {0: positive_fraction,
                          1: 1 - positive_fraction}

    print('Train weights:', train_class_weight)
    print('1s in test', y_test.mean())

    lstm_model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it;
    # only the first layer needs the input shape.
    lstm_model.add(LSTM(200, input_shape=X_train.shape[1:], dropout=0, recurrent_dropout=0.0, stateful=False, return_sequences=True, kernel_initializer='random_uniform'))
    lstm_model.add(LSTM(200, dropout=0, recurrent_dropout=0.0, stateful=False, return_sequences=False, kernel_initializer='random_uniform'))
    lstm_model.add(Dense(200, activation='relu'))
    lstm_model.add(Dense(200, activation='relu'))
    # Single sigmoid unit: output is read as the probability of label 1.
    lstm_model.add(Dense(1, activation='sigmoid'))

    opt = optimizers.Adam(learning_rate=0.001)

    lstm_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    # The held-out fold doubles as the per-epoch validation set.
    history = lstm_model.fit(X_train, y_train,
                             epochs=EPOCHS,
                             verbose=1,
                             batch_size=BATCH_SIZE,
                             validation_data=(X_test, y_test),
                             class_weight=train_class_weight)

FOLD 5
X_train and y_train size: 862757 862757
X_test and y_test size: 215689 215689
(862757, 60, 4) (862757,)
(215689, 60, 4) (215689,)
Train weights: {0: 0.3750001448843649, 1: 0.624999855115635}
1s in test 1.0
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 1/90
Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90