In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.optimizers import Adam

from keras.optimizers.legacy import Adam as LegacyAdam

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pickle as pkl
from src.utils.data_transform import *
from src.utils.preprocessing import load_split_data
import pandas as pd
import os 
import pickle 
import json
from src.utils.data_io import save_data

In [2]:
def load_data(test_subject, subject_to_indices,
              sessions_dir="../data/ProcessedSubjects/MajorityLabel/sessions/full"):
    """Build a leave-one-subject-out split from per-session pickle files.

    Every session listed in ``subject_to_indices`` is loaded with
    ``load_session_data``; the sessions of one subject are concatenated along
    axis 0, and the subject is routed to the test split when its ID matches
    ``test_subject`` (compared as strings, so ``1`` and ``"1"`` are the same
    subject) and to the training split otherwise.

    Args:
        test_subject: ID of the held-out subject.
        subject_to_indices: Mapping of subject ID -> iterable of session IDs.
        sessions_dir: Directory containing the ``session_<id>.pkl`` files.
            Defaults to the location the notebook has always used.

    Returns:
        Tuple ``(training_data, training_labels, testing_data, testing_labels)``
        of NumPy arrays, each concatenated along axis 0.

    Raises:
        ValueError: If ``test_subject`` is not a key of ``subject_to_indices``,
            if a subject has no sessions, or if no subject is left for training.
            (Previously these cases surfaced as NumPy's generic
            "need at least one array to concatenate" ValueError.)
    """
    test_key = str(test_subject)

    # Fail fast, before touching the disk, when the held-out subject is unknown.
    if not any(str(subject) == test_key for subject in subject_to_indices):
        raise ValueError(
            f"Test subject {test_subject!r} not found in subject_to_indices "
            f"(known subjects: {list(subject_to_indices)})"
        )

    training_data, training_labels = [], []
    testing_data, testing_labels = [], []

    for subject, sessions in subject_to_indices.items():
        if not sessions:
            raise ValueError(f"Subject {subject!r} has no sessions to load")

        # Load every session of this subject, then merge them into one array pair.
        loaded = [
            load_session_data(f"{sessions_dir}/session_{session_id}.pkl")
            for session_id in sessions
        ]
        subject_data = np.concatenate([signals for signals, _ in loaded], axis=0)
        subject_labels = np.concatenate([labels for _, labels in loaded], axis=0)

        # Route the subject to the held-out split or the training split.
        if str(subject) == test_key:
            testing_data.append(subject_data)
            testing_labels.append(subject_labels)
        else:
            training_data.append(subject_data)
            training_labels.append(subject_labels)

    if not training_data:
        raise ValueError(
            f"No training subjects remain after holding out {test_subject!r}"
        )

    return (
        np.concatenate(training_data, axis=0),
        np.concatenate(training_labels, axis=0),
        np.concatenate(testing_data, axis=0),
        np.concatenate(testing_labels, axis=0),
    )

In [3]:
def load_session_data(path):
    """Read one pickled session and split it into signals and labels.

    The pickle at ``path`` is expected to hold an iterable of items where
    ``item[0]`` is a 2-D signal window and ``item[1]`` is its label.

    Returns:
        ``(signal_data, label_data)`` as NumPy arrays; the first column of the
        last signal axis (the timestamps, per the original author) is removed.
    """
    records = pd.read_pickle(path)

    windows, targets = [], []
    for record in records:
        windows.append(record[0])
        targets.append(record[1])

    stacked = np.array(windows)
    without_timestamps = stacked[:, :, 1:]  # drop column 0 (timestamps)
    return without_timestamps, np.array(targets)

In [4]:
def build_model(input_shape, num_classes=5, learning_rate=1e-3):
    """Create and compile the 1-D CNN classifier.

    Architecture: two Conv1D(+ReLU, 'same' padding) / MaxPooling1D stages with
    64 and 128 filters, followed by Flatten, Dropout(0.5) and a softmax output.

    Args:
        input_shape: Shape of one sample passed to the first Conv1D layer
            (the notebook calls this with ``(20, 6)``).
        num_classes: Number of units in the softmax output layer. Defaults to
            5, the value previously hard-coded.
        learning_rate: Learning rate for the legacy Adam optimizer. Defaults to
            1e-3, the value previously hard-coded.

    Returns:
        A compiled Keras ``Sequential`` model using categorical cross-entropy
        loss and the ``accuracy`` metric.
    """
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=10, activation='relu', input_shape=input_shape, padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=128, kernel_size=10, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # NOTE(review): categorical_crossentropy expects one-hot encoded labels —
    # confirm the session pickles store labels in that form.
    optimizer = LegacyAdam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])
    return model

In [5]:
# Load the subject -> session-ID mapping; JSON object keys are always strings,
# so they are converted to int while the mapping is built.
with open("../data/dataset-info-json/subject_to_indices.json", "r") as f:
    subject_to_indices = {
        int(subject): sessions for subject, sessions in json.load(f).items()
    }

In [None]:
# Leave-one-subject-out training: one fresh model per held-out subject.
#
# `results`, `accuracy` and `loss` accumulate across ALL folds so the analysis
# cells below can aggregate over every subject. (They used to be re-created
# inside the loop, which left only the last fold's values after the loop.)
results = []
accuracy = []
loss = []

# Make sure the output locations exist before spending time on training.
# NOTE(review): assumes neither model.save nor save_data creates missing
# directories on its own — harmless if they do.
os.makedirs("../models/full_loso/majority_label/processed/training_info/", exist_ok=True)

for test_subject in subject_to_indices.keys():
    print(f"Training without {test_subject}")
    model = build_model(input_shape=(20,6))
    train_data, train_labels, test_data, test_labels = load_data(test_subject, subject_to_indices)
    history = model.fit(train_data, train_labels, epochs=32, batch_size=64)

    # Per-fold values are kept as single-element lists so the files written by
    # save_data have exactly the same content as before this change.
    fold_results = [model.evaluate(test_data, test_labels)]
    fold_accuracy = [history.history['accuracy']]
    fold_loss = [history.history['loss']]

    results.extend(fold_results)
    accuracy.extend(fold_accuracy)
    loss.extend(fold_loss)

    model.save(f"../models/full_loso/majority_label/processed/model_{test_subject}.keras")

    save_data(fold_results, "../models/full_loso/majority_label/processed/training_info/", f"results_{test_subject}")
    save_data(fold_accuracy, "../models/full_loso/majority_label/processed/training_info/", f"accuracy_{test_subject}")
    save_data(fold_loss, "../models/full_loso/majority_label/processed/training_info/", f"loss_{test_subject}")


Training without 1


2024-02-02 21:01:07.933288: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Max
2024-02-02 21:01:07.933344: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-02-02 21:01:07.933356: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-02-02 21:01:07.933425: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-02 21:01:07.933448: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/32


2024-02-02 21:01:08.458243: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Training without 2
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32

In [None]:
# Per-epoch training accuracy of the most recent `model.fit` call. `history` is
# reassigned on every iteration of the training loop, so this is the last fold only.
history.history['accuracy']

In [None]:
# epochs = range(len(acc))  # Number of epochs

In [None]:
# Mean training accuracy over the epochs of each entry recorded in `accuracy`
# (each entry is one `history.history['accuracy']` list).
avg_accuracy = [np.mean(sublist) for sublist in accuracy]
avg_accuracy

In [None]:
# Grand mean of the per-entry average accuracies held in `avg_accuracy`.
np.mean(avg_accuracy)

In [None]:
# Mean over every per-epoch training-loss value stored in `loss`.
np.mean(loss)

In [None]:
# # Plotting training and validation accuracy
# plt.figure(figsize=(12, 4))
# plt.subplot(1, 2, 1)
# plt.plot(epochs, acc, label='Training Accuracy')
# # plt.plot(epochs, val_acc, label='Validation Accuracy')
# plt.title('Training Accuracy')
# plt.legend()
# 
# # Plotting training and validation loss
# plt.subplot(1, 2, 2)
# plt.plot(epochs, loss, label='Training Loss')
# # plt.plot(epochs, val_loss, label='Validation Loss')
# plt.title('Training Loss')
# plt.legend()
# 
# plt.show()

In [None]:
# def build_lstm():
#     lstm_model = Sequential()
#     lstm_model.add(TimeDistributed(cnn_model, input_shape=(35, input_shape[0], input_shape[1])))  # Assuming input_shape is (20, 6)
#     lstm_model.add(LSTM(64, activation='tanh', recurrent_activation='hard_sigmoid', return_sequences=True))
#     lstm_model.add(LSTM(64, activation='tanh', recurrent_activation='hard_sigmoid'))
#     lstm_model.add(Dropout(0.5))
#     lstm_model.add(Dense(1, activation='sigmoid'))  # Binary classification
# 
#     lstm_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
#     return lstm_model
