In [4]:
import os
os.environ["LD_LIBRARY_PATH"] = "$CONDA_PREFIX/lib/python3.9/site-packages/nvidia/cudnn/lib:$LD_LIBRARY_PATH"


In [1]:
import tensorflow as tf; print(tf.__version__)

2024-05-26 14:07:08.866806: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-26 14:07:08.891629: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


In [2]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


2024-05-26 14:07:11.912757: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-26 14:07:11.930133: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-26 14:07:11.930231: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [3]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Load Data

In [10]:
import os
import random

import numpy as np
import scipy.io as sio
from sklearn.model_selection import train_test_split

def load_data(dataset_dir, subject_n=5, img_size=(8, 9, 8), number_of_inputs=1,
              features_type='multi', num_classes=2, frames_per_subject=4800, seed=7):
    img_rows, img_cols, num_chan = img_size

    prefixs = ['DE/DE_s', 'PSD/PSD_s']

    if features_type == 'DE':
        prefixs = prefixs[:1]
    elif features_type == 'PSD':
        prefixs = prefixs[1:]
    elif features_type != 'multi':
        raise NotImplementedError()

    samples_number_per_subject = int(frames_per_subject / number_of_inputs)  # tested only for [1...6] range
    samples_numbers_list = list(range(samples_number_per_subject))

    random.seed(seed)

    y_v_list = []

    x_list = []

    subject_id_list = []

    for i in range(subject_n):
        short_name = f'{i + 1:02}'
        random.shuffle(samples_numbers_list)
        print("\nprocessing: ", short_name, "......")
        file_path = os.path.join(dataset_dir, prefixs[0] + short_name)
        file = sio.loadmat(file_path)
        data = file['data']
        y_v = file['valence_labels'][0]
        test = 0
        for i in y_v:
            test += i
        print(test)
        print("y_v[0:10]: ",y_v[0:10])
        if len(prefixs) > 0:
            for prefix in prefixs[1:]:
                file_path = os.path.join(dataset_dir, prefix + short_name)
                file = sio.loadmat(file_path)
                data = np.concatenate([data, file['data']], axis=1)

        print("Data shape: ", data.shape)
        print("Labels shape: ", y_v.shape)
        one_falx = data.transpose([0, 2, 3, 1])
        one_falx = one_falx.reshape((-1, number_of_inputs, img_rows, img_cols, num_chan))
        one_y_v = np.empty([0, 1])

        print("one_falx shape: ", one_falx.shape)
        print("one_y_v shape: ", one_y_v.shape)
    
        for j in range(int(len(y_v) // number_of_inputs)):
            one_y_v = np.vstack([one_y_v, y_v[j * number_of_inputs]])

        one_falx = one_falx[samples_numbers_list]
        one_y_v = one_y_v[samples_numbers_list]
        print("one_falx shape: ", one_falx.shape)
        print("one_y_v shape: ", one_y_v.shape)

        subject_id = np.array([i] * samples_number_per_subject)
        print("subject_id shape: ", subject_id.shape)

        y_v_list.append(one_y_v)

        x_list.append(one_falx)



        subject_id_list.append(subject_id)

    y_v_all_subject = np.concatenate(y_v_list)

    x_all_subject = np.concatenate(x_list)
    all_subject_id = np.concatenate(subject_id_list)

    print("y_v_all_subject shape: ", y_v_all_subject.shape)
    print("x_all_subject shape: ", x_all_subject.shape)
    print("all_subject_id shape: ", all_subject_id.shape)

    x_all_subject_train, x_all_subject_test, y_v_all_subject_train, y_v_all_subject_test = train_test_split( x_all_subject, y_v_all_subject, test_size=0.2, random_state=42)
    return y_v_all_subject_train, y_v_all_subject_test, x_all_subject_train, x_all_subject_test, all_subject_id

### Model

In [11]:
from keras.layers import (
    Average,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
    Reshape,
)
from keras.models import Model, Sequential


def create_base_network(input_dim, dropout_rate):
    seq = Sequential()
    seq.add(keras.Input(input_dim))
    seq.add(
        Conv2D(
            64,
            5,
            activation="relu",
            padding="same",
            name="conv1",
        )
    )
    if True:
        seq.add(BatchNormalization())
    seq.add(Dropout(dropout_rate))
    seq.add(Conv2D(128, 4, activation="relu", padding="same", name="conv2"))
    if True:
        seq.add(BatchNormalization())
    seq.add(Dropout(dropout_rate))
    seq.add(Conv2D(256, 4, activation="relu", padding="same", name="conv3"))
    if True:
        seq.add(BatchNormalization())
    seq.add(Dropout(dropout_rate))
    seq.add(Conv2D(64, 1, activation="relu", padding="same", name="conv4"))
    seq.add(MaxPooling2D(2, 2, name="pool1"))
    if True:
        seq.add(BatchNormalization())
    seq.add(Dropout(dropout_rate))
    seq.add(Flatten(name="fla1"))
    seq.add(Dense(512, activation="relu", name="dense1"))
    seq.add(Reshape((1, 512), name="reshape"))
    if True:
        seq.add(BatchNormalization())
    seq.add(Dropout(dropout_rate))

    return seq


def create_MT_CNN(img_size=(8, 9, 8), dropout_rate=0.2, number_of_inputs=1):

    base_network = create_base_network(img_size, dropout_rate)

    inputs = [Input(shape=img_size) for i in range(number_of_inputs)]

    if number_of_inputs == 1:
        x = base_network(inputs[0])
    else:
        x = Average()([base_network(input_) for input_ in inputs])

    x = Flatten(name="flat")(x)

    # out_v = Dense(2, activation="softmax", name="out_v")(x)
    # out_a = Dense(2, activation="softmax", name="out_a")(x)
    out_v = Dense(1, activation="sigmoid", name="out_v")(x)

    # model = Model(inputs, [out_v, out_a])
    model = Model(inputs, out_v)

    return model


2024-06-02 16:33:37.481367: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-02 16:33:37.723877: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Utils

In [13]:
import pickle

import numpy as np

def get_sample_weights(list_of_y_trains):
    print("[get_sample_weights] list_of_y_trains: ",list_of_y_trains)
    sample_weights = np.ones(list_of_y_trains[0].shape[0])
    print("[get_sample_weights] sample_weights shape: ", sample_weights.shape)

    for y_train in list_of_y_trains:
        print("[get_sample_weights] y_train shape: ", y_train.shape)
        y_ints = y_train.argmax(1)
        print("[get_sample_weights] y_ints shape: ", y_ints.shape)

        class_weights = class_weight.compute_class_weight('balanced',
                                                          classes=np.unique(y_ints),
                                                          y=y_ints)
        print("[get_sample_weights] class_weights shape: ", class_weights.shape)
        print(class_weights)
    for i in np.unique(y_ints):
        sample_weights[y_ints == i] = \
            sample_weights[y_ints == i] * class_weights[i]
        print(sample_weights)

    return sample_weights

def print_results(scores_dict, fine_tuning):
    for scores_name, scores_path in scores_dict.items():
        with open(scores_path, "rb") as fl:
            scores = pickle.load(fl)

        accuracy_score = np.mean([score[-1] for score in scores]) * 100
        loss_score = np.mean([score[0] for score in scores]) * 100

        print(f"\n{scores_name}")
        print(f"Accuracy mean: {accuracy_score:3.4f} %")
        print(f"Loss mean: {loss_score:3.4f} %")


### Train

In [14]:
import os
import pickle
from shutil import copyfile

import keras
import numpy as np
import tensorflow as tf
from keras import backend as K
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


def train(
    x_all_subject_train,
    x_all_subject_test,
    y_v_all_subject_train,
    y_v_all_subject_test,
    dropout_rate=0.2,
    number_of_inputs=1,
    model_dir=".",
    metrics_dir=".",
    model_name="MT_CNN",
    img_size=(8, 9, 8),
    epochs_n=200,
    seed=7,
    verbose=0,
):

    lrate = lambda model_checkpoint_path: ReduceLROnPlateau(
        best_path=model_checkpoint_path,
        monitor="val_loss",
        patience=5,
        factor=0.5,
        verbose=1,
        
    )

    es = lambda: EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=16)

    print("Save freq: ", int(y_v_all_subject_train.shape[0]/64*5))
    save_model = lambda model_checkpoint_path: ModelCheckpoint(
        model_checkpoint_path,
        monitor="loss",
        save_best_only=True,
        save_weights_only=False,
        mode="min",
        save_freq=7680,
        # period=5
    )

    scores_subject_independent_list = []

    np.random.seed(seed)
    n_splits = 5
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for fold, (train, val) in enumerate(
        kfold.split(x_all_subject_train, y_v_all_subject_train)
    ):  
        print("Fold: ", fold)
        print("Val: ", val)
        print("Training: ", train)
        print("Train size: ", len(train))
        print(f"\n\nFold {fold + 1}/{n_splits}\n\n")
        # if fold > 1:
        #    continue
        K.clear_session()

        model_checkpoint_path_SI_unique = (
            f"{model_dir}/{model_name}-weight_AV-fold{fold + 1:02d}"
            + "-epoch{epoch:02d}-loss{loss:.2f}-V_accuracy{accuracy:.4f}.keras"
        )
        model_checkpoint_path_SI_for_load = (
            f"{model_dir}/{model_name}-weight_AV-fold{fold + 1:02d}.keras"
        )

        model = create_MT_CNN(img_size, dropout_rate, number_of_inputs)

        model.compile(
            loss=keras.losses.BinaryCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            metrics={'out_v':'accuracy'},
        )

        # Fit the model
        x_train = x_all_subject_train[train]
        y_train_v = y_v_all_subject_train[train]


        x_val = x_all_subject_train[val]
        y_val_v = y_v_all_subject_train[val]

        x_test = x_all_subject_test
        y_test_v = y_v_all_subject_test
        
        print("Type x_train: ", type(x_train))
        print("Type y_train: ", type(y_train_v))
        print("Shape x_train: ", x_train.shape)
        print("Shape y_train: ", y_train_v.shape)

        print("Type [x_train[:, i] for i in range(x_train.shape[1])]: ", type([x_train[:, i] for i in range(x_train.shape[1])]))
        print("Type [y_train_v]: ", type([y_train_v]))
        
        hist = model.fit(
            [x_train[:, i] for i in range(x_train.shape[1])],
            y_train_v,
            epochs=epochs_n,
            batch_size=64,
            verbose=1,
            callbacks=[
                save_model(model_checkpoint_path_SI_unique),
                save_model(model_checkpoint_path_SI_for_load),
                lrate(model_checkpoint_path_SI_for_load),
                es(),
            ],
            validation_data=(
                [x_val[:, i] for i in range(x_val.shape[1])],
                y_val_v,
            ),
        )

        scores = model.evaluate(
            [x_test[:, i] for i in range(x_test.shape[1])],
            y_test_v,
            verbose=1,
        )

        scores_subject_independent_list.append(scores)

        with open(os.path.join(metrics_dir, f"{model_name}_scores_SI.pkl"), "wb") as fl:
            pickle.dump(scores_subject_independent_list, fl)


### Main

In [15]:
# specify dataset and model dirs
dataset_dir = "./preprocessed_data/3D"  # path of the folder with PSD_s and DE_s files
model_dir = "./model"  # path where the model and metrics will be stored
metrics_dir = "./metrics_bk"  # path to the folder were all metrics will be stored

img_size = img_rows, img_cols, num_chan = 8, 9, 8  # matrix shape of input data
number_of_inputs = 1  # how many frames is taken into account during one pass

features_type = "multi"  # 'PSD', 'DE' or 'multi' be carefull with num_chan
num_classes = 2  # number of classes of input data
frames_per_subject = 4800  # how many frames per one subject
seed = 7  # random seed

dropout_rate = 0.2
model_name = "MT_CNN"  # will be a filename part
epochs_n = 50  # maximum number of epochs
verbose = 0  # 0, 1 or 2

subject_n = 32


y_v_all_subject_train, y_v_all_subject_test, x_all_subject_train, x_all_subject_test, all_subject_id = load_data(
    dataset_dir,
    subject_n,
    img_size,
    number_of_inputs,
    features_type,
    num_classes,
    frames_per_subject,
    seed,
)

train(
    x_all_subject_train,
    x_all_subject_test,
    y_v_all_subject_train,
    y_v_all_subject_test,
    dropout_rate,
    number_of_inputs,
    model_dir,
    metrics_dir,
    model_name,
    img_size,
    epochs_n,
    seed,
    verbose,
)


scores_dict = {
    "results": f"{metrics_dir}/{model_name}_scores_SI.pkl"
}  # dict with path of scores file
print_results(scores_dict, fine_tuning)



processing:  01 ......
2280.0
y_v[0:10]:  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Data shape:  (4800, 8, 8, 9)
Labels shape:  (4800,)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (0, 1)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (4800, 1)
HHHHHHHHHHHHHHHHHHHHHHH
subject_id shape:  (4800,)

processing:  02 ......
2640.0
y_v[0:10]:  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Data shape:  (4800, 8, 8, 9)
Labels shape:  (4800,)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (0, 1)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (4800, 1)
HHHHHHHHHHHHHHHHHHHHHHH
subject_id shape:  (4800,)

processing:  03 ......
2640.0
y_v[0:10]:  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Data shape:  (4800, 8, 8, 9)
Labels shape:  (4800,)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (0, 1)
one_falx shape:  (4800, 1, 8, 9, 8)
one_y_v shape:  (4800, 1)
HHHHHHHHHHHHHHHHHHHHHHH
subject_id shape:  (4800,)

processing:  04 ......
1920.0
y_v[0:10]:  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Data shape:  (4800, 8, 8, 9)

### Evaluate Recall and Precision

In [7]:
import glob
import os

In [9]:
list_of_files = glob.glob('./model_bk/*') # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)
print(latest_file)

./model_bk/MT_CNN-weight_AV-fold05.keras


In [20]:
model = create_MT_CNN(img_size, dropout_rate, number_of_inputs)

model.compile(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics={'out_v':['accuracy', 'precision', 'recall']},
)

model.load_weights(latest_file)

scores = model.evaluate(
            [x_all_subject_test[:, i] for i in range(x_all_subject_test.shape[1])],
            y_v_all_subject_test,
            verbose=1,
        )
print(model.metrics_names)
print(scores)

  saveable.load_own_variables(weights_store.get(inner_path))


[1m960/960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9469 - loss: 0.1425 - precision: 0.9544 - recall: 0.9499
['loss', 'compile_metrics']
[0.14670242369174957, 0.9463541507720947, 0.9542313814163208, 0.948740541934967]
