### <font color='orange'>0. Imports</font>

In [1]:
# For numerical manipulation
import numpy as np

# For dataframes utilization
import pandas as pd

# File operations
import os


# Tensorflow for CNN model
import tensorflow as tf
from tensorflow import keras
from keras import layers


from sklearn.metrics import average_precision_score
from keras.models import load_model

### <font color='orange'>2. Creating the TDCSFOG Dataset</font>

### <font color='purple'>2.1. Train</font>

In [2]:
directory = 'data/train/tdcsfog/'

# Initialize an empty list to store DataFrames
dfs = []

# Iterate through each file in the directory
for filename in os.listdir(directory):  
    file_path = os.path.join(directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    dfs.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
data = pd.concat(dfs, ignore_index=True) # The ignore_index=True argument is used to reset the index of the concatenated DataFrame so that it starts from 0 and increments linearly, regardless of the original indices of the individual DataFrames.

In [3]:
directory = 'data/train/defog'

# Initialize an empty list to store DataFrames
defog_dfs = []

# Iterate through each file in the directory
for filename in os.listdir(directory):  
    file_path = os.path.join(directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    defog_dfs.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
defogdata = pd.concat(defog_dfs, ignore_index=True) # The ignore_index=True argument is used to reset the index of the concatenate

In [4]:
filtereddefog_df = defogdata[(defogdata['Valid'] == True) & (defogdata['Task'] == True)]

In [5]:
data = pd.concat([data,filtereddefog_df],ignore_index=True)

In [6]:
data

Unnamed: 0,Id_file,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Valid,Task
0,003f117e14,0,-9.533939,0.566322,-1.413525,0,0,0,,
1,003f117e14,1,-9.536140,0.564137,-1.440621,0,0,0,,
2,003f117e14,2,-9.529345,0.561765,-1.429332,0,0,0,,
3,003f117e14,3,-9.531239,0.564227,-1.415490,0,0,0,,
4,003f117e14,4,-9.540825,0.561854,-1.429471,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...
11153197,f9fc61ce85,119027,-0.961216,0.142428,-0.289655,0,0,0,True,True
11153198,f9fc61ce85,119028,-0.960343,0.142836,-0.290506,0,0,0,True,True
11153199,f9fc61ce85,119029,-0.957958,0.145494,-0.290007,0,0,0,True,True
11153200,f9fc61ce85,119030,-0.960616,0.145839,-0.291527,0,0,0,True,True


In [7]:
x_train = data[['AccV','AccML','AccAP']].copy().values
y_walking_train = data['Walking']
y_turn_train = data['Turn']
y_SH_train = data['StartHesitation']

In [8]:
len(x_train)

11153202

In [9]:
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))

num_classes = 2

# idx = np.random.permutation(len(x_train))
# x_train = x_train[idx]
# y_walking_train = y_walking_train[idx]
# y_turn_train = y_turn_train[idx]
# y_SH_train = y_SH_train[idx]

### <font color='purple'>2.2. Test</font>

In [10]:
test_directory = 'data/test/tdcsfog'

# Initialize an empty list to store DataFrames
test_dfs = []


# Iterate through each file in the directory
for filename in os.listdir(test_directory):  
    file_path = os.path.join(test_directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    test_df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    test_dfs.append(df)
# Concatenate all DataFrames in the list into a single DataFrame
test_data = pd.concat(test_dfs, ignore_index=True)

In [11]:
test_directory = 'data/test/defog'

# Initialize an empty list to store DataFrames
test_dfs = []

# Iterate through each file in the directory
for filename in os.listdir(test_directory):  
    file_path = os.path.join(test_directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    test_df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    test_dfs.append(df)
# Concatenate all DataFrames in the list into a single DataFrame
test_defogdata = pd.concat(test_dfs, ignore_index=True)

In [12]:
test_data = pd.concat([test_data,test_defogdata],ignore_index=True)

In [13]:
x_test = test_data[['AccV','AccML','AccAP']].copy().values
y_walking_test = test_data['Walking']
y_turn_test = test_data['Turn']
y_SH_test = test_data['StartHesitation']


In [14]:
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
idx = np.random.permutation(len(x_test))
x_test = x_test[idx]
y_walking_test = y_walking_test[idx]
y_turn_test = y_turn_test[idx]
y_SH_test = y_SH_test[idx]

### <font color='orange'>3. CNN model</font>

### <font color='purple'>3.1. Defining model</font>

In [16]:

def make_multitask_model(input_shape):
    input_layer = keras.layers.Input(shape=input_shape)

    # Define convolutional block with BatchNormalization and ReLU activation
    def conv_block(x, filters, kernel_size):
        x = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(x)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.ReLU()(x)
        return x

    # Shared CNN feature extraction
    conv1 = conv_block(input_layer, filters=64, kernel_size=3)
    conv2 = conv_block(conv1, filters=64, kernel_size=3)
    conv3 = conv_block(conv2, filters=64, kernel_size=3)

    gap = keras.layers.GlobalAveragePooling1D()(conv3)

    # Multi-task output layers
    walking_output = keras.layers.Dense(1, activation="sigmoid", name="walking")(gap)
    turning_output = keras.layers.Dense(1, activation="sigmoid", name="turning")(gap)
    sh_output = keras.layers.Dense(1, activation="sigmoid", name="start_hesitation")(gap)

    return keras.models.Model(inputs=input_layer, outputs=[walking_output, turning_output, sh_output])

# Create model
model = make_multitask_model(input_shape=x_train.shape[1:])




In [21]:
from tensorflow.keras.optimizers import Adam

optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer, 
              loss={'walking': 'binary_crossentropy', 
                    'turning': 'binary_crossentropy', 
                    'start_hesitation': 'binary_crossentropy'},
              metrics=['accuracy'])

epochs = 2
batch_size = 96


In [25]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

lr_scheduler = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, min_lr=1e-5, verbose=1)


history = model.fit(x_train, 
                    {'walking': y_walking_train, 
                     'turning': y_turn_train, 
                     'start_hesitation': y_SH_train}, 
                    batch_size=batch_size, epochs=50,callbacks=[lr_scheduler])



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 5: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 16: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 19: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 24: ReduceLROnPlateau reducing learning rate to 1e-05.
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [36]:
print(f"Final Training Loss: {history.history['loss'][-1]}")
print(f"Final Walking Accuracy: {history.history['walking_accuracy'][-1]}")
print(f"Final Turning Accuracy: {history.history['turning_accuracy'][-1]}")
print(f"Final SH Accuracy: {history.history['start_hesitation_accuracy'][-1]}")


Final Training Loss: 0.634383499622345
Final Walking Accuracy: 0.9716699719429016
Final Turning Accuracy: 0.8076900243759155
Final SH Accuracy: 0.9722499847412109


In [26]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 3, 1)]       0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, 3, 64)        256         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 3, 64)       256         ['conv1d[0][0]']                 
 alization)                                                                                       
                                                                                                  
 re_lu (ReLU)                   (None, 3, 64)        0           ['batch_normalization[0][0]']

In [41]:


# Load Test Data
test_data = pd.read_csv('data/test/tdcsfog/003f117e14.csv')  # Update with correct path

# Extract Features (Update if using more features)
x_test = test_data[['AccV', 'AccML', 'AccAP']].values  

threshold = 0.3  # Lower threshold to check if model is biased

y_pred = model.predict(x_test)
print(y_pred[:10])  # Check values before applying the threshold

predictions_walking = (predictions[0] > threshold).astype(int)
predictions_turn = (predictions[1] > threshold).astype(int)
predictions_start_hesitation = (predictions[2] > threshold).astype(int)


# Create the final submission DataFrame
submission_df = pd.DataFrame({
    "Id": test_data.index,  # Ensure ID matches test data index
    "StartHesitation": predictions_start_hesitation.flatten(),
    "Turn": predictions_turn.flatten(),
    "Walking": predictions_walking.flatten()
})

print(submission_df.head())  # Verify output

[array([[0.0004974 ],
       [0.00048312],
       [0.00048508],
       ...,
       [0.00020125],
       [0.00019972],
       [0.00019969]], dtype=float32), array([[0.00255989],
       [0.00249217],
       [0.00250882],
       ...,
       [0.00125433],
       [0.00124814],
       [0.00125834]], dtype=float32), array([[0.00058125],
       [0.00056117],
       [0.00056815],
       ...,
       [0.00028696],
       [0.00028493],
       [0.00027914]], dtype=float32)]


ValueError: All arrays must be of the same length

In [32]:
test_data.head()

Unnamed: 0,Time,AccV,AccML,AccAP
0,0,-0.914652,-0.300851,0.298156
1,1,-0.914367,-0.301572,0.298325
2,2,-0.77954,-0.343197,0.27569
3,3,-0.993162,-0.270281,0.315775
4,4,-0.948767,-0.307849,0.297191


In [31]:
Id_seriestimepoints

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '281678', '281679', '281680', '281681', '281682', '281683', '281684',
       '281685', '281686', '281687'],
      dtype='object', length=281688)