In [3]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.utils import Sequence
from sklearn.preprocessing import StandardScaler


In [4]:
directory = 'data/train/tdcsfog/'

# Initialize an empty list to store DataFrames
dfs = []

# Iterate through each file in the directory
for filename in os.listdir(directory):  
    file_path = os.path.join(directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    dfs.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
data = pd.concat(dfs, ignore_index=True) # The ignore_index=True argument is used to reset the index of the concatenated DataFrame so that it starts from 0 and increments linearly, regardless of the original indices of the individual DataFrames.


In [5]:
directory = 'data/train/defog'

# Initialize an empty list to store DataFrames
defog_dfs = []

# Iterate through each file in the directory
for filename in os.listdir(directory):  
    file_path = os.path.join(directory, filename)
    # Read the CSV file into a DataFrame and append to the list
    df = pd.read_csv(file_path)
    file_id = os.path.splitext(filename)[0]
    df['Id_file'] = file_id
    columns = ['Id_file']+[col for col in df if col != 'Id_file']
    df = df[columns]
    defog_dfs.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
defogdata = pd.concat(defog_dfs, ignore_index=True) # The ignore_index=True argument is used to reset the index of the concatenate  

In [6]:
filtereddefog_df = defogdata[(defogdata['Valid'] == True) & (defogdata['Task'] == True)]
data = pd.concat([data,filtereddefog_df],ignore_index=True)


In [7]:
data = data.iloc[::10].reset_index(drop=True)
data

Unnamed: 0,Id_file,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Valid,Task
0,003f117e14,0,-9.533939,0.566322,-1.413525,0,0,0,,
1,003f117e14,10,-9.526468,0.548411,-1.422251,0,0,0,,
2,003f117e14,20,-9.533576,0.557438,-1.433694,0,0,0,,
3,003f117e14,30,-9.536079,0.559618,-1.422550,0,0,0,,
4,003f117e14,40,-9.538613,0.550434,-1.393474,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...
1115316,f9fc61ce85,118990,-0.965025,0.130372,-0.286653,0,0,0,True,True
1115317,f9fc61ce85,119000,-0.990705,0.157651,-0.276927,0,0,0,True,True
1115318,f9fc61ce85,119010,-0.959941,0.138322,-0.288797,0,0,0,True,True
1115319,f9fc61ce85,119020,-0.963687,0.131883,-0.292917,0,0,0,True,True


In [8]:
data['Time'] = pd.to_datetime(data['Time'])  # Convert to datetime format
data['Elapsed_Time'] = data.groupby('Id_file')['Time'].transform(lambda x: (x - x.min()).dt.total_seconds())
data

Unnamed: 0,Id_file,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Valid,Task,Elapsed_Time
0,003f117e14,1970-01-01 00:00:00.000000000,-9.533939,0.566322,-1.413525,0,0,0,,,0.000000e+00
1,003f117e14,1970-01-01 00:00:00.000000010,-9.526468,0.548411,-1.422251,0,0,0,,,1.000000e-08
2,003f117e14,1970-01-01 00:00:00.000000020,-9.533576,0.557438,-1.433694,0,0,0,,,2.000000e-08
3,003f117e14,1970-01-01 00:00:00.000000030,-9.536079,0.559618,-1.422550,0,0,0,,,3.000000e-08
4,003f117e14,1970-01-01 00:00:00.000000040,-9.538613,0.550434,-1.393474,0,0,0,,,4.000000e-08
...,...,...,...,...,...,...,...,...,...,...,...
1115316,f9fc61ce85,1970-01-01 00:00:00.000118990,-0.965025,0.130372,-0.286653,0,0,0,True,True,1.179820e-04
1115317,f9fc61ce85,1970-01-01 00:00:00.000119000,-0.990705,0.157651,-0.276927,0,0,0,True,True,1.179920e-04
1115318,f9fc61ce85,1970-01-01 00:00:00.000119010,-0.959941,0.138322,-0.288797,0,0,0,True,True,1.180020e-04
1115319,f9fc61ce85,1970-01-01 00:00:00.000119020,-0.963687,0.131883,-0.292917,0,0,0,True,True,1.180120e-04


In [9]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
data['Elapsed_Time'] = scaler.fit_transform(data[['Elapsed_Time']])

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
data[['AccAP','AccML','AccAP']] = sc.fit_transform(data[['AccAP','AccML','AccAP']])
data

Unnamed: 0,Id_file,Time,AccV,AccML,AccAP,StartHesitation,Turn,Walking,Valid,Task,Elapsed_Time
0,003f117e14,1970-01-01 00:00:00.000000000,-9.533939,0.682039,-1.195466,0,0,0,,,0.000000
1,003f117e14,1970-01-01 00:00:00.000000010,-9.526468,0.664439,-1.199679,0,0,0,,,0.000024
2,003f117e14,1970-01-01 00:00:00.000000020,-9.533576,0.673310,-1.205205,0,0,0,,,0.000048
3,003f117e14,1970-01-01 00:00:00.000000030,-9.536079,0.675452,-1.199824,0,0,0,,,0.000073
4,003f117e14,1970-01-01 00:00:00.000000040,-9.538613,0.666427,-1.185783,0,0,0,,,0.000097
...,...,...,...,...,...,...,...,...,...,...,...
1115316,f9fc61ce85,1970-01-01 00:00:00.000118990,-0.965025,0.253654,-0.651316,0,0,0,True,True,0.285409
1115317,f9fc61ce85,1970-01-01 00:00:00.000119000,-0.990705,0.280460,-0.646620,0,0,0,True,True,0.285434
1115318,f9fc61ce85,1970-01-01 00:00:00.000119010,-0.959941,0.261466,-0.652351,0,0,0,True,True,0.285458
1115319,f9fc61ce85,1970-01-01 00:00:00.000119020,-0.963687,0.255139,-0.654341,0,0,0,True,True,0.285482


In [10]:
x_train = data[['Elapsed_Time', 'AccV', 'AccML', 'AccAP']].values
y_walking_train = data.loc[data.index[:len(x_train)], 'Walking'].values
y_turn_train = data.loc[data.index[:len(x_train)], 'Turn'].values
y_SH_train = data.loc[data.index[:len(x_train)], 'StartHesitation'].values
data = data.dropna().reset_index(drop=True)
print(x_train.shape, y_walking_train.shape, y_turn_train.shape, y_SH_train.shape)


(1115321, 4) (1115321,) (1115321,) (1115321,)


In [11]:
import numpy as np

def create_lstm_sequences(x, y, seq_length):
    x_seq, y_seq = [], []
    for i in range(len(x) - seq_length):  # This ensures reduction in size
        x_seq.append(x[i:i+seq_length])  
        y_seq.append(y[i + seq_length])  

    print(f"After sequence creation: X shape {len(x_seq)}, Y shape {len(y_seq)}")  
    return np.array(x_seq), np.array(y_seq)

SEQ_LENGTH = 30  

x, y_walking = create_lstm_sequences(x_train, y_walking_train, SEQ_LENGTH)
_, y_turn = create_lstm_sequences(x_train, y_turn_train, SEQ_LENGTH)
_, y_SH = create_lstm_sequences(x_train, y_SH_train, SEQ_LENGTH)

# Check final shapes
print("Final X_train LSTM shape:", x.shape)  # Expected: (1115291, 30, 4)
print("Final Y_walking LSTM shape:", y_walking.shape)  # Expected: (1115291,)


After sequence creation: X shape 1115291, Y shape 1115291
After sequence creation: X shape 1115291, Y shape 1115291
After sequence creation: X shape 1115291, Y shape 1115291
Final X_train LSTM shape: (1115291, 30, 4)
Final Y_walking LSTM shape: (1115291,)


In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_walking_train, y_walking_test = train_test_split(x, y_walking, test_size=0.2, random_state=42)
_, _, y_turn_train, y_turn_test = train_test_split(x, y_turn, test_size=0.2, random_state=42)
_, _, y_SH_train, y_SH_test = train_test_split(x, y_SH, test_size=0.2, random_state=42)

In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Dropout, BatchNormalization

time_steps = 30
num_features = 4
# Input
input_layer = Input(shape=(time_steps, num_features))

# CNN Layer (Feature Extraction)
cnn = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(input_layer)
cnn = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(cnn)
cnn = BatchNormalization()(cnn)
cnn = Dropout(0.3)(cnn)

# LSTM Layer (Sequence Learning)
lstm = LSTM(64, return_sequences=True)(cnn)
lstm = LSTM(32, return_sequences=False)(lstm)

# Shared Representation
shared = Dense(64, activation='relu')(lstm)
shared = Dropout(0.3)(shared)

# Multi-task Outputs
start_hesitation = Dense(32, activation='relu')(shared)
start_hesitation = Dense(1, activation='sigmoid', name="StartHesitation")(start_hesitation)

turn = Dense(32, activation='relu')(shared)
turn = Dense(1, activation='sigmoid', name="Turn")(turn)

walking = Dense(32, activation='relu')(shared)
walking = Dense(1, activation='sigmoid', name="Walking")(walking)

# Model Definition
model = Model(inputs=input_layer, outputs=[start_hesitation, turn, walking])
model.compile(optimizer="adam", loss="binary_crossentropy")




In [13]:
from tensorflow.keras.models import load_model

model = load_model("lstm_model.h5")  # Try this first!




In [14]:
results = model.evaluate(
    X_test, 
    {"StartHesitation": y_SH_test, "Turn": y_turn_test, "Walking": y_turn_test}
)


[1m6971/6971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 10ms/step - StartHesitation_accuracy: 0.9937 - StartHesitation_loss: 0.0203 - Turn_accuracy: 0.9388 - Turn_loss: 0.1545 - Walking_accuracy: 0.7822 - Walking_loss: 2.2222 - loss: 2.3970


In [1]:
import tensorflow as tf
print(tf.__version__)


2.17.0
