In [1]:
import tensorflow as tf
from tensorflow import argmax
from tensorflow.data import AUTOTUNE, Dataset
from tensorflow.keras.models import Model 
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy, BinaryCrossentropy
from tensorflow.keras.layers import (
    Dense, 
    Input, 
    GlobalAveragePooling1D,
    Bidirectional,
    LSTM,
    Dropout,
    Lambda
    )

from src.nn_blocks import unet_se_cnn, features_processing, GatedMixupGenerator, tof_block
from src.functions import (
    train_model, 
    perform_target_encoding, 
    build_dataset,
    create_sequence_dataset,
    pl_standard_scaling,
    perform_padding,
    generate_gate_targets
)

from src.constants import DATA_PATH
from tensorflow import argmax
from sklearn.metrics import classification_report, accuracy_score

import polars as pl
import numpy as np

2025-08-09 18:45:44.951170: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754761544.970592   27537 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754761544.976422   27537 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754761545.006344   27537 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754761545.006389   27537 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754761545.006392   27537 computation_placer.cc:177] computation placer alr

In [2]:
# for e in train_dataset:
#     x=e[0]
#     y=e[1]
#     break

# original = x    
# inputs = x

In [3]:
def create_model_definition(dataset, imu_dim, wd=1e-4, num_classes=18):
    """Build the two-branch gesture model with a classification and a gate head.

    The input shape is read from the first batch yielded by ``dataset``; the
    last axis is then split into an IMU slice (first ``imu_dim`` channels) and
    a ToF/thermal slice (remaining channels), each processed by its own branch.

    Args:
        dataset: Iterable of batches whose first element is the feature
            array. Only the first batch is consumed, and only for its shape.
        imu_dim: Number of leading channels on the last axis that are routed
            to ``unet_se_cnn``; the remaining channels go to ``tof_block``.
        wd: Forwarded as the second argument of ``tof_block``
            (presumably its L2 weight-decay factor — confirm in src.nn_blocks).
        num_classes: Number of units of the softmax ``main_output`` head.
            Defaults to 18, the value that was previously hard-coded.

    Returns:
        A Keras ``Model`` whose outputs are a dict with the keys
        ``"main_output"`` (softmax over ``num_classes``) and ``"tof_gate"``
        (single sigmoid unit).

    Raises:
        ValueError: If ``imu_dim`` would leave either the IMU slice or the
            ToF slice without any channel.
    """
    sample_batch = next(iter(dataset))
    # Drop the batch axis; what is left is the per-sample input shape.
    input_shape = sample_batch[0].shape[1:]

    feature_dim = input_shape[-1]
    if feature_dim is not None and not 0 < imu_dim < feature_dim:
        raise ValueError(
            f"imu_dim must satisfy 0 < imu_dim < {feature_dim} so that both "
            f"branches receive at least one channel, got {imu_dim}"
        )

    # inp = Input(shape=(128, imu_dim+tof_dim))
    inp = Input(shape=input_shape)
    # Split the channel axis: leading channels -> IMU, trailing -> ToF.
    imu = Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = Lambda(lambda t: t[:, :, imu_dim:])(inp)

    x1 = unet_se_cnn(imu, 3, base_filters=64, kernel_size=3)
    x2 = tof_block(tof, wd)

    x = features_processing(x1, x2)
    x = Dropout(0.3)(x)
    # Both heads read the same fused representation.
    main_out = Dense(num_classes, activation="softmax", name="main_output")(x)
    gate_out = Dense(1, activation="sigmoid", name="tof_gate")(x)

    return Model(inputs=inp, outputs={"main_output": main_out, "tof_gate": gate_out})

In [4]:
# import tensorflow as tf
# from tensorflow.keras.layers import Lambda

# def shape_debug(x, msg=""):
#     print(f"{msg} -> {x.shape}")
#     return x

# def shape_debug_layer(msg=""):
#     return Lambda(lambda t: tf.print(f"{msg} ->", tf.shape(t)) or t,
#                   name=f"debug_{msg.replace(' ', '_')}")


# def unet_se_cnn_debug(x, unet_depth=3, base_filters=64, kernel_size=3, drop=0.3):
#     filters = base_filters
#     skips = []

#     for d in range(unet_depth):
#         x = shape_debug(x, f"IMU encoder start depth {d}")
#         x = residual_se_cnn_block(x, filters, kernel_size, drop=drop)
#         x = shape_debug(x, f"IMU after residual+pool depth {d}")
#         skips.append(x)
#         filters *= 2

#     c_shape = x.shape[-1]
#     x = Dense(128)(x)
#     x = Dense(c_shape)(x)
#     x = shape_debug(x, "IMU bottleneck")

#     for d, skip in enumerate(reversed(skips)):
#         filters //= 2
#         x = res_se_cnn_decoder_block(x, filters, kernel_size, drop=drop, skip_connection=skip)
#         x = shape_debug(x, f"IMU decoder after upsampling depth {d}")

#     return x

# def tof_block_debug(tof, wd=1e-4):
#     x = shape_debug(tof, "ToF input")
#     x = Conv1D(64, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = shape_debug(x, "ToF after Conv1D-1")
#     x = MaxPooling1D(2)(x)
#     x = shape_debug(x, "ToF after pool-1")
#     x = Dropout(0.2)(x)

#     x = Conv1D(128, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x)
#     x = BatchNormalization()(x)
#     x = Activation('relu')(x)
#     x = shape_debug(x, "ToF after Conv1D-2")
#     x = MaxPooling1D(2)(x)
#     x = shape_debug(x, "ToF after pool-2")
#     x = Dropout(0.2)(x)
#     return x


In [5]:
# model = create_model_definition(x)
# for layer in model.layers:
#     print(layer.name, layer.output_shape)

In [None]:
import polars as pl
from sklearn.model_selection import KFold
from tensorflow import argmax
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
import numpy as np
import gc
from sklearn.preprocessing import LabelEncoder


# --- Configuration -----------------------------------------------------------
# PARQUET_FILE = f'{DATA_PATH}/extended_features_df.parquet'
# PARQUET_FILE = 'output/full_features_df.parquet'
PARQUET_FILE = 'output/final_processed_train_data.parquet'
LR_INIT = 5e-4
WD = 3e-3
NUM_CLASSES = 18
BATCH_SIZE = 64
SEED = 42        # shared by the fold split and the Python/NumPy/TF RNGs
N_SPLITS = 4
EPOCHS = 150

# Seed Python, NumPy and TensorFlow so repeated runs start from the same
# random state (GPU kernels may still introduce some non-determinism).
tf.keras.utils.set_random_seed(SEED)

# --- Column layout -----------------------------------------------------------
# Read only the schema (zero rows) to learn the column names.
schema_df = pl.read_parquet(PARQUET_FILE, n_rows=0)
all_columns = schema_df.columns
meta_cols = {'gesture', 'gesture_int', 'sequence_type', 'behavior', 'orientation',
                'row_id', 'subject', 'phase', 'sequence_id', 'sequence_counter'}
feature_cols = [c for c in all_columns if c not in meta_cols]
# Everything that is not thermopile ('thm_') or time-of-flight ('tof_') is
# treated as an IMU feature.
imu_cols  = [c for c in feature_cols if not (c.startswith('thm_') or c.startswith('tof_'))]
tof_cols  = [c for c in feature_cols if c.startswith('thm_') or c.startswith('tof_')]

print("Scanning Parquet file for sequence IDs...")
# `unique()` does not guarantee an output order, so the IDs are sorted:
# otherwise the seeded KFold below could produce different folds on every run.
all_sequence_ids = (
    pl.scan_parquet(PARQUET_FILE)
    .select('sequence_id')
    .unique()
    .sort('sequence_id')
    .collect()
    .to_numpy()
    .ravel()
)
print(f"Found {len(all_sequence_ids)} unique sequences.")

# Fit the label encoder once on every gesture present in the file so the
# class -> integer mapping is identical in all folds; the pooled
# classification report relies on that.
le = LabelEncoder()
le.fit(
    pl.scan_parquet(PARQUET_FILE)
    .select('gesture')
    .unique()
    .collect()
    .to_numpy()
    .ravel()
)

# NOTE(review): the split is over sequence IDs only, so sequences of the same
# subject can land in both train and validation. If subject-level
# generalisation is what should be measured, a grouped split on 'subject'
# would be needed — TODO confirm intent.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
fold_accuracies = []
all_preds = []
all_labels = []

imu_dim = len(imu_cols)
max_pad_len = 128
meta_cols_to_keep = ['sequence_id', 'gesture_int']

for fold_idx, (train_indices, val_indices) in enumerate(kf.split(all_sequence_ids)):
    print(f"\n=== Fold {fold_idx + 1} ===")
    train_ids = all_sequence_ids[train_indices]
    val_ids = all_sequence_ids[val_indices]

    print(f"Loading data for fold {fold_idx + 1}...")
    # Lazy scan + filter lets polars apply the predicate while reading,
    # instead of materialising the whole file twice per fold.
    train_df = (
        pl.scan_parquet(PARQUET_FILE)
        .filter(pl.col('sequence_id').is_in(train_ids))
        .collect()
    )
    val_df = (
        pl.scan_parquet(PARQUET_FILE)
        .filter(pl.col('sequence_id').is_in(val_ids))
        .collect()
    )
    print("Fold data loaded.")

    # Gate targets are derived from the raw (unscaled) ToF columns.
    train_gate_df = generate_gate_targets(train_df, tof_cols)
    val_gate_df = generate_gate_targets(val_df, tof_cols)

    train_df = train_df.with_columns(pl.Series("gesture_int", le.transform(train_df['gesture'])))
    val_df = val_df.with_columns(pl.Series("gesture_int", le.transform(val_df['gesture'])))

    # Returns scaled feature columns for train and validation
    # (presumably with statistics taken from the train split — TODO confirm).
    X_train_scaled_features, X_val_scaled_features = pl_standard_scaling(train_df, val_df, imu_cols + tof_cols)

    train_df_final = train_df.select(meta_cols_to_keep).with_columns(X_train_scaled_features)
    val_df_final = val_df.select(meta_cols_to_keep).with_columns(X_val_scaled_features)

    # Free the unscaled frames before building the sequence arrays.
    del train_df, val_df, X_train_scaled_features, X_val_scaled_features
    gc.collect()

    X_train, y_train, train_gate_target = create_sequence_dataset(train_df_final, imu_cols + tof_cols, train_gate_df)
    X_val, y_val, val_gate_target = create_sequence_dataset(val_df_final, imu_cols + tof_cols, val_gate_df)

    del train_df_final, val_df_final
    gc.collect()

    X_train_padded = perform_padding(X_train, max_pad_len)
    X_val_padded = perform_padding(X_val, max_pad_len)
    print(f'Fully padded dataset shape: {X_train_padded.shape}')

    y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
    y_val_cat = tf.keras.utils.to_categorical(y_val, num_classes=NUM_CLASSES)

    train_dataset = GatedMixupGenerator(
        X=X_train_padded,
        y=y_train_cat,
        gate_targets=train_gate_target,
        batch_size=BATCH_SIZE,
        imu_dim=imu_dim,
        class_weight=None,
        alpha=0.2,
        masking_prob=0.25
    )

    # Validation data is batched in its original order (no shuffling), so the
    # predictions below line up row-for-row with `y_val_cat`.
    val_dataset = tf.data.Dataset.from_tensor_slices((
        X_val_padded,
        {
            'main_output': y_val_cat,
            'tof_gate': val_gate_target[:, np.newaxis]
        }
    )).batch(BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)

    del X_val, y_val, X_train, y_train, X_train_padded, X_val_padded
    gc.collect()

    model = create_model_definition(train_dataset, imu_dim)
    train_model(model, train_dataset, val_dataset, EPOCHS, LR_INIT, WD)

    print(f"--- Evaluating Fold {fold_idx + 1} ---")
    val_preds = model.predict(val_dataset)
    main_output_preds = val_preds['main_output']

    y_pred_fold = np.argmax(main_output_preds, axis=1)
    y_true_fold = np.argmax(y_val_cat, axis=1)

    fold_acc = accuracy_score(y_true_fold, y_pred_fold)
    fold_accuracies.append(fold_acc)
    print(f"Fold {fold_idx + 1} Accuracy: {fold_acc:.4f}")

    all_preds.append(y_pred_fold)
    all_labels.append(y_true_fold)

    # Drop the fold's model/datasets and reset Keras' global state so memory
    # and layer-name counters do not accumulate across folds.
    del train_dataset, model, val_dataset
    tf.keras.backend.clear_session()
    gc.collect()

Scanning Parquet file for sequence IDs...
Found 8151 unique sequences.

=== Fold 1 ===
Loading data for fold 1...
Fold data loaded.
Fully padded dataset shape: (6113, 128, 38)


I0000 00:00:1754761587.118613   27537 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4714 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1


LR Scheduler: 96 steps per epoch, 14400 total decay steps.
Epoch 1/150


  self._warn_if_super_not_called()
I0000 00:00:1754761615.771390   27919 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 226ms/step - loss: 3.7619 - main_output_accuracy: 0.1005 - main_output_loss: 3.2613 - tof_gate_loss: 0.6081 - val_loss: 3.0722 - val_main_output_accuracy: 0.2439 - val_main_output_loss: 2.6052 - val_tof_gate_loss: 0.4871
Epoch 2/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 193ms/step - loss: 3.1301 - main_output_accuracy: 0.2083 - main_output_loss: 2.6977 - tof_gate_loss: 0.3327 - val_loss: 2.8514 - val_main_output_accuracy: 0.2463 - val_main_output_loss: 2.4219 - val_tof_gate_loss: 0.3897
Epoch 3/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 194ms/step - loss: 2.9226 - main_output_accuracy: 0.2453 - main_output_loss: 2.5223 - tof_gate_loss: 0.2956 - val_loss: 2.3786 - val_main_output_accuracy: 0.4239 - val_main_output_loss: 2.0031 - val_tof_gate_loss: 0.2159
Epoch 4/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 195ms/step - loss: 2.7170 - main_output_a

  self._warn_if_super_not_called()


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 211ms/step - loss: 3.7743 - main_output_accuracy: 0.0962 - main_output_loss: 3.2834 - tof_gate_loss: 0.5827 - val_loss: 3.1264 - val_main_output_accuracy: 0.1546 - val_main_output_loss: 2.6848 - val_tof_gate_loss: 0.3575
Epoch 2/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 204ms/step - loss: 3.0679 - main_output_accuracy: 0.2223 - main_output_loss: 2.6446 - tof_gate_loss: 0.2925 - val_loss: 2.6363 - val_main_output_accuracy: 0.3150 - val_main_output_loss: 2.2428 - val_tof_gate_loss: 0.2151
Epoch 3/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 190ms/step - loss: 2.8878 - main_output_accuracy: 0.2615 - main_output_loss: 2.4882 - tof_gate_loss: 0.2757 - val_loss: 2.3070 - val_main_output_accuracy: 0.4284 - val_main_output_loss: 1.9483 - val_tof_gate_loss: 0.1481
Epoch 4/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 178ms/step - loss: 2.7394 - main_output_a

  self._warn_if_super_not_called()


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 208ms/step - loss: 3.7130 - main_output_accuracy: 0.1036 - main_output_loss: 3.2299 - tof_gate_loss: 0.5138 - val_loss: 3.0919 - val_main_output_accuracy: 0.2635 - val_main_output_loss: 2.6377 - val_tof_gate_loss: 0.4152
Epoch 2/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 204ms/step - loss: 3.1098 - main_output_accuracy: 0.2115 - main_output_loss: 2.6819 - tof_gate_loss: 0.3024 - val_loss: 2.6079 - val_main_output_accuracy: 0.3562 - val_main_output_loss: 2.2030 - val_tof_gate_loss: 0.2540
Epoch 3/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 187ms/step - loss: 2.8945 - main_output_accuracy: 0.2487 - main_output_loss: 2.4975 - tof_gate_loss: 0.2395 - val_loss: 2.3055 - val_main_output_accuracy: 0.4323 - val_main_output_loss: 1.9464 - val_tof_gate_loss: 0.1153
Epoch 4/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 204ms/step - loss: 2.7843 - main_output_a

  self._warn_if_super_not_called()


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 244ms/step - loss: 3.7919 - main_output_accuracy: 0.1007 - main_output_loss: 3.2945 - tof_gate_loss: 0.5848 - val_loss: 3.1239 - val_main_output_accuracy: 0.2067 - val_main_output_loss: 2.6571 - val_tof_gate_loss: 0.4886
Epoch 2/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 186ms/step - loss: 3.1846 - main_output_accuracy: 0.2050 - main_output_loss: 2.7599 - tof_gate_loss: 0.3003 - val_loss: 2.6684 - val_main_output_accuracy: 0.3294 - val_main_output_loss: 2.2759 - val_tof_gate_loss: 0.2137
Epoch 3/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 194ms/step - loss: 2.8814 - main_output_accuracy: 0.2721 - main_output_loss: 2.4871 - tof_gate_loss: 0.2531 - val_loss: 2.2877 - val_main_output_accuracy: 0.4546 - val_main_output_loss: 1.9272 - val_tof_gate_loss: 0.1528
Epoch 4/150
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 177ms/step - loss: 2.7392 - main_output_a

In [7]:
# Cross-validation Summary: per-fold accuracies plus their mean and spread.
mean_accuracy = np.mean(fold_accuracies)
std_accuracy = np.std(fold_accuracies)

print("\n=== Cross-validation Summary ===")
print(f"Per-fold Accuracies: {fold_accuracies}")
print(f"Mean Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")

# Global classification report: pool the validation predictions and labels
# of every fold into one pair of flat arrays.
y_all_pred = np.concatenate(all_preds)
y_all_true = np.concatenate(all_labels)

print("\n=== Overall Classification Report ===")
overall_report = classification_report(y_all_true, y_all_pred, digits=4)
print(overall_report)


=== Cross-validation Summary ===
Per-fold Accuracies: [0.7546614327772326, 0.7507360157016683, 0.767909715407262, 0.7741777123220422]
Mean Accuracy: 0.7619 ± 0.0095

=== Overall Classification Report ===
              precision    recall  f1-score   support

           0     0.8195    0.8041    0.8117       638
           1     0.6683    0.6264    0.6467       637
           2     0.9808    0.9503    0.9653       161
           3     0.5449    0.5611    0.5529       638
           4     0.6404    0.5703    0.6033       640
           5     0.8735    0.9006    0.8869       161
           6     0.8163    0.7844    0.8000       640
           7     0.8759    0.9156    0.8953       640
           8     0.9808    0.9503    0.9653       161
           9     0.5899    0.6922    0.6370       640
          10     0.6574    0.6297    0.6433       640
          11     0.6854    0.7578    0.7198       161
          12     0.9462    0.9581    0.9521       477
          13     0.5155    0.3106    0

In [8]:
# Take only the first batch from the training generator: element 0 is the
# feature batch, element 1 the targets.
for e in train_dataset:
    x, y = e[0], e[1]
    break

# Keep two extra handles on the untouched feature batch; later cells
# overwrite `x` while still reading `original` / `inputs`.
original = inputs = x

NameError: name 'train_dataset' is not defined

In [None]:
# Display the shape of `x`, the feature batch captured in the previous cell.
x.shape

(64, 127, 27)

In [None]:
from tensorflow.keras.utils import plot_model
import numpy as np

# from src.constants import return_data_path
# DATA_PATH = return_data_path()
# FILE_PATH = f'{DATA_PATH}train.csv'
# schema_df = pl.read_csv(FILE_PATH, n_rows=0)
# all_columns = schema_df.columns
# meta_cols = {'gesture', 'gesture_int', 'sequence_type', 'behavior', 'orientation',
#                 'row_id', 'subject', 'phase', 'sequence_id', 'sequence_counter'}
# feature_cols = [c for c in all_columns if c not in meta_cols]
# imu_cols  = [c for c in feature_cols if not (c.startswith('thm_') or c.startswith('tof_'))]
# tof_cols  = [c for c in feature_cols if c.startswith('thm_') or c.startswith('tof_')]
# imu_cols  = [c for c in feature_cols if not (c.startswith('thm_') or c.startswith('tof_'))]

# model = build_gated_two_branch_model(125, 13, 25, 18)
# Count parameters with plain Python ints: `np.sum([])` returns the float 0.0
# for an empty weight list, which would turn the totals below into floats
# (e.g. "960.0"). The built-in sum() over ints stays an int in every case.
trainable_count = sum(int(np.prod(v.shape)) for v in model.trainable_weights)
non_trainable_count = sum(int(np.prod(v.shape)) for v in model.non_trainable_weights)
total_params = trainable_count + non_trainable_count

print(f"Trainable params: {trainable_count:,}")
print(f"Non-trainable params: {non_trainable_count:,}")
print(f"Total params: {total_params:,}")


# Size estimate assumes every parameter is stored as float32 (4 bytes).
bytes_per_param = 4  # for float32
model_size_bytes = total_params * bytes_per_param
model_size_mb = model_size_bytes / (1024**2)

print(f"Estimated model size: {model_size_mb:.2f} MB")

Trainable params: 486,492
Non-trainable params: 960
Total params: 487,452
Estimated model size: 1.86 MB


In [None]:
# The saved cell ended right after the dot, which is a SyntaxError. The
# recorded output is the bound `__getitem__` method, so that attribute
# lookup is restored here.
train_dataset.__getitem__

<bound method GatedMixupGenerator.__getitem__ of <src.nn_blocks.GatedMixupGenerator object at 0x7f48cd2829f0>>

In [None]:
# Pass the saved feature batch through `unet_se_cnn` and display the
# resulting shape.
x = unet_se_cnn(original, base_filters=32, kernel_size=3)
x.shape

TensorShape([24, 348, 32])

In [None]:
from tensorflow.keras.layers import GRU, GaussianNoise, Concatenate, Lambda, Activation, Multiply

wd = 1e-4

# Branch A: bidirectional LSTM over `x`, keeping one output per timestep.
xa = Bidirectional(
    LSTM(128, return_sequences=True, kernel_regularizer=l2(wd))
)(x)

# Branch B: same layout with a GRU cell.
xb = Bidirectional(
    GRU(128, return_sequences=True, kernel_regularizer=l2(wd))
)(x)

# Branch C: Gaussian noise followed by a small ELU projection.
noisy = GaussianNoise(0.09)(x)
xc = Dense(16, activation='elu')(noisy)

# Concatenate the three branches (default axis, i.e. the last one).
# To check the individual branches: xa.shape, xb.shape, xc.shape
x = Concatenate()([xa, xb, xc])
x.shape

TensorShape([24, 348, 528])

In [None]:
import tensorflow as tf
from tensorflow.keras import backend as k

def time_sum(x):
    """Sum ``x`` over axis 1 and drop that axis.

    Axis 1 is the time axis going by the function name; a rank-3 input of
    shape (a, b, c) therefore yields shape (a, c).

    Uses ``tf.reduce_sum`` directly, like the sibling ``tf.squeeze`` /
    ``tf.expand_dims`` helpers, instead of the legacy ``keras.backend.sum``
    alias.
    """
    return tf.reduce_sum(x, axis=1)

def squeeze_last_axis(x):
    """Return ``x`` with its trailing size-1 axis removed via ``tf.squeeze``."""
    squeezed = tf.squeeze(x, axis=-1)
    return squeezed

def expand_last_axis(x):
    """Return ``x`` with a new size-1 axis appended via ``tf.expand_dims``."""
    expanded = tf.expand_dims(x, axis=-1)
    return expanded

# One tanh-activated unit applied to `inputs`, then the trailing size-1
# axis is squeezed away so `score` has one fewer dimension.
raw_score = Dense(1, activation='tanh')(inputs)
score = Lambda(squeeze_last_axis)(raw_score)
score.shape

TensorShape([24, 700])

In [None]:
# Softmax over the last axis of `score`, then restore a trailing size-1
# axis so the weights can be multiplied with `inputs`.
normalized_score = Activation('softmax')(score)
weights = Lambda(expand_last_axis)(normalized_score)
weights.shape

TensorShape([24, 700, 1])

In [None]:
# Weight `inputs` element-wise by `weights`, then collapse axis 1 with
# `time_sum` to obtain one context vector per sample.
weighted_inputs = Multiply()([inputs, weights])
context = Lambda(time_sum)(weighted_inputs)
context.shape

TensorShape([24, 15])

In [None]:
# Two Dense -> ReLU -> Dropout stages, given as (units, dropout rate).
# Note: the cell rebinds `x`, so re-running it stacks further layers.
dense_stack = [(256, 0.5), (128, 0.3)]
for units, drop in dense_stack:
    projected = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
    activated = Activation('relu')(projected)
    x = Dropout(drop)(activated)

x.shape

TensorShape([24, 348, 128])

In [None]:

# NOTE(review): `features_processing` receives two tensors inside
# `create_model_definition` but only one here — confirm the signature
# accepts a single argument.
x = features_processing(x)
x = Dropout(0.3)(x)
print(x.shape)

# Dense layer that keeps the current channel width, followed by global
# average pooling and an 18-way softmax.
channel_width = x.shape[-1]
x = Dense(channel_width, activation="relu")(x)
x = GlobalAveragePooling1D()(x)
outputs = Dense(18, activation="softmax")(x)

In [None]:
# from src.nn_blocks import residual_se_cnn_block,  res_se_cnn_decoder_block, unet_se_cnn_bilstm, unet_se_cnn
# x = original
# b = residual_se_cnn_block(x, 64, 3, 2)
# b.shape

TensorShape([24, 350, 64])

In [None]:
# from tensorflow.keras.layers import UpSampling1D, GRU

# UpSampling1D(2)(b).shape

TensorShape([24, 700, 64])

In [None]:
# 

In [None]:
# filters = 32
# x = original
# for _ in range(3):
#     x = residual_se_cnn_block(x, filters, 3, 1)
#     filters *= 2

# x.shape


TensorShape([24, 700, 128])

In [None]:
# c_shape = x.shape[-1]
# # x = Bidirectional(GRU(c_shape*2))(x)
# x = Dense(c_shape)(x)
# x.shape

TensorShape([24, 700, 128])

In [None]:
# x1 = x

# b = unet_se_cnn(original)
# b.shape

TensorShape([24, 348, 64])

In [None]:
# c = unet_se_cnn_bilstm(original)
# c.shape

ValueError: Input 0 of layer "up_sampling1d_8" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (24, 128)