In [1]:
import gc
import numpy as np
import polars as pl
import tensorflow as tf
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.utils import pad_sequences, to_categorical

from src.nn_blocks import (
    unet_se_cnn,
    features_processing, 
    GatedMixupGenerator,
    residual_se_cnn_block,
    attention_layer,
    wave_block
)

from src.merge_feats_dynamic import merge_feature_sets

from src.functions import (
    train_model, 
    create_sequence_dataset,
    perform_padding,
    generate_gate_targets
)

NUM_CLASSES = 18

# =====================================================================================
# SEGMENT: 8 MODEL ARCHITECTURES FOR EXPERIMENTATION
# =====================================================================================

# --- Model 1: Pure CNN Baseline (Fast & Strong) ---
# Hypothesis: Local patterns and textures in the signal are the most important features.
def create_model_1_cnn_baseline(input_shape, wd=1e-4):
    """Build the pure-CNN classifier.

    Three ``residual_se_cnn_block`` stages (64/128/256 filters with kernel
    sizes 3/5/7) are followed by concatenated global average and max pooling,
    dropout, a 128-unit ReLU layer and a softmax over ``NUM_CLASSES``.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Value forwarded as ``wd`` to every ``residual_se_cnn_block``.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Convolutional trunk: (filters, kernel_size) per stage.
    features = model_input
    for n_filters, kernel_size in ((64, 3), (128, 5), (256, 7)):
        features = residual_se_cnn_block(features, n_filters, kernel_size, drop=0.2, wd=wd)

    # Summarise the time axis two ways and stack the summaries.
    pooled_mean = layers.GlobalAveragePooling1D()(features)
    pooled_max = layers.GlobalMaxPooling1D()(features)
    summary = layers.Concatenate()([pooled_mean, pooled_max])

    # Classification head.
    head = layers.Dropout(0.4)(summary)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 2: Pure RNN Baseline (GRU) ---
# Hypothesis: The sequential order and long-range dependencies are the most important features.
def create_model_2_gru_baseline(input_shape, wd=1e-4):
    """Build the pure-recurrent classifier.

    Gaussian input noise, two stacked bidirectional GRU layers (128 units
    each, full sequences returned), ``attention_layer`` to collapse the
    sequence, then dropout, a 128-unit ReLU layer and a softmax over
    ``NUM_CLASSES``.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: L2 factor applied to the GRU kernels.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Noise on the raw input acts as a regulariser.
    sequence = layers.GaussianNoise(0.05)(model_input)

    # Two identical bidirectional GRU layers stacked on top of each other.
    for _ in range(2):
        gru_cell = layers.GRU(
            128,
            return_sequences=True,
            kernel_regularizer=tf.keras.regularizers.l2(wd),
        )
        sequence = layers.Bidirectional(gru_cell)(sequence)

    # Collapse the sequence with the project's attention helper.
    context = attention_layer(sequence)

    head = layers.Dropout(0.4)(context)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 3: CNN-RNN Hybrid (Classic & Powerful) ---
# Hypothesis: Combining local CNN features with sequential RNN context is optimal.
def create_model_3_cnn_rnn_hybrid(input_shape, wd=1e-4):
    """Build the CNN + BiLSTM hybrid classifier.

    Two ``residual_se_cnn_block`` stages (64 filters / kernel 3, then
    128 filters / kernel 5) feed a bidirectional LSTM; ``attention_layer``
    collapses its output before the dropout + dense softmax head.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Forwarded as ``wd`` to the CNN blocks and used as the L2 factor
            on the LSTM kernel.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Convolutional front end: (filters, kernel_size) per stage.
    hidden = model_input
    for n_filters, kernel_size in ((64, 3), (128, 5)):
        hidden = residual_se_cnn_block(hidden, n_filters, kernel_size, drop=0.2, wd=wd)

    # Recurrent stage over the convolutional features.
    lstm_cell = layers.LSTM(
        128,
        return_sequences=True,
        kernel_regularizer=tf.keras.regularizers.l2(wd),
    )
    hidden = layers.Bidirectional(lstm_cell)(hidden)

    # Sequence -> vector.
    context = attention_layer(hidden)

    head = layers.Dropout(0.4)(context)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 4: WaveNet-Style (Dilated Convolutions) ---
# Hypothesis: Capturing multi-scale temporal context with dilated convolutions is key.
def create_model_4_wavenet_style(input_shape, wd=1e-4):
    """Build the WaveNet-style classifier.

    A 1x1 convolution projects the input to 64 channels, two ``wave_block``
    stages follow, then a 256-channel 1x1 ReLU convolution, concatenated
    global average/max pooling and the dropout + dense softmax head.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Accepted so all builders share one signature; this function does
            not use it.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Point-wise projection into the working channel count.
    hidden = layers.Conv1D(64, 1, padding='same')(model_input)

    # (filters, n) per wave block. Per the original author's note, n=4 gives
    # dilations 1, 2, 4, 8 and n=3 gives 1, 2, 4 -- confirm against wave_block.
    for n_filters, n_levels in ((64, 4), (128, 3)):
        hidden = wave_block(hidden, n_filters, 3, n=n_levels, dropout_rate=0.3)

    # Widen once more before collapsing the time axis.
    hidden = layers.Conv1D(256, 1, activation='relu', padding='same')(hidden)

    pooled_mean = layers.GlobalAveragePooling1D()(hidden)
    pooled_max = layers.GlobalMaxPooling1D()(hidden)
    summary = layers.Concatenate()([pooled_mean, pooled_max])

    head = layers.Dropout(0.4)(summary)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 5: U-Net Style Encoder-Decoder ---
# Hypothesis: A multi-resolution analysis of the signal (like in image segmentation) is effective.
def create_model_5_unet_style(input_shape, wd=1e-4):
    """Build the U-Net-style classifier.

    The input goes through ``unet_se_cnn`` (depth 4, 64 base filters,
    kernel size 5, drop 0.3), is collapsed by ``attention_layer`` and is
    classified by the dropout + dense softmax head.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Accepted so all builders share one signature; this function does
            not use it.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Encoder/decoder feature extractor from the project's block library.
    unet_features = unet_se_cnn(
        model_input,
        unet_depth=4,
        base_filters=64,
        kernel_size=5,
        drop=0.3,
    )

    # Sequence -> vector.
    context = attention_layer(unet_features)

    head = layers.Dropout(0.4)(context)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 6: Lightweight Transformer ---
# Hypothesis: Global self-attention is sufficient to solve the problem.
class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention block with a feed-forward sub-layer (post-norm).

    ``call`` applies multi-head self-attention, dropout, a residual add and
    layer normalisation, then a two-layer feed-forward network (GELU in the
    hidden layer), dropout, a second residual add and layer normalisation.

    Args:
        embed_dim: Width of the feed-forward output; it is also passed to
            ``MultiHeadAttention`` as ``key_dim``. The residual additions
            require the last axis of the input to equal this value.
        num_heads: Number of attention heads.
        ff_dim: Width of the feed-forward hidden layer.
        rate: Dropout rate used after attention and after the feed-forward
            network.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments on the instance so get_config() can
        # return them directly instead of digging them out of sub-layer
        # attributes, whose public names are not the same in every Keras
        # release.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([tf.keras.layers.Dense(ff_dim, activation="gelu"), tf.keras.layers.Dense(embed_dim)])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block on ``inputs``; ``training`` toggles the dropout layers."""
        # Self-attention: the same tensor is used as query and value.
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward network with its own residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

def create_model_6_transformer(input_shape, wd=1e-4):
    """Build the lightweight Transformer classifier.

    A dense layer projects each time step to 128 features, two
    ``TransformerBlock`` layers follow, and global average pooling feeds the
    dropout + dense softmax head.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Accepted so all builders share one signature; this function does
            not use it.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Embed every time step into the width the Transformer blocks expect.
    tokens = layers.Dense(128)(model_input)

    # Two identically configured encoder blocks.
    for _ in range(2):
        tokens = TransformerBlock(embed_dim=128, num_heads=4, ff_dim=128, rate=0.2)(tokens)

    # Average over the time axis.
    pooled = layers.GlobalAveragePooling1D()(tokens)

    head = layers.Dropout(0.4)(pooled)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# --- Model 7: CNN-Transformer Hybrid (State-of-the-Art) ---
# Hypothesis: Use CNNs for efficient local feature extraction, then a Transformer for global context.
def create_model_7_cnn_transformer(input_shape, wd=1e-4):
    """Build the CNN + Transformer hybrid classifier.

    Two ``residual_se_cnn_block`` stages (64 filters / kernel 3, then
    128 filters / kernel 5) feed a single ``TransformerBlock``; global
    average pooling then feeds the dropout + dense softmax head.

    Args:
        input_shape: Per-sample input shape (batch axis excluded).
        wd: Value forwarded as ``wd`` to every ``residual_se_cnn_block``.

    Returns:
        An uncompiled Keras model whose single output layer is named
        ``main_output``.
    """
    layers = tf.keras.layers
    model_input = layers.Input(shape=input_shape)

    # Convolutional front end: (filters, kernel_size) per stage.
    hidden = model_input
    for n_filters, kernel_size in ((64, 3), (128, 5)):
        hidden = residual_se_cnn_block(hidden, n_filters, kernel_size, drop=0.2, wd=wd)

    # One attention block over the convolutional features.
    hidden = TransformerBlock(embed_dim=128, num_heads=4, ff_dim=128, rate=0.3)(hidden)

    # Average over the time axis.
    pooled = layers.GlobalAveragePooling1D()(hidden)

    head = layers.Dropout(0.4)(pooled)
    head = layers.Dense(128, activation='relu')(head)
    class_probs = layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(head)

    return tf.keras.models.Model(inputs=model_input, outputs=class_probs)

# # --- Model 8: Your Two-Branch Architecture (IMU + ToF) ---
# # This is your existing model, included for a direct comparison.

# from src.nn_blocks import tof_block

# def create_model_8_two_branch(input_shape, imu_dim, wd=1e-4):
#     inp = tf.keras.layers.Input(shape=input_shape)
#     imu = tf.keras.layers.Lambda(lambda t: t[:, :, :imu_dim])(inp)
#     tof = tf.keras.layers.Lambda(lambda t: t[:, :, imu_dim:])(inp)

#     x1 = unet_se_cnn(imu, 4, base_filters=128, kernel_size=3)
#     x2 = tof_block(tof, wd)

#     x = features_processing(x1, x2)
#     x = tf.keras.layers.Dropout(0.3)(x) 
#     main_out = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(x)
#     gate_out = tf.keras.layers.Dense(1, activation="sigmoid", name="tof_gate")(x)
    
#     return tf.keras.models.Model(inputs=inp, outputs={"main_output": main_out, "tof_gate": gate_out})

2025-08-14 01:06:28.478705: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755129988.498575 1465473 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755129988.504215 1465473 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755129988.520183 1465473 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755129988.520213 1465473 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755129988.520215 1465473 computation_placer.cc:177] computation placer alr

In [2]:
# Placeholder per-sample input shape used when building the models in this cell.
input_shape = (128, 41)
# Registry of (display name, zero-argument factory) pairs, one per architecture.
# The lambdas look up the module-level `input_shape` when they are *called*,
# not when this list is created, so re-assigning `input_shape` later changes
# what every factory builds.
model_builders = [
    ("CNN_Baseline", lambda: create_model_1_cnn_baseline(input_shape)),
    ("GRU_Baseline", lambda: create_model_2_gru_baseline(input_shape)),
    ("CNN_RNN_Hybrid", lambda: create_model_3_cnn_rnn_hybrid(input_shape)),
    ("WaveNet_Style", lambda: create_model_4_wavenet_style(input_shape)),
    ("UNet_Style", lambda: create_model_5_unet_style(input_shape)),
    ("Transformer", lambda: create_model_6_transformer(input_shape)),
    ("CNN_Transformer", lambda: create_model_7_cnn_transformer(input_shape)),
    # The two-branch model is disabled: its builder is commented out and it
    # additionally needs `imu_dim` to split the input into IMU and ToF parts.
    # ("Two_Branch", lambda: create_model_8_two_branch(input_shape, imu_dim)),
]

def create_sequence_dataset_simple(df: pl.DataFrame, feature_cols: list):
    """Split a long-format frame into one feature matrix and one label per sequence.

    Rows are grouped by ``sequence_id`` in order of first appearance. For
    every group the ``feature_cols`` columns become a NumPy matrix and the
    label is the ``gesture_int`` value of the group's first row.

    Args:
        df: Frame containing ``sequence_id``, ``gesture_int`` and all of
            ``feature_cols``.
        feature_cols: Names of the feature columns to extract, in output order.

    Returns:
        ``(sequences, labels)``: an object-dtype array built from the
        per-sequence matrices and an array of labels, aligned by position.
    """
    # Materialise the groups once so features and labels stay aligned.
    per_sequence = [
        frame for _key, frame in df.group_by('sequence_id', maintain_order=True)
    ]
    feature_blocks = [frame.select(feature_cols).to_numpy() for frame in per_sequence]
    targets = [frame.select('gesture_int').item(0, 0) for frame in per_sequence]
    return np.array(feature_blocks, dtype=object), np.array(targets)

def train_single_output_model(model, train_dataset, val_dataset, epochs, initial_learning_rate, weight_decay):
    """Compile and fit a single-output classifier.

    The model is compiled with AdamW, categorical cross-entropy and an
    accuracy metric, then fitted with early stopping on validation accuracy
    (patience 20, best weights restored).

    Args:
        model: Keras model to train; it is compiled in place.
        train_dataset: Training data passed to ``model.fit``.
        val_dataset: Validation data passed to ``model.fit``.
        epochs: Maximum number of epochs.
        initial_learning_rate: Learning rate given to AdamW.
        weight_decay: Weight decay given to AdamW.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(
            learning_rate=initial_learning_rate,
            weight_decay=weight_decay,
        ),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Stop once validation accuracy has not improved for 20 epochs and roll
    # the model back to its best epoch.
    stop_when_stalled = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        mode='max',
        patience=20,
        restore_best_weights=True,
    )

    return model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=epochs,
        callbacks=[stop_when_stalled],
    )

In [3]:
# Quick look at the merged column names. `final_df` is only assigned in the
# data-preparation cell further down, so that cell has to be run first;
# otherwise this line raises NameError.
final_df.columns

NameError: name 'final_df' is not defined

In [None]:
# =====================================================================================
# CONFIGURATION
# =====================================================================================
LR_INIT = 5e-4  # initial learning rate handed to the training helpers
WD = 3e-3  # weight decay handed to the training helpers (AdamW in train_single_output_model)
NUM_CLASSES = 18  # size of the softmax output / one-hot labels; repeats the value set in the first cell
BATCH_SIZE = 64  # batch size of the tf.data training and validation pipelines
N_SPLITS = 4  # number of StratifiedKFold folds
MAX_PAD_LEN = 128  # every sequence is padded or truncated to this many time steps

# =====================================================================================
# MODEL DEFINITION (Your existing function)
# =====================================================================================

from src.nn_blocks import (
    wave_block, residual_se_cnn_block, tof_block_2, attention_layer
)

def create_model(dataset, imu_dim, wd=1e-4):
    """Build the two-branch (IMU + ToF) model with an auxiliary gate output.

    The input shape is taken from the first batch of ``dataset``. The feature
    axis is split at ``imu_dim``: the first ``imu_dim`` channels go through
    two stacked ``unet_se_cnn`` calls, the remaining channels through
    ``tof_block_2``, and ``features_processing`` merges both branches.

    Args:
        dataset: Iterable of batches; element ``[0]`` of the first batch must
            expose ``.shape`` including the batch axis. Note that one batch is
            pulled from the iterable to read that shape.
        imu_dim: Number of leading feature channels that belong to the IMU
            branch.
        wd: Value passed to ``tof_block_2``.

    Returns:
        An uncompiled Keras model with a dict of outputs: ``main_output``
        (softmax over ``NUM_CLASSES``) and ``tof_gate`` (single sigmoid unit).
    """
    sample_batch = next(iter(dataset))
    input_shape = sample_batch[0].shape[1:]  # drop the batch axis
    inp = tf.keras.layers.Input(shape=input_shape)
    # Slice the last axis into the two sensor groups.
    imu = tf.keras.layers.Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = tf.keras.layers.Lambda(lambda t: t[:, :, imu_dim:])(inp)

    xa = unet_se_cnn(imu, 3, base_filters=128, kernel_size=3) # 64,128
    xa = unet_se_cnn(xa, 3, base_filters=128, kernel_size=5)
    # xb = tf.keras.layers.MaxPool1D(2)(xb) # 64,128
    # x1 = tf.keras.layers.Concatenate()([xa, xb])
    # x1 = tf.keras.layers.Conv1D(filters=128, kernel_size=3, strides=2, padding='same', activation='relu')(x1)

    # input_shape=[(None, 64, 256), (None, 32, 128)
    x2 = tof_block_2(tof, wd)

    x = features_processing(xa, x2)
    x = tf.keras.layers.Dropout(0.3)(x)
    # Use the shared constant rather than a literal 18 so this head stays in
    # step with the other model builders and with the one-hot label width.
    main_out = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax", name="main_output")(x)
    gate_out = tf.keras.layers.Dense(1, activation="sigmoid", name="tof_gate")(x)

    return tf.keras.models.Model(inputs=inp, outputs={"main_output": main_out, "tof_gate": gate_out})

# =====================================================================================
# TRAINING LOGIC
# =====================================================================================

# Locations of the engineered feature files and of the raw competition data.
FEATURE_DIR = Path('output')
RAW_DIR = Path('input/cmi-detect-behavior-with-sensor-data')
RANDOM_STATE = 42  # seed for the cross-validation shuffle

# Feature files (relative to FEATURE_DIR) to merge onto the base table.
# Commented-out entries are feature sets that are currently disabled.
files_to_merge = [
    # "imu_physics_feats.parquet",
    # "imu_rolling_stats_features.parquet",
    # "imu_cross_modal_features.parquet",
    # 'output/kaggle_0.8_feats.parquet',
    'imu_basic_physics_feats.parquet',
    'tof_basic_kaggle_feats.parquet'
    ]

feature_paths = [FEATURE_DIR / f for f in files_to_merge]
base_df = pl.read_parquet(FEATURE_DIR / "cleaned_base_train_data.parquet")
demographics_df = pl.read_csv(RAW_DIR / "train_demographics.csv")
base_df = base_df.join(demographics_df, on='subject', how='left')

# Keep only the metadata and thermopile columns in the base table.
# NOTE(review): this select keeps just these nine columns, so the demographic
# columns brought in by the join above are dropped again here -- confirm
# whether the join is still needed.
meta_cols = ['sequence_id', 'sequence_counter', 'subject', 'gesture']
thm_cols = ['thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5']
base_df = base_df.select(meta_cols+thm_cols)

# Integer-encode the gesture labels; `le` is reused later for report labels.
le = LabelEncoder()
gesture_encoded = le.fit_transform(base_df.get_column('gesture'))
base_df = base_df.with_columns(pl.Series("gesture_int", gesture_encoded))  

# Presumably joins every feature file onto base_df (the run log prints
# "Loading and joining features from: ..."); see src.merge_feats_dynamic.
final_df = merge_feature_sets(base_df, feature_paths)
print(f"  Final merged DataFrame created with shape: {final_df.shape}")

# Everything that is neither metadata nor a demographic column is a feature.
all_final_columns = final_df.columns
final_meta_cols = {'gesture', 'gesture_int', 'subject', 'sequence_id', 'sequence_counter'}
demographic_cols = {'adult_child', 'age', 'sex', 'handedness', 'height_cm', 'shoulder_to_wrist_cm', 'elbow_to_wrist_cm'}
all_feature_cols = [c for c in all_final_columns if c not in final_meta_cols and c not in demographic_cols]

# IMU features are identified purely by column-name prefix.
imu_cols = [c for c in all_feature_cols if c.startswith(('acc_', 'rot_', 'linear_', 'angular_'))]
imu_dim = len(imu_cols)

# One row per sequence (its id and first gesture label), sorted by id; these
# two aligned arrays drive the stratified split.
cv_info = final_df.group_by("sequence_id").agg(pl.first("gesture_int")).sort("sequence_id")
all_sequence_ids = cv_info.get_column("sequence_id").to_numpy()
y_for_split = cv_info.get_column("gesture_int").to_numpy()

# Per-sample model input: padded length x number of feature columns.
input_shape = (MAX_PAD_LEN, len(all_feature_cols)) 

# Registry of (display name, zero-argument factory) pairs. The lambdas read
# the module-level `input_shape` at call time, i.e. the value assigned above.
model_builders = [
    ("CNN_Baseline", lambda: create_model_1_cnn_baseline(input_shape)),
    ("GRU_Baseline", lambda: create_model_2_gru_baseline(input_shape)),
    ("CNN_RNN_Hybrid", lambda: create_model_3_cnn_rnn_hybrid(input_shape)),
    ("WaveNet_Style", lambda: create_model_4_wavenet_style(input_shape)),
    ("UNet_Style", lambda: create_model_5_unet_style(input_shape)),
    ("Transformer", lambda: create_model_6_transformer(input_shape)),
    ("CNN_Transformer", lambda: create_model_7_cnn_transformer(input_shape)),
    # This model requires the full input_shape and the imu_dim to split the branches
    # ("Two_Branch", lambda: create_model_8_two_branch(input_shape, imu_dim)),
]

# Cross-validated comparison: every architecture in `model_builders` is
# trained from scratch on each fold and scored on that fold's held-out
# sequences; out-of-fold predictions are pooled for a per-class report.
#
# NOTE(review): folds are stratified by gesture only, so sequences of the same
# `subject` can end up in both the training and the validation split. If
# subject-independent scores are wanted, a grouped splitter would be needed --
# confirm the intended protocol.
for model_name, model_builder in model_builders:
    print("\n" + "="*60)
    print(f"▶ Training and Evaluating Model: {model_name}")
    print("="*60)

    # Same seed for every architecture, so all models see identical folds.
    kf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)
    fold_accuracies = []
    all_preds = []
    all_labels = []

    for fold_idx, (train_indices, val_indices) in enumerate(kf.split(all_sequence_ids, y_for_split)):
        print(f"\n=== Fold {fold_idx + 1}/{N_SPLITS} for {model_name} ===")
        train_ids = all_sequence_ids[train_indices]
        val_ids = all_sequence_ids[val_indices]

        train_df = final_df.filter(pl.col('sequence_id').is_in(train_ids))
        val_df = final_df.filter(pl.col('sequence_id').is_in(val_ids))

        # --- Data Preparation for the Fold ---
        # The scaler is fitted on the training rows only and then applied to
        # the validation rows.
        scaler = StandardScaler()
        train_features_scaled = scaler.fit_transform(train_df[all_feature_cols])
        val_features_scaled = scaler.transform(val_df[all_feature_cols])
        X_train_scaled_features = pl.DataFrame(train_features_scaled, schema=all_feature_cols)
        X_val_scaled_features = pl.DataFrame(val_features_scaled, schema=all_feature_cols)

        meta_cols_to_keep = ['sequence_id', 'gesture_int']
        train_df_final = train_df.select(meta_cols_to_keep).with_columns(X_train_scaled_features)
        val_df_final = val_df.select(meta_cols_to_keep).with_columns(X_val_scaled_features)

        X_train, y_train = create_sequence_dataset_simple(train_df_final, all_feature_cols)
        X_val, y_val = create_sequence_dataset_simple(val_df_final, all_feature_cols)

        # Pad (with zeros) or cut every sequence at the end to MAX_PAD_LEN steps.
        X_train_padded = pad_sequences(X_train, maxlen=MAX_PAD_LEN, padding='post', truncating='post', dtype='float32')
        X_val_padded = pad_sequences(X_val, maxlen=MAX_PAD_LEN, padding='post', truncating='post', dtype='float32')

        y_train_cat = to_categorical(y_train, num_classes=NUM_CLASSES)
        y_val_cat = to_categorical(y_val, num_classes=NUM_CLASSES)

        train_dataset = tf.data.Dataset.from_tensor_slices(
            (X_train_padded, y_train_cat)
        ).shuffle(len(X_train_padded)).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
        # The validation pipeline is not shuffled, so predictions come back in
        # the same order as y_val_cat.
        val_dataset = tf.data.Dataset.from_tensor_slices(
            (X_val_padded, y_val_cat)
        ).batch(BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)

        # The NumPy copies are no longer needed once the datasets exist.
        del X_train, y_train, X_val, y_val, X_train_padded, X_val_padded
        gc.collect()

        # --- Build and Train Model ---
        model = model_builder()

        # Use the appropriate training function based on the model type
        if model_name == "Two_Branch":
            history = train_model(model, train_dataset, val_dataset, 150, LR_INIT, WD)
        else:
            history = train_single_output_model(model, train_dataset, val_dataset, 150, LR_INIT, WD)

        # --- Evaluation ---
        val_preds = model.predict(val_dataset)
        # The two-branch model returns a dict of outputs; the others a single array.
        main_output_preds = val_preds['main_output'] if model_name == "Two_Branch" else val_preds

        y_pred_fold = np.argmax(main_output_preds, axis=1)
        y_true_fold = np.argmax(y_val_cat, axis=1)
        fold_acc = accuracy_score(y_true_fold, y_pred_fold)
        fold_accuracies.append(fold_acc)
        print(f"Fold {fold_idx + 1} Accuracy: {fold_acc:.4f}")
        all_preds.append(y_pred_fold)
        all_labels.append(y_true_fold)

        del train_dataset, model, val_dataset
        # Many models are built in this loop; clearing Keras' global state
        # between them keeps memory use from growing fold after fold.
        tf.keras.backend.clear_session()
        gc.collect()

    # --- FINAL OOF REPORT for this model architecture ---
    print(f"\n=== OOF Summary for {model_name} ===")
    print(f"Per-fold Accuracies: {[round(a, 4) for a in fold_accuracies]}")
    print(f"Mean Accuracy: {np.mean(fold_accuracies):.4f} ± {np.std(fold_accuracies):.4f}")
    y_all_pred = np.concatenate(all_preds)
    y_all_true = np.concatenate(all_labels)
    print("\n=== Overall Classification Report ===")
    # Passing `labels` explicitly pins row i of the report to le.classes_[i]
    # even if some class never shows up in the pooled predictions or labels.
    print(classification_report(
        y_all_true,
        y_all_pred,
        labels=np.arange(len(le.classes_)),
        target_names=le.classes_,
        digits=4,
    ))

 Starting merge process...
  Loading and joining features from: imu_basic_physics_feats.parquet
  Loading and joining features from: tof_basic_kaggle_feats.parquet
  Merge complete.

▶ Training and Evaluating Model: CNN_Baseline

=== Fold 1/4 for CNN_Baseline ===


I0000 00:00:1755129927.854390 1464569 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4714 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1
2025-08-14 01:05:27.857049: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 128324096 exceeds 10% of free system memory.
2025-08-14 01:05:28.158680: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 128324096 exceeds 10% of free system memory.
2025-08-14 01:05:28.275464: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 42781696 exceeds 10% of free system memory.
2025-08-14 01:05:28.332931: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 42781696 exceeds 10% of free system memory.


Epoch 1/150


2025-08-14 01:05:29.429831: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 128324096 exceeds 10% of free system memory.
I0000 00:00:1755129936.539266 1465181 service.cc:152] XLA service 0x73b010004ab0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755129936.539316 1465181 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce GTX 1060, Compute Capability 6.1
2025-08-14 01:05:36.804330: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755129938.005682 1465181 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m 9/96[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 16ms/step - accuracy: 0.0738 - loss: 5.1388 

I0000 00:00:1755129952.090723 1465181 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step - accuracy: 0.1076 - loss: 3.8451

KeyboardInterrupt: 

In [None]:
import traceback
# =====================================================================================
# ARCHITECTURE SANITY CHECK
# =====================================================================================

# --- Step 1: Get a sample batch and define shapes ---
# (This part of your code is correct)
# Make sure your train_dataset is created before this block
try:
    # Needs `train_dataset` and `imu_cols` from the training cell.
    sample_batch = next(iter(train_dataset))
    sample_input = sample_batch[0]
    input_shape = sample_input.shape[1:]  # drop the batch axis
    imu_dim = len(imu_cols)
    print(f"Sample input shape for testing: {sample_input.shape}\n")
except Exception as e:
    print(f"Could not get a sample from the dataset. Error: {e}")
    # Nothing below can run without a sample. Re-raise instead of calling
    # exit(): exit() is a convenience intended for the interactive
    # interpreter, and re-raising stops the cell while keeping the original
    # traceback visible.
    raise

# --- Step 2: Create a list of all model-building functions ---
# Each factory reads the `input_shape` derived from the sample batch above
# at the moment it is called.
model_builders = [
    ("CNN_Baseline", lambda: create_model_1_cnn_baseline(input_shape)),
    ("GRU_Baseline", lambda: create_model_2_gru_baseline(input_shape)),
    ("CNN_RNN_Hybrid", lambda: create_model_3_cnn_rnn_hybrid(input_shape)),
    ("WaveNet_Style", lambda: create_model_4_wavenet_style(input_shape)),
    ("UNet_Style", lambda: create_model_5_unet_style(input_shape)),
    ("Transformer", lambda: create_model_6_transformer(input_shape)),
    ("CNN_Transformer", lambda: create_model_7_cnn_transformer(input_shape)),
    # The two-branch model is disabled; it also needs `imu_dim`.
    # ("Two_Branch", lambda: create_model_8_two_branch(input_shape, imu_dim)),
]

# --- Step 3: Loop through the models, build them, and test with the sample ---
print("--- Testing all model architectures with a sample batch ---")
for model_name, model_builder in model_builders:
    print("\n" + "="*60)
    print(f"▶ Testing Model: {model_name}")
    print("="*60)

    # Bind the name up front so the cleanup in `finally` is valid even when
    # the builder raises before a model exists.
    model = None
    try:
        # 1. Build the model using the builder function
        model = model_builder()

        # Optional: Print the model summary to check its structure
        print(f"Model Summary for {model_name}:")
        model.summary()

        # 2. Pass the sample input through the model
        print(f"\nPerforming forward pass for {model_name}...")
        output = model(sample_input)

        # 3. Print the output shape to verify it's correct
        print(f"✅ SUCCESS: Model '{model_name}' ran successfully.")
        # Multi-output models return a dict or list; single-output models a tensor.
        if isinstance(output, dict):
            for key, value in output.items():
                print(f"   Output '{key}' shape: {value.shape}")
        elif isinstance(output, list):
            for i, value in enumerate(output):
                print(f"   Output {i} shape: {value.shape}")
        else:
            print(f"   Output shape: {output.shape}")

    except Exception:
        # Report the failure and move on, so one broken architecture does not
        # hide the results for the others.
        print(f"❌ ERROR: Model '{model_name}' failed to build or run.")
        traceback.print_exc() # This will print the full error traceback
    finally:
        # Clean up the created model to save memory
        del model
        gc.collect()

print("\n--- Model architecture testing complete. ---")

Sample input shape for testing: (64, 128, 36)

--- Testing all model architectures with a sample batch ---

▶ Testing Model: CNN_Baseline
Model Summary for CNN_Baseline:



Performing forward pass for CNN_Baseline...
✅ SUCCESS: Model 'CNN_Baseline' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: GRU_Baseline
Model Summary for GRU_Baseline:



Performing forward pass for GRU_Baseline...
✅ SUCCESS: Model 'GRU_Baseline' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: CNN_RNN_Hybrid
Model Summary for CNN_RNN_Hybrid:



Performing forward pass for CNN_RNN_Hybrid...
✅ SUCCESS: Model 'CNN_RNN_Hybrid' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: WaveNet_Style
Model Summary for WaveNet_Style:



Performing forward pass for WaveNet_Style...
✅ SUCCESS: Model 'WaveNet_Style' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: UNet_Style
Model Summary for UNet_Style:



Performing forward pass for UNet_Style...
✅ SUCCESS: Model 'UNet_Style' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: Transformer
Model Summary for Transformer:



Performing forward pass for Transformer...
✅ SUCCESS: Model 'Transformer' ran successfully.
   Output shape: (64, 18)

▶ Testing Model: CNN_Transformer
Model Summary for CNN_Transformer:



Performing forward pass for CNN_Transformer...
✅ SUCCESS: Model 'CNN_Transformer' ran successfully.
   Output shape: (64, 18)

--- Model architecture testing complete. ---
