In [1]:
import os, json, joblib, numpy as np, pandas as pd
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")

from scipy.spatial.transform import Rotation as R

from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.utils import Sequence, to_categorical, pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, add, MaxPooling1D, Dropout,
    Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply, Reshape,
    Lambda, Concatenate, GRU, GaussianNoise
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
import tensorflow as tf
import polars as pl

import matplotlib.pyplot as plt

2025-08-08 21:42:36.496377: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754685756.515512   97980 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754685756.521000   97980 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754685756.538160   97980 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754685756.538192   97980 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1754685756.538194   97980 computation_placer.cc:177] computation placer alr

In [2]:
import random
def seed_everything(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    tf.experimental.numpy.random.seed(seed)
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
seed_everything(seed=42)

In [3]:
# (Competition metric will only be imported when TRAINing)
TRAIN = False                
DEBUG_GATE = False  # <-- [新機能] デバッグモードを有効化
                     
# RAW_DIR = Path("/kaggle/input/cmi-detect-behavior-with-sensor-data")
# RAW_DIR = Path("/input/cmi-detect-behavior-with-sensor-data")
# PRETRAINED_DIR = Path("/kaggle/input/cmi-pretrained-model-v5")
# EXPORT_DIR = Path("./")
RAW_DIR = Path("input/cmi-detect-behavior-with-sensor-data")
# PRETRAINED_DIR = Path("/kaggle/input/cmi-pretrained-model-v5")
EXPORT_DIR = Path("./")
BATCH_SIZE = 64
PAD_PERCENTILE = 95
LR_INIT = 5e-4
WD = 3e-3
MIXUP_ALPHA = 0.4
EPOCHS = 160
PATIENCE = 40
N_SPLITS = 5
MASKING_PROB = 0.25 # <-- [新機能] 学習時にTOF/THMをマスクする確率
GATE_LOSS_WEIGHT = 0.2

print("▶ imports ready · tensorflow", tf.__version__)

▶ imports ready · tensorflow 2.19.0


In [4]:
def remove_gravity_from_acc(acc_data, rot_data):
    acc_values = acc_data[['acc_x', 'acc_y', 'acc_z']].values
    quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    linear_accel = np.zeros_like(acc_values)
    gravity_world = np.array([0, 0, 9.81])
    for i in range(len(acc_values)):
        if np.all(np.isnan(quat_values[i])) or np.all(np.isclose(quat_values[i], 0)):
            linear_accel[i, :] = acc_values[i, :]
            continue
        try:
            rotation = R.from_quat(quat_values[i])
            gravity_sensor_frame = rotation.apply(gravity_world, inverse=True)
            linear_accel[i, :] = acc_values[i, :] - gravity_sensor_frame
        except ValueError:
             linear_accel[i, :] = acc_values[i, :]
    return linear_accel

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200):
    quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    angular_vel = np.zeros((len(quat_values), 3))
    for i in range(len(quat_values) - 1):
        q_t, q_t_plus_dt = quat_values[i], quat_values[i+1]
        if np.all(np.isnan(q_t)) or np.all(np.isnan(q_t_plus_dt)): continue
        try:
            rot_t = R.from_quat(q_t)
            rot_t_plus_dt = R.from_quat(q_t_plus_dt)
            delta_rot = rot_t.inv() * rot_t_plus_dt
            angular_vel[i, :] = delta_rot.as_rotvec() / time_delta
        except ValueError: pass
    return angular_vel

def calculate_angular_distance(rot_data):
    quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    angular_dist = np.zeros(len(quat_values))
    for i in range(len(quat_values) - 1):
        q1, q2 = quat_values[i], quat_values[i+1]
        if np.all(np.isnan(q1)) or np.all(np.isnan(q2)): continue
        try:
            r1, r2 = R.from_quat(q1), R.from_quat(q2)
            relative_rotation = r1.inv() * r2
            angular_dist[i] = np.linalg.norm(relative_rotation.as_rotvec())
        except ValueError: pass
    return angular_dist

In [5]:
#Tensor Manipulations
def time_sum(x): return K.sum(x, axis=1)
def squeeze_last_axis(x): return tf.squeeze(x, axis=-1)
def expand_last_axis(x): return tf.expand_dims(x, axis=-1)

def se_block(x, reduction=8):
    ch = x.shape[-1]
    se = GlobalAveragePooling1D()(x)
    se = Dense(ch // reduction, activation='relu')(se)
    se = Dense(ch, activation='sigmoid')(se)
    se = Reshape((1, ch))(se)
    return Multiply()([x, se])

def residual_se_cnn_block(x, filters, kernel_size, pool_size=2, drop=0.3, wd=1e-4):
    shortcut = x
    for _ in range(2):
        x = Conv1D(filters, kernel_size, padding='same', use_bias=False,
                   kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    x = se_block(x)
    if shortcut.shape[-1] != filters:
        shortcut = Conv1D(filters, 1, padding='same', use_bias=False,
                          kernel_regularizer=l2(wd))(shortcut)
        shortcut = BatchNormalization()(shortcut)
    x = add([x, shortcut])
    x = Activation('relu')(x)
    x = MaxPooling1D(pool_size)(x)
    x = Dropout(drop)(x)
    return x

def attention_layer(inputs):
    score = Dense(1, activation='tanh')(inputs)
    score = Lambda(squeeze_last_axis)(score)
    weights = Activation('softmax')(score)
    weights = Lambda(expand_last_axis)(weights)
    context = Multiply()([inputs, weights])
    context = Lambda(time_sum)(context)
    return context

In [7]:
class GatedMixupGenerator(Sequence):
    def __init__(self, X, y, batch_size, imu_dim, class_weight=None, alpha=0.2, masking_prob=0.0):
        self.X, self.y = X, y
        self.batch = batch_size
        self.imu_dim = imu_dim
        self.class_weight = class_weight
        self.alpha = alpha
        self.masking_prob = masking_prob
        self.indices = np.arange(len(X))
        
    def __len__(self):
        return int(np.ceil(len(self.X) / self.batch))

    def __getitem__(self, i):
        idx = self.indices[i*self.batch:(i+1)*self.batch]
        Xb, yb = self.X[idx].copy(), self.y[idx].copy()
        
        # サンプルごとの重みを計算
        sample_weights = np.ones(len(Xb), dtype='float32')
        if self.class_weight:
            y_integers = yb.argmax(axis=1)
            sample_weights = np.array([self.class_weight[i] for i in y_integers])
        
        gate_target = np.ones(len(Xb), dtype='float32')
        if self.masking_prob > 0:
            for i in range(len(Xb)):
                if np.random.rand() < self.masking_prob:
                    Xb[i, :, self.imu_dim:] = 0
                    gate_target[i] = 0.0

        if self.alpha > 0:
            lam = np.random.beta(self.alpha, self.alpha)
            perm = np.random.permutation(len(Xb))
            X_mix = lam * Xb + (1 - lam) * Xb[perm]
            y_mix = lam * yb + (1 - lam) * yb[perm]
            gate_target_mix = lam * gate_target + (1 - lam) * gate_target[perm]
            sample_weights_mix = lam * sample_weights + (1 - lam) * sample_weights[perm]
            return X_mix, {'main_output': y_mix, 'tof_gate': gate_target_mix}, sample_weights_mix

        return Xb, {'main_output': yb, 'tof_gate': gate_target}, sample_weights

    def on_epoch_end(self):
        np.random.shuffle(self.indices)

In [8]:
from src.nn_blocks import unet_se_cnn

def build_gated_two_branch_model(pad_len, imu_dim, tof_dim, n_classes, wd=1e-4):
    inp = Input(shape=(pad_len, imu_dim+tof_dim))
    imu = Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = Lambda(lambda t: t[:, :, imu_dim:])(inp)

    x1 = unet_se_cnn(imu, 32, 2, drop=0.1)#, wd=wd)

    x2_base = Conv1D(16, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(tof)
    x2_base = BatchNormalization()(x2_base); x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base); x2_base = Dropout(0.2)(x2_base)
    x2_base = Conv1D(32, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x2_base)
    x2_base = BatchNormalization()(x2_base); x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base); x2_base = Dropout(0.2)(x2_base)
    
    gate_input = GlobalAveragePooling1D()(tof)
    gate_input = Dense(16, activation='relu')(gate_input)
    gate = Dense(1, activation='sigmoid', name='tof_gate')(gate_input)
    
    x2 = Multiply()([x2_base, gate])

    merged = Concatenate()([x1, x2])
    xa = Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xb = Bidirectional(GRU(32, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xc = GaussianNoise(0.09)(merged)
    xc = Dense(16, activation='elu')(xc)
    x = Concatenate()([xa, xb, xc])
    x = Dropout(0.4)(x)
    x = attention_layer(x)
    for units, drop in [(129, 0.5), (32, 0.3)]:
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x); x = Activation('relu')(x)
        x = Dropout(drop)(x)

    out = Dense(n_classes, activation='softmax', name='main_output', kernel_regularizer=l2(wd))(x)
    
    return Model(inputs=inp, outputs=[out, gate])

In [9]:
def build_gated_two_branch_model(pad_len, imu_dim, tof_dim, n_classes, wd=1e-4):
    inp = Input(shape=(pad_len, imu_dim+tof_dim))
    imu = Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = Lambda(lambda t: t[:, :, imu_dim:])(inp)

    x1 = residual_se_cnn_block(imu, 64, 3, drop=0.1, wd=wd)
    x1 = residual_se_cnn_block(x1, 128, 5, drop=0.1, wd=wd)

    x2_base = Conv1D(64, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(tof)
    x2_base = BatchNormalization()(x2_base); x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base); x2_base = Dropout(0.2)(x2_base)
    x2_base = Conv1D(128, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x2_base)
    x2_base = BatchNormalization()(x2_base); x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base); x2_base = Dropout(0.2)(x2_base)
    
    gate_input = GlobalAveragePooling1D()(tof)
    gate_input = Dense(16, activation='relu')(gate_input)
    # 補助損失の計算対象となるため、レイヤーに名前を付ける
    gate = Dense(1, activation='sigmoid', name='tof_gate')(gate_input)
    
    x2 = Multiply()([x2_base, gate])

    merged = Concatenate()([x1, x2])
    xa = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xb = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xc = GaussianNoise(0.09)(merged)
    xc = Dense(16, activation='elu')(xc)
    x = Concatenate()([xa, xb, xc])
    x = Dropout(0.4)(x)
    x = attention_layer(x)
    for units, drop in [(256, 0.5), (128, 0.3)]:
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x); x = Activation('relu')(x)
        x = Dropout(drop)(x)

    out = Dense(n_classes, activation='softmax', name='main_output', kernel_regularizer=l2(wd))(x)

    return Model(inputs=inp, outputs=[out, gate])

In [10]:
# if DEBUG_GATE and not TRAIN:
#     print("▶ GATE DEBUG MODE – preparing data to analyze trained models...")
    
#     # --- 1. 学習時と全く同じデータ準備プロセスを実行 ---
#     df = pd.read_csv(RAW_DIR / "train.csv")
#     train_dem_df = pd.read_csv(RAW_DIR / "train_demographics.csv")
#     df = pd.merge(df, train_dem_df, on='subject', how='left')
#     le = LabelEncoder()
#     df['gesture_int'] = le.fit_transform(df['gesture'])
#     print("  Calculating engineered features...")
#     df['acc_mag'] = np.sqrt(df['acc_x']**2 + df['acc_y']**2 + df['acc_z']**2)
#     df['rot_angle'] = 2 * np.arccos(df['rot_w'].clip(-1, 1))
#     df['acc_mag_jerk'] = df.groupby('sequence_id')['acc_mag'].diff().fillna(0)
#     df['rot_angle_vel'] = df.groupby('sequence_id')['rot_angle'].diff().fillna(0)
#     cols_for_stats = ['acc_mag', 'rot_angle', 'acc_mag_jerk', 'rot_angle_vel']
#     for col in cols_for_stats:
#         df[f'{col}_skew'] = df.groupby('sequence_id')[col].transform('skew')
#         df[f'{col}_kurt'] = df.groupby('sequence_id')[col].transform(pd.Series.kurtosis)
    
#     # --- [修正点] 成果物の読み込み元から特徴量リストを取得 ---
#     final_feature_cols = np.load(PRETRAINED_DIR / "feature_cols.npy", allow_pickle=True).tolist()
    
#     print("  Building sequences...")
#     seq_gp = df.groupby('sequence_id')
#     X_list_unscaled, y_list_int, groups_list, lens = [], [], [], []
#     for seq_id, seq_df in seq_gp:
#         seq_df_copy = seq_df.copy()
#         for i in range(1, 6):
#             pixel_cols_tof = [f"tof_{i}_v{p}" for p in range(64)]
#             tof_sensor_data = seq_df_copy[pixel_cols_tof].replace(-1, np.nan)
#             seq_df_copy[f'tof_{i}_mean'] = tof_sensor_data.mean(axis=1)
#             seq_df_copy[f'tof_{i}_std']  = tof_sensor_data.std(axis=1)
#             seq_df_copy[f'tof_{i}_min']  = tof_sensor_data.min(axis=1)
#             seq_df_copy[f'tof_{i}_max']  = tof_sensor_data.max(axis=1)
#         mat_unscaled = seq_df_copy[final_feature_cols].ffill().bfill().fillna(0).values.astype('float32')
#         X_list_unscaled.append(mat_unscaled)
#         y_list_int.append(seq_df_copy['gesture_int'].iloc[0])
#         groups_list.append(seq_df_copy['subject'].iloc[0])
#         lens.append(len(mat_unscaled))

#     print("  Loading scaler and padding sequences...")
#     # --- [修正点] 読み込み元を PRETRAINED_DIR に変更 ---
#     scaler = joblib.load(PRETRAINED_DIR / "scaler.pkl") 
#     pad_len = int(np.load(PRETRAINED_DIR / "sequence_maxlen.npy"))
    
#     X_scaled_list = [scaler.transform(x_seq) for x_seq in X_list_unscaled]
#     del X_list_unscaled
#     X = pad_sequences(X_scaled_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')
#     del X_scaled_list
#     y_stratify = np.array(y_list_int)
#     groups = np.array(groups_list)

#     # --- 2. CV分割を再現し、各フォールドのモデルを分析 ---
#     sgkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
#     custom_objs = {
#         'time_sum': time_sum, 'squeeze_last_axis': squeeze_last_axis, 'expand_last_axis': expand_last_axis,
#         'se_block': se_block, 'residual_se_cnn_block': residual_se_cnn_block, 'attention_layer': attention_layer,
#     }

#     for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y_stratify, groups)):
#         print(f"\n===== Analyzing FOLD {fold+1}/{N_SPLITS} =====")
#         X_val = X[val_idx]
        
#         # --- [修正点] 読み込み元を PRETRAINED_DIR に変更 ---
#         model_path = PRETRAINED_DIR / f"gesture_model_fold_{fold}.h5"
#         if not model_path.exists():
#             print(f"Model file not found: {model_path}. Skipping this fold.")
#             continue
#         model = load_model(model_path, compile=False, custom_objects=custom_objs)

#         if 'tof_gate' not in [layer.name for layer in model.layers]:
#             print("Error: 'tof_gate' layer not found in the model.")
#             break
#         gate_output = model.get_layer('tof_gate').output
#         debug_model = Model(inputs=model.input, outputs=[model.output, gate_output])

#         print("  Predicting on validation data to get gate values...")
#         _, gate_values = debug_model.predict(X_val, batch_size=64) # BATCH_SIZE
#         gate_values = gate_values.flatten()

#         print(f"  Gate Value Stats: Mean={np.mean(gate_values):.4f}, Std={np.std(gate_values):.4f}, Min={np.min(gate_values):.4f}, Max={np.max(gate_values):.4f}")

#         plt.figure(figsize=(10, 5))
#         plt.hist(gate_values, bins=50, range=(0, 1), color='skyblue', edgecolor='black')
#         plt.title(f"Fold {fold+1} Gate Value Distribution", fontsize=16)
#         plt.xlabel("Gate Value (0 = TOF/THM Off, 1 = TOF/THM On)", fontsize=12)
#         plt.ylabel("Frequency", fontsize=12)
#         plt.grid(axis='y', alpha=0.75)
#         plt.show()
    
#     print("\n✔ Gate analysis finished.")

In [11]:
print("▶ TRAIN MODE – loading dataset …")
df = pd.read_csv(RAW_DIR / "train.csv")
train_dem_df = pd.read_csv(RAW_DIR / "train_demographics.csv")
df = pd.merge(df, train_dem_df, on='subject', how='left')
le = LabelEncoder()
df['gesture_int'] = le.fit_transform(df['gesture'])
np.save(EXPORT_DIR / "gesture_classes.npy", le.classes_)

# --- [変更点] 物理FEの導入 ---
print("  Removing gravity and calculating linear acceleration features...")
linear_accel_list = [pd.DataFrame(remove_gravity_from_acc(group[['acc_x', 'acc_y', 'acc_z']], group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['linear_acc_x', 'linear_acc_y', 'linear_acc_z'], index=group.index) for _, group in df.groupby('sequence_id')]
df = pd.concat([df, pd.concat(linear_accel_list)], axis=1)
df['linear_acc_mag'] = np.sqrt(df['linear_acc_x']**2 + df['linear_acc_y']**2 + df['linear_acc_z']**2)
df['linear_acc_mag_jerk'] = df.groupby('sequence_id')['linear_acc_mag'].diff().fillna(0)

print("  Calculating angular velocity and distance from quaternions...")
angular_vel_list = [pd.DataFrame(calculate_angular_velocity_from_quat(group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['angular_vel_x', 'angular_vel_y', 'angular_vel_z'], index=group.index) for _, group in df.groupby('sequence_id')]
df = pd.concat([df, pd.concat(angular_vel_list)], axis=1)
angular_dist_list = [pd.DataFrame(calculate_angular_distance(group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]), columns=['angular_distance'], index=group.index) for _, group in df.groupby('sequence_id')]
df = pd.concat([df, pd.concat(angular_dist_list)], axis=1)

# --- [変更点] 物理FEを反映した特徴量リスト ---
imu_cols_base = ['linear_acc_x', 'linear_acc_y', 'linear_acc_z'] + [c for c in df.columns if c.startswith('rot_')]
imu_engineered = ['linear_acc_mag', 'linear_acc_mag_jerk', 'angular_vel_x', 'angular_vel_y', 'angular_vel_z', 'angular_distance']
imu_cols = list(dict.fromkeys(imu_cols_base + imu_engineered))

▶ TRAIN MODE – loading dataset …
  Removing gravity and calculating linear acceleration features...
  Calculating angular velocity and distance from quaternions...


In [18]:
thm_cols_original = [c for c in df.columns if c.startswith('thm_')]
tof_aggregated_cols_template = []
for i in range(1, 6): tof_aggregated_cols_template.extend([f'tof_{i}_mean', f'tof_{i}_std', f'tof_{i}_min', f'tof_{i}_max'])

final_feature_cols = imu_cols + thm_cols_original + tof_aggregated_cols_template
imu_dim_final = len(imu_cols)
tof_thm_aggregated_dim_final = len(thm_cols_original) + len(tof_aggregated_cols_template)

print(f"  IMU (phys-based) {imu_dim_final} | THM + Aggregated TOF {tof_thm_aggregated_dim_final} | total {len(final_feature_cols)} features")
np.save(EXPORT_DIR / "feature_cols.npy", np.array(final_feature_cols))

  IMU (phys-based) 13 | THM + Aggregated TOF 25 | total 38 features


In [21]:
print("  Building sequences...")
seq_gp = df.groupby('sequence_id') 
X_list_unscaled, y_list_int, groups_list, lens = [], [], [], [] 
for seq_id, seq_df in seq_gp:
    seq_df_copy = seq_df.copy()
    for i in range(1, 6):
        pixel_cols = [f"tof_{i}_v{p}" for p in range(64)]; tof_data = seq_df_copy[pixel_cols].replace(-1, np.nan)
        seq_df_copy[f'tof_{i}_mean'], seq_df_copy[f'tof_{i}_std'], seq_df_copy[f'tof_{i}_min'], seq_df_copy[f'tof_{i}_max'] = tof_data.mean(axis=1), tof_data.std(axis=1), tof_data.min(axis=1), tof_data.max(axis=1)
    X_list_unscaled.append(seq_df_copy[final_feature_cols].ffill().bfill().fillna(0).values.astype('float32'))
    y_list_int.append(seq_df_copy['gesture_int'].iloc[0])
    groups_list.append(seq_df_copy['subject'].iloc[0])
    lens.append(len(seq_df_copy))

  Building sequences...


In [35]:
print("  Fitting StandardScaler...")
all_steps_concatenated = np.concatenate(X_list_unscaled, axis=0)
scaler = StandardScaler().fit(all_steps_concatenated)
joblib.dump(scaler, EXPORT_DIR / "scaler.pkl")

print("  Scaling and padding sequences...")
X_scaled_list = [scaler.transform(x_seq) for x_seq in X_list_unscaled]
pad_len = int(np.percentile(lens, PAD_PERCENTILE)); np.save(EXPORT_DIR / "sequence_maxlen.npy", pad_len)
X = pad_sequences(X_scaled_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')
y_stratify, groups, y = np.array(y_list_int), np.array(groups_list), to_categorical(y_list_int, num_classes=len(le.classes_))

  Fitting StandardScaler...
  Scaling and padding sequences...


In [None]:
print("  Starting training with Stratified Group K-Fold CV...")
sgkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
oof_preds = np.zeros_like(y, dtype='float32')

for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y_stratify, groups)):
    print(f"\n===== FOLD {fold+1}/{N_SPLITS} =====")
    X_tr, X_val, y_tr, y_val = X[train_idx], X[val_idx], y[train_idx], y[val_idx]
    model = build_gated_two_branch_model(pad_len, imu_dim_final, tof_thm_aggregated_dim_final, len(le.classes_), wd=WD)
    model.compile(optimizer=Adam(LR_INIT),
                    loss={'main_output': tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1), 'tof_gate': tf.keras.losses.BinaryCrossentropy()},
                    loss_weights={'main_output': 1.0, 'tof_gate': GATE_LOSS_WEIGHT},
                    metrics={'main_output': 'accuracy'})
    class_weight_dict = dict(enumerate(compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_tr.argmax(1))))
    train_gen = GatedMixupGenerator(X_tr, y_tr, batch_size=BATCH_SIZE, imu_dim=imu_dim_final, class_weight=class_weight_dict, alpha=MIXUP_ALPHA, masking_prob=MASKING_PROB)
    val_gen = GatedMixupGenerator(X_val, y_val, batch_size=BATCH_SIZE, imu_dim=imu_dim_final)
    cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1, monitor='val_main_output_accuracy', mode='max')
    break
    model.fit(train_gen, epochs=EPOCHS, validation_data=val_gen, callbacks=[cb], verbose=1)
    model.save(EXPORT_DIR / f"gesture_model_fold_{fold}.h5")
    preds_val, _ = model.predict(X_val); oof_preds[val_idx] = preds_val

# (OOFスコア計算部分はcmi_2025_metric_copy_for_import.pyがあれば有効化)
print("\n✔ Training done.")


  Starting training with Stratified Group K-Fold CV...

===== FOLD 1/5 =====


I0000 00:00:1754685456.886735   29313 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4416 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1060, pci bus id: 0000:01:00.0, compute capability: 6.1



✔ Training done.
Overall OOF H-F1 Score = 0.3935


In [None]:

# 評価指標ファイルをインポートしてOOFスコアを計算
from src.metric import CompetitionMetric
true_oof_int = y.argmax(1)
pred_oof_int = oof_preds.argmax(1)

h_f1_oof = CompetitionMetric().calculate_hierarchical_f1(
    pd.DataFrame({'gesture': le.classes_[true_oof_int]}),
    pd.DataFrame({'gesture': le.classes_[pred_oof_int]}))
print(f"Overall OOF H-F1 Score = {h_f1_oof:.4f}")

In [None]:
# else:
print("▶ INFERENCE MODE – loading artefacts from", PRETRAINED_DIR)
final_feature_cols = np.load(PRETRAINED_DIR / "feature_cols.npy", allow_pickle=True).tolist()
pad_len        = int(np.load(PRETRAINED_DIR / "sequence_maxlen.npy"))
scaler         = joblib.load(PRETRAINED_DIR / "scaler.pkl")
gesture_classes = np.load(PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True)

# Note: imu_dim_final and tof_thm_aggregated_dim_final are not needed for inference here
# as the predict function works on the full feature set.

custom_objs = {
    'time_sum': time_sum, 'squeeze_last_axis': squeeze_last_axis, 'expand_last_axis': expand_last_axis,
    'se_block': se_block, 'residual_se_cnn_block': residual_se_cnn_block, 'attention_layer': attention_layer,
}

models = []
print(f"  Loading {N_SPLITS} models for ensemble inference...")
for fold in range(N_SPLITS):
    # --- [重要] 推論時にはカスタムオブジェクトに`build_gated_two_branch_model`は不要 ---
    # 保存されたモデルはレイヤーの構造を保持しているため、カスタムレイヤー/関数のみ渡せばよい
    model_path = PRETRAINED_DIR / f"gesture_model_fold_{fold}.h5"
    model = load_model(model_path, compile=False, custom_objects=custom_objs)
    models.append(model)
print("  Models, scaler, feature_cols, pad_len loaded – ready for evaluation")

In [None]:
# --- [新機能] TTA用のハイパーパラメータ ---
TTA_STEPS = 10  # TTAの実行回数。5〜10あたりが一般的。
TTA_NOISE_STDDEV = 0.01 # 入力データに加えるノイズの標準偏差

# --- [変更点] TTAを実装したpredict関数 ---
def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    df_seq = sequence.to_pandas()
    
    # --- 1. 特徴量エンジニアリング（変更なし） ---
    linear_accel = remove_gravity_from_acc(df_seq, df_seq)
    df_seq['linear_acc_x'], df_seq['linear_acc_y'], df_seq['linear_acc_z'] = linear_accel[:, 0], linear_accel[:, 1], linear_accel[:, 2]
    df_seq['linear_acc_mag'] = np.sqrt(df_seq['linear_acc_x']**2 + df_seq['linear_acc_y']**2 + df_seq['linear_acc_z']**2)
    df_seq['linear_acc_mag_jerk'] = df_seq['linear_acc_mag'].diff().fillna(0)
    angular_vel = calculate_angular_velocity_from_quat(df_seq)
    df_seq['angular_vel_x'], df_seq['angular_vel_y'], df_seq['angular_vel_z'] = angular_vel[:, 0], angular_vel[:, 1], angular_vel[:, 2]
    df_seq['angular_distance'] = calculate_angular_distance(df_seq)
    for i in range(1, 6):
        pixel_cols = [f"tof_{i}_v{p}" for p in range(64)]; tof_data = df_seq[pixel_cols].replace(-1, np.nan)
        df_seq[f'tof_{i}_mean'], df_seq[f'tof_{i}_std'], df_seq[f'tof_{i}_min'], df_seq[f'tof_{i}_max'] = tof_data.mean(axis=1), tof_data.std(axis=1), tof_data.min(axis=1), tof_data.max(axis=1)
        
    mat_unscaled = df_seq[final_feature_cols].ffill().bfill().fillna(0).values.astype('float32')
    mat_scaled = scaler.transform(mat_unscaled)
    pad_input = pad_sequences([mat_scaled], maxlen=pad_len, padding='post', truncating='post', dtype='float32')

    # --- 2. TTAループの実行 ---
    all_tta_predictions = []
    for _ in range(TTA_STEPS):
        # 元の入力データにノイズを加える (TTA_STEPS=1回目はノイズなしでも良い)
        if TTA_STEPS > 1 and _ > 0: # 最初の1回は元のデータで予測
             noisy_input = pad_input + tf.random.normal(shape=tf.shape(pad_input), mean=0.0, stddev=TTA_NOISE_STDDEV)
        else:
             noisy_input = pad_input

        # 5フォールドモデルでアンサンブル予測
        all_fold_predictions = []
        for model in models:
            # 主出力(ジェスチャー確率)のみを取得
            main_preds, _ = model.predict(noisy_input, verbose=0)
            all_fold_predictions.append(main_preds)
        
        # フォールド間の予測を平均
        avg_fold_prediction = np.mean(all_fold_predictions, axis=0)
        all_tta_predictions.append(avg_fold_prediction)

    # --- 3. TTAの結果を最終的に平均化 ---
    final_avg_prediction = np.mean(all_tta_predictions, axis=0)
    
    # 最も確率の高いクラスのインデックスを取得
    idx = int(final_avg_prediction.argmax())
    
    return str(gesture_classes[idx])

In [None]:
if not TRAIN:
    import kaggle_evaluation.cmi_inference_server
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

    if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        inference_server.serve()
    else:
        inference_server.run_local_gateway(
            data_paths=(
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
            )
        )

2025-06-26 00:47:21.722414: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
I0000 00:00:1750898842.782712      60 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-06-26 00:47:27.113142: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:var