In [None]:
import numpy as np
import pandas as pd
from pathlib import Path


OUTPUTS_DIR = Path("output")
EXPORT_DIR = Path("output")

# Layout of the time-of-flight columns: tof_<sensor>_v<pixel>.
TOF_SENSOR_IDS = range(1, 6)
TOF_PIXELS_PER_SENSOR = 64
# Pixel value that is treated as "no reading" and excluded from the statistics.
TOF_MISSING_VALUE = -1
TOF_STATS = ('mean', 'std', 'min', 'max')

metadata_cols = ['sequence_id', 'sequence_counter']
tof_feats = [f'tof_{i}_{stat}' for i in TOF_SENSOR_IDS for stat in TOF_STATS]
final_feature_cols = metadata_cols + tof_feats


def build_tof_features(frame):
    """Return per-row summary statistics of every ToF sensor.

    For each sensor the mean, standard deviation, minimum and maximum of its
    pixel columns are computed row by row, ignoring pixels equal to
    ``TOF_MISSING_VALUE``.  Gaps that remain (for example a row where every
    pixel of a sensor is missing) are forward- then backward-filled inside
    the row's own sequence, and anything still missing becomes 0.

    Args:
        frame: DataFrame holding ``sequence_id``, ``sequence_counter`` and
            the ``tof_<sensor>_v<pixel>`` columns.  It is not modified.

    Returns:
        A new DataFrame with the columns ``final_feature_cols`` and a fresh
        ``RangeIndex``.  Rows are ordered by ``sequence_id`` and keep their
        original relative order within a sequence.  Rows whose
        ``sequence_id`` is missing are dropped.  An empty input yields an
        empty frame instead of raising.
    """
    # The statistics are purely row-wise, so compute them once for the whole
    # frame instead of once per sequence.
    stats = {}
    for i in TOF_SENSOR_IDS:
        pixel_cols = [f"tof_{i}_v{p}" for p in range(TOF_PIXELS_PER_SENSOR)]
        pixels = frame[pixel_cols].replace(TOF_MISSING_VALUE, np.nan)
        stats[f'tof_{i}_mean'] = pixels.mean(axis=1).to_numpy()
        stats[f'tof_{i}_std'] = pixels.std(axis=1).to_numpy()
        stats[f'tof_{i}_min'] = pixels.min(axis=1).to_numpy()
        stats[f'tof_{i}_max'] = pixels.max(axis=1).to_numpy()

    # Positional (RangeIndex) assembly so a non-unique input index cannot
    # misalign the metadata and the statistics.
    feats = pd.concat(
        [frame[metadata_cols].reset_index(drop=True), pd.DataFrame(stats)],
        axis=1,
    )[final_feature_cols]

    # A row without a sequence_id belongs to no sequence.
    feats = feats[feats['sequence_id'].notna()]
    if feats.empty:
        return feats.reset_index(drop=True)

    # Sort rows the way iterating a (sorted) groupby would visit them; the
    # stable sort keeps the original order inside each sequence.
    group_no = feats.groupby('sequence_id').ngroup().to_numpy()
    feats = feats.iloc[np.argsort(group_no, kind='stable')]
    feats = feats.reset_index(drop=True)

    # Fill gaps without leaking values across sequence boundaries.
    keys = feats['sequence_id']
    fill_cols = [c for c in final_feature_cols if c != 'sequence_id']
    filled = feats[fill_cols].groupby(keys).ffill()
    filled = filled.groupby(keys).bfill().fillna(0)

    return pd.concat([feats[['sequence_id']], filled], axis=1)[final_feature_cols]


# The guard keeps file I/O out of imports.  A notebook kernel and a plain
# script both run with __name__ == "__main__", and the names assigned here
# are still module-level, so `df` and `final_df` remain available afterwards.
if __name__ == "__main__":
    EXPORT_DIR.mkdir(parents=True, exist_ok=True)
    df = pd.read_parquet(OUTPUTS_DIR / "cleaned_base_train_data.parquet")
    final_df = build_tof_features(df)
    final_df.to_parquet(EXPORT_DIR / 'tof_basic_kaggle_feats.parquet')

: 