# 04 - Feature Engineering

## Overview
Build `stint_features.parquet` with all required columns for modeling, and save the rows that have a degradation target as `degradation_train.parquet`.

In [6]:
import sys
from pathlib import Path
# Put `<cwd>/../src` at the front of the import path before importing f1ts.
# NOTE(review): presumably the f1ts package lives in that directory; this relies on the
# kernel's working directory being one level below the project root - confirm.
sys.path.insert(0, str(Path.cwd().parent / 'src'))

import pandas as pd
# Project modules used below: path config, flat-file I/O, feature building, validation.
from f1ts import config, io_flat, features, validation

## Load

In [7]:
# Resolve the project directories once and reuse them for every path below.
paths = config.paths()
processed_dir = paths['data_processed']
raw_dir = paths['data_raw']
lookups_dir = paths['data_lookups']

# Inputs: processed lap data plus the raw session metadata.
laps_path = processed_dir / 'laps_processed.parquet'
sessions_path = raw_dir / 'sessions.csv'
laps_processed = io_flat.read_parquet(laps_path)
sessions = io_flat.read_csv(sessions_path)

# The two lookup tables are kept as plain string paths rather than loaded here.
pitloss_csv = str(lookups_dir / 'pitloss_by_circuit.csv')
hazard_csv = str(lookups_dir / 'hazard_priors.csv')

✓ Loaded laps_processed.parquet: 2,533 rows, 22 cols
  Dtypes: {'session_key': dtype('O'), 'driver': dtype('O'), 'lap': dtype('int64'), 'lap_time_ms': dtype('int64'), 'sector1_ms': dtype('int64'), 'sector2_ms': dtype('int64'), 'sector3_ms': dtype('int64'), 'compound': dtype('O'), 'tyre_life': dtype('int64'), 'is_pit_lap': dtype('bool'), 'track_status': dtype('O'), 'stint_id': dtype('int64'), 'tyre_age_laps': dtype('int64'), 'air_temp': dtype('float64'), 'track_temp': dtype('float64'), 'humidity': dtype('float64'), 'rainfall': dtype('bool'), 'wind_speed': dtype('float64'), 'event_type': dtype('O'), 'duration_laps': dtype('float64'), 'lap_number': dtype('int64'), 'circuit_name': dtype('O')}
✓ Loaded sessions.csv: 3 rows, 6 cols


## Transform: Assemble Features

In [8]:
# Build the feature table from the laps, the session metadata and the two lookup CSV paths.
feature_inputs = (laps_processed, sessions, pitloss_csv, hazard_csv)
stint_features = features.assemble_feature_table(*feature_inputs)

Assembling feature table...
✓ Added rolling pace features
✓ Estimated degradation slopes
✓ Added sector deltas
✓ Joined pit loss lookup
✓ Added hazard baselines
✓ Created degradation target
✓ Feature table complete: 2,533 rows, 32 columns


  return np.nanmean(a, axis, out=out, keepdims=keepdims)


## Validate

In [9]:
# Run the project's validation for the feature table, then report its width.
validation.validate_stint_features(stint_features)

n_feature_cols = stint_features.shape[1]
print(f'\n✓ Feature table has {n_feature_cols} columns')

✓ Schema validation passed for stint_features
✓ Uniqueness validation passed for stint_features on ['session_key', 'driver', 'lap']
✓ No NA values in required columns for stint_features
✓ Categorical validation passed for stint_features.compound
✓ All validations passed for stint_features (2,533 rows)

✓ Feature table has 32 columns


## Save

In [10]:
# Persist the full feature table.
features_dir = config.paths()['data_features']
stint_features_path = features_dir / 'stint_features.parquet'
io_flat.write_parquet(stint_features, stint_features_path)

# The degradation training subset keeps only rows whose target is not missing.
has_target = stint_features['target_deg_ms'].notna()
deg_train = stint_features.loc[has_target].copy()
io_flat.write_parquet(deg_train, features_dir / 'degradation_train.parquet')

print('✓ Saved feature tables')

✓ Saved stint_features.parquet: 2,533 rows, 32 cols
✓ Saved degradation_train.parquet: 2,533 rows, 32 cols
✓ Saved feature tables


## Repro Notes

- Added rolling pace features
- Estimated degradation slopes
- Added sector deltas
- Joined pit loss and hazard lookups
- Created degradation target variable