# 03 - Build Foundation Sets

## Overview
Join the interim laps with weather data and track-status events to build the processed base tables: `laps_processed`, `stints`, and `events`.

In [6]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

import pandas as pd
from f1ts import config, io_flat, foundation, validation

## Load

In [7]:
# Locate the interim and raw data directories from the project configuration.
interim_dir = config.paths()['data_interim']
raw_dir = config.paths()['data_raw']

laps_interim = io_flat.read_parquet(interim_dir / 'laps_interim.parquet')
sessions = io_flat.read_csv(raw_dir / 'sessions.csv')

# Load all weather files.
# Path.glob yields matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the concatenated frame identical across machines and re-runs.
weather_files = sorted(raw_dir.glob('*_weather.csv'))
weather_data = [pd.read_csv(wf) for wf in weather_files]
# pd.concat raises on an empty list, so fall back to an empty frame when no
# weather files are present.
weather_raw = pd.concat(weather_data, ignore_index=True) if weather_data else pd.DataFrame()

✓ Loaded laps_interim.parquet: 2,533 rows, 13 cols
  Dtypes: {'session_key': dtype('O'), 'driver': dtype('O'), 'lap': dtype('int64'), 'lap_time_ms': dtype('int64'), 'sector1_ms': dtype('int64'), 'sector2_ms': dtype('int64'), 'sector3_ms': dtype('int64'), 'compound': dtype('O'), 'tyre_life': dtype('int64'), 'is_pit_lap': dtype('bool'), 'track_status': dtype('O'), 'stint_id': dtype('int64'), 'tyre_age_laps': dtype('int64')}
✓ Loaded sessions.csv: 3 rows, 6 cols


## Transform

In [8]:
# Run the foundation pipeline on the loaded inputs; it returns three tables
# (processed laps, stints, events) that the later cells validate and save.
laps_processed, stints, events = foundation.foundation_pipeline(laps_interim, weather_raw, sessions)

Starting foundation pipeline...
✓ Extracted 2 events
✓ Built processed laps: 2,533 rows
✓ Built stint aggregations: 120 stints


  laps[col] = laps.groupby('session_key')[col].fillna(method='ffill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='bfill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='ffill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='bfill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='ffill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='bfill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='ffill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='bfill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='ffill')
  laps[col] = laps.groupby('session_key')[col].fillna(method='bfill')


## Validate

In [9]:
# Check that the processed laps are unique on the (session, driver, lap) key.
lap_key_columns = ['session_key', 'driver', 'lap']
validation.validate_uniqueness(laps_processed, lap_key_columns, 'laps_processed')
print('✓ Validations passed')

✓ Uniqueness validation passed for laps_processed on ['session_key', 'driver', 'lap']
✓ Validations passed


## Save

In [10]:
# Write each processed table to the configured processed-data directory.
processed_dir = config.paths()['data_processed']
processed_outputs = (
    (laps_processed, 'laps_processed.parquet'),
    (stints, 'stints.parquet'),
    (events, 'events.parquet'),
)
for table, filename in processed_outputs:
    io_flat.write_parquet(table, processed_dir / filename)
print('✓ Saved processed data')

✓ Saved laps_processed.parquet: 2,533 rows, 22 cols
✓ Saved stints.parquet: 120 rows, 14 cols
✓ Saved events.parquet: 2 rows, 4 cols
✓ Saved processed data


## Repro Notes

- Joined laps with weather and events
- Built stint aggregations
- Extracted racing events from track status