<a href="https://colab.research.google.com/github/vicmcl/ml-laptime/blob/main/ml_laptime/notebook/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from pathlib import Path
import os
import glob
import seaborn as sns

import pandas as pd
import pickle

In [3]:
# Mount Google Drive in the Colab runtime and derive the project folder from it.
from google.colab import drive

# Release any existing mount before mounting again.
drive.flush_and_unmount()
drive.mount('/content/drive')

DRIVE_PATH = Path("/content/drive/MyDrive/")
# Project folder that holds the pickled race data read later in the notebook.
ML_PATH = DRIVE_PATH.joinpath("Projects", "laptime-simulation")

Mounted at /content/drive


In [4]:
# Every pickled season file in the project folder (file names start with "races").
data_path = glob.glob(os.path.join(ML_PATH, "races*.pkl"))

In [5]:
pip install -q fastf1

In [6]:
data_path

['/content/drive/MyDrive/Projects/laptime-simulation/races_2021.pkl',
 '/content/drive/MyDrive/Projects/laptime-simulation/races_2020.pkl',
 '/content/drive/MyDrive/Projects/laptime-simulation/races_2019.pkl']

In [7]:
# Load every season file into `data`, keyed by the year embedded in its file
# name (e.g. "races_2021.pkl" -> 2021). glob.glob returns matches in arbitrary
# order, so the season must come from the name rather than the list position.
data = {}
for season_file in data_path:
    season = int(Path(season_file).stem.rsplit("_", 1)[-1])
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    with open(season_file, "rb") as f:
        data[season] = pickle.load(f)

In [57]:
# Columns kept from the per-lap table of each race.
columns_laps = [
    'Time',
    'LapNumber',
    'Stint',
    # Speed readings; one of them is later selected by filter_speed_trap.
    'SpeedI1',
    'SpeedI2',
    'SpeedFL',
    'SpeedST',
    'Compound',
    'TyreLife',
    'LapTime',
]

# Columns kept from the telemetry table of each race.
columns_telemetry = ['SessionTime', 'Throttle', 'Brake', 'DRS']

# Columns kept from the weather table of each race.
columns_weather = ['Time', 'TrackTemp', 'WindSpeed']

# Numeric code for each tyre compound name; names missing here are encoded as
# 0.0 by process_lap_data.
compound_map = dict(zip(('SOFT', 'MEDIUM', 'HARD'), (1.0, 2.0, 3.0)))

In [58]:
def process_lap_data(gp, year):
    """Build the per-lap table for one Grand Prix of one season.

    Takes the ``columns_laps`` columns of ``data[year][gp]['laps']``, tags each
    row with the season and encodes the tyre compound as a number.

    Args:
        gp: Key of the Grand Prix inside ``data[year]``.
        year: Season key inside ``data``; also written to the ``Season`` column.

    Returns:
        A copy of the selected columns with a fresh 0..n-1 index, an added
        ``Season`` column, and ``Compound`` replaced by its ``compound_map``
        code (0.0 for anything not in the map, including missing values).
        The frame stored in ``data`` is not modified.
    """
    laps_data = data[year][gp]['laps'][columns_laps].copy()
    laps_data['Season'] = year
    # drop=True discards the old index whatever its name; the former
    # reset_index().drop('index') failed when the source index was named.
    laps_data = laps_data.reset_index(drop=True)
    laps_data['Compound'] = laps_data['Compound'].map(
        lambda compound: compound_map.get(compound, 0.0)
    )
    return laps_data

In [89]:
def map_drs(drs):
    """Return True when the raw DRS code is 10 or higher, otherwise False.

    NOTE(review): presumably codes of 10 and above mean the DRS flap is open
    in the telemetry source — confirm against the data provider's docs.
    """
    return bool(drs >= 10)

def process_telemetry_data(gp, year, laps):
    """Average the telemetry channels of one race over each lap.

    Each telemetry sample is matched to the first row of ``laps`` whose
    ``Time`` is at or after the sample's ``SessionTime`` (forward as-of merge),
    and the samples are then averaged per ``LapNumber``.

    Args:
        gp: Key of the Grand Prix inside ``data[year]``.
        year: Season key inside ``data``.
        laps: Frame with at least ``Time`` and ``LapNumber`` columns, sorted by
            ``Time`` (a requirement of ``pd.merge_asof``).

    Returns:
        Frame indexed by ``LapNumber`` holding the per-lap mean of every
        ``columns_telemetry`` column. ``DRS`` is the fraction of samples for
        which ``map_drs`` returned True. ``SessionTime`` is kept in the result.
        Samples later than the last lap ``Time`` get no lap and are left out.
    """
    telemetry_data = data[year][gp]['telemetry'][columns_telemetry].copy()
    telemetry_with_laps = pd.merge_asof(
        telemetry_data,
        laps[['Time', 'LapNumber']],
        left_on='SessionTime',
        right_on='Time',
        direction='forward',
    ).drop(columns='Time')
    # Replace the column instead of writing through .loc[:, 'DRS']: an in-place
    # write of booleans into the numeric column is an incompatible-dtype
    # assignment, which pandas has deprecated.
    telemetry_with_laps['DRS'] = telemetry_with_laps['DRS'].map(map_drs)
    telemetry_avg = telemetry_with_laps.groupby('LapNumber').mean()
    return telemetry_avg

In [72]:
def process_weather_data(gp, year):
    """Return an independent copy of the weather columns for one race.

    Args:
        gp: Key of the Grand Prix inside ``data[year]``.
        year: Season key inside ``data``.

    Returns:
        Copy of ``data[year][gp]['weather']`` restricted to ``columns_weather``.
    """
    race = data[year][gp]
    weather_table = race['weather']
    return weather_table[columns_weather].copy()

In [85]:
def merge_data(laps, weather, telemetry):
    """Combine lap, weather and per-lap telemetry data into one table.

    Args:
        laps: Lap table with ``Time``, ``LapNumber`` and ``LapTime`` columns,
            sorted by ``Time``.
        weather: Weather table with a ``Time`` column, sorted by ``Time``.
        telemetry: Per-lap telemetry indexed by ``LapNumber`` and containing a
            ``SessionTime`` column.

    Returns:
        One row per lap number: the lap columns (without ``Time``), the weather
        reading whose ``Time`` is nearest to the lap's ``Time``, the telemetry
        columns (without ``SessionTime``), and ``LapTime`` in seconds as float.
    """
    # Attach the closest weather sample to every lap, then key the rows by lap
    # number so they align with the telemetry index.
    lap_weather = (
        pd.merge_asof(laps, weather, on='Time', direction='nearest')
        .set_index('LapNumber')
        .drop(columns='Time')
    )
    merged_data = (
        pd.concat([lap_weather, telemetry], axis=1)
        .reset_index()
        .drop(columns='SessionTime')
    )
    # Replace the column rather than writing floats into the timedelta column
    # through .loc[:, 'LapTime']; that in-place incompatible-dtype write is
    # deprecated by pandas. Missing lap times become NaN.
    merged_data['LapTime'] = pd.to_timedelta(merged_data['LapTime']).dt.total_seconds()
    return merged_data

In [90]:
def preprocess_data(gp, year):
    """Run the full preprocessing pipeline for one Grand Prix of one season.

    Builds the lap, weather and telemetry tables (telemetry needs the lap
    table to assign samples to laps) and hands all three to ``merge_data``.

    Args:
        gp: Grand Prix key passed through to the ``process_*`` helpers.
        year: Season key passed through to the ``process_*`` helpers.

    Returns:
        Whatever ``merge_data`` returns for the three tables.
    """
    lap_table = process_lap_data(gp, year)
    weather_table = process_weather_data(gp, year)
    telemetry_table = process_telemetry_data(gp, year, lap_table)
    return merge_data(lap_table, weather_table, telemetry_table)

In [117]:
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# Single imputer instance with default settings; the preprocessing loop refits
# it on each column of each season's table.
si = SimpleImputer()

In [118]:
gp = 'Abu Dhabi Grand Prix'

# Collect one imputed table per season and concatenate once at the end.
# Growing `df` with pd.concat inside the loop recopies all earlier seasons on
# every pass and starts from an empty frame, which recent pandas warns about.
season_frames = []
for year in data:
    prepro_data = preprocess_data(gp, year)
    # Fill missing values one column at a time, refitting the imputer on this
    # season's values only.
    # NOTE(review): this also imputes LapTime, and happens before any
    # train/test split — confirm that is intended.
    for col in prepro_data.columns:
        prepro_data[col] = si.fit_transform(prepro_data[[col]])
    season_frames.append(prepro_data)

# ignore_index=True gives the combined table a fresh 0..n-1 index.
df = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()

In [119]:
df.info()

<class 'fastf1.core.Laps'>
RangeIndex: 181 entries, 0 to 180
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   LapNumber  181 non-null    float64
 1   Stint      181 non-null    float64
 2   SpeedI1    181 non-null    float64
 3   SpeedI2    181 non-null    float64
 4   SpeedFL    181 non-null    float64
 5   SpeedST    181 non-null    float64
 6   Compound   181 non-null    float64
 7   TyreLife   181 non-null    float64
 8   LapTime    181 non-null    float64
 9   Season     181 non-null    float64
 10  TrackTemp  181 non-null    float64
 11  WindSpeed  181 non-null    float64
 12  Throttle   181 non-null    float64
 13  Brake      181 non-null    float64
 14  DRS        181 non-null    float64
dtypes: float64(15)
memory usage: 21.3 KB


In [120]:
prepro_data.head()

Unnamed: 0,LapNumber,Stint,SpeedI1,SpeedI2,SpeedFL,SpeedST,Compound,TyreLife,LapTime,Season,TrackTemp,WindSpeed,Throttle,Brake,DRS
0,1.0,1.0,278.0,301.0,216.0,298.0,2.0,4.0,102.867981,2019.0,30.5,1.5,59.717146,0.212766,0.0
1,2.0,1.0,276.0,284.0,214.0,295.0,2.0,5.0,103.281,2019.0,30.6,1.5,61.994819,0.189119,0.0
2,3.0,1.0,272.0,285.0,214.0,293.0,2.0,6.0,103.245,2019.0,30.3,1.5,62.760618,0.177606,0.0
3,4.0,1.0,271.0,283.0,215.0,293.0,2.0,7.0,103.308,2019.0,30.3,0.9,62.631714,0.182864,0.0
4,5.0,1.0,270.0,282.0,215.0,291.0,2.0,8.0,103.506,2019.0,30.3,1.1,61.287918,0.178663,0.0


In [121]:
def filter_speed_trap(data):
    """Keep only the speed column that correlates most strongly with lap time.

    Computes the correlation of ``LapTime`` with ``SpeedI1``, ``SpeedI2``,
    ``SpeedFL`` and ``SpeedST``, copies the column with the largest absolute
    correlation into ``SpeedTrap`` and drops the four original speed columns.

    Args:
        data: Frame containing ``LapTime`` and the four speed columns. It is
            left unmodified (the previous version added ``SpeedTrap`` to the
            caller's frame as a side effect).

    Returns:
        New frame without the four speed columns and with ``SpeedTrap``
        appended as the last column.
    """
    speed_columns = ['SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST']
    corr = data[['LapTime'] + speed_columns].corr()
    # Row 0 is LapTime against itself; rank the speed columns only.
    speed_trap = corr['LapTime'].iloc[1:].abs().idxmax()
    # assign() works on a copy, so the caller's frame is not mutated.
    return data.assign(SpeedTrap=data[speed_trap]).drop(columns=speed_columns)

In [None]:
# Save the assembled multi-season table. `merged_data` only exists as a local
# name inside the helper functions, so dumping it here raised NameError; `df`
# is the combined frame built by the preprocessing loop.
with open(ML_PATH / "tabular_data.pkl", "wb") as f:
    pickle.dump(df, f)