<a href="https://colab.research.google.com/github/vicmcl/ml-laptime/blob/main/ml_laptime/notebook/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from pathlib import Path
import os
import glob
import seaborn as sns

import pandas as pd
import pickle

In [3]:
# Mount Drive
from google.colab import drive

# Flush and unmount any existing mount before mounting again
# (presumably so that re-running this cell starts from a clean state).
drive.flush_and_unmount()
drive.mount('/content/drive')
# Root of the mounted Drive, and the project folder that the cells below
# read the races*.pkl files from and write the output pickle to.
DRIVE_PATH = Path("/content/drive/MyDrive/")
ML_PATH = DRIVE_PATH / "Projects" / "laptime-simulation"

Mounted at /content/drive


In [4]:
# glob returns matches in an arbitrary, filesystem-dependent order, while the
# loading cells below index this list by position (0 -> 2021, 1 -> 2020,
# 2 -> 2019). Sorting newest-first makes that mapping deterministic.
# NOTE: this still assumes exactly the 2021/2020/2019 files match the pattern.
data_path = sorted(glob.glob(str(ML_PATH / "races*.pkl")), reverse=True)

In [5]:
pip install -q fastf1

In [6]:
# Display the discovered pickle files; the loading cells below index this list by position.
data_path

['/content/drive/MyDrive/Projects/laptime-simulation/races_2021.pkl',
 '/content/drive/MyDrive/Projects/laptime-simulation/races_2020.pkl',
 '/content/drive/MyDrive/Projects/laptime-simulation/races_2019.pkl']

In [7]:
# Season year -> unpickled content of that season's races file.
data = {}
# The first entry of `data_path` is taken to be the 2021 file.
with Path(data_path[0]).open("rb") as f:
    data[2021] = pickle.load(f)

In [8]:
# The second entry of `data_path` is taken to be the 2020 file.
with Path(data_path[1]).open("rb") as f:
    data[2020] = pickle.load(f)

In [9]:
# The third entry of `data_path` is taken to be the 2019 file.
with Path(data_path[2]).open("rb") as f:
    data[2019] = pickle.load(f)

In [10]:
# Pull the same Grand Prix entry out of each loaded season.
session_2019, session_2020, session_2021 = (
    data[season]['Abu Dhabi Grand Prix'] for season in (2019, 2020, 2021)
)

In [57]:
# Columns kept from a session's 'laps' table.
columns_laps = [
    'Time',
    'LapNumber',
    'Stint',
    'SpeedI1',
    'SpeedI2',
    'SpeedFL',
    'SpeedST',
    'Compound',
    'TyreLife',
    'LapTime',
]

# Columns kept from a session's 'telemetry' table.
columns_telemetry = [
    'SessionTime',
    'Throttle',
    'Brake',
    'DRS',
]

# Columns kept from a session's 'weather' table.
columns_weather = [
    'Time',
    'TrackTemp',
    'WindSpeed',
]

# Numeric code per tyre compound name; names missing from this table are
# encoded as 0.0 by process_lap_data.
compound_map = dict(SOFT=1.0, MEDIUM=2.0, HARD=3.0)

In [58]:
def process_lap_data(gp, year):
    """Build the per-lap table for one race.

    Reads ``data[year][gp]['laps']`` (notebook globals), keeps the columns
    listed in ``columns_laps``, adds a ``Season`` column and replaces the
    compound name by its numeric code from ``compound_map``.

    Parameters
    ----------
    gp : key of the race inside ``data[year]``.
    year : key of the season inside ``data``; also stored in ``Season``.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected lap columns with a fresh 0..n-1 index. The
        source table is not modified.
    """
    laps_data = data[year][gp]['laps'][columns_laps].copy()
    laps_data['Season'] = year
    # drop=True discards the old index directly. The previous
    # reset_index().drop('index', ...) raised KeyError whenever the source
    # index was named or multi-level, because no 'index' column is created then.
    laps_data = laps_data.reset_index(drop=True)
    # Compounds that are not in compound_map (including missing values) become 0.0.
    laps_data['Compound'] = laps_data['Compound'].map(
        lambda compound: compound_map.get(compound, 0.0)
    )
    return laps_data

In [89]:
def map_drs(drs):
    """Turn a raw DRS value into a boolean flag.

    Returns True when ``drs`` is 10 or greater and False otherwise
    (presumably values from 10 upward mean the DRS is open - confirm against
    the telemetry source).
    """
    return bool(drs >= 10)

def process_telemetry_data(gp, year, laps):
    """Average the telemetry channels of one race over each lap.

    Reads ``data[year][gp]['telemetry']`` (notebook globals) and keeps the
    columns listed in ``columns_telemetry``. Every telemetry sample is tagged
    with the ``LapNumber`` of the first row of ``laps`` whose ``Time`` is at or
    after the sample's ``SessionTime`` (forward as-of merge). Samples later
    than the last lap ``Time`` get no lap number and are left out of the
    group-by.

    Parameters
    ----------
    gp : key of the race inside ``data[year]``.
    year : key of the season inside ``data``.
    laps : DataFrame with at least ``Time`` and ``LapNumber`` columns, sorted
        by ``Time`` (a requirement of ``pandas.merge_asof``).

    Returns
    -------
    pandas.DataFrame
        One row per ``LapNumber`` (the index) holding the mean of every
        telemetry column; ``DRS`` is the fraction of samples for which
        ``map_drs`` returned True.
    """
    telemetry_data = data[year][gp]['telemetry'][columns_telemetry].copy()
    telemetry_with_laps = pd.merge_asof(
        telemetry_data,
        laps[['Time', 'LapNumber']],
        left_on='SessionTime',
        right_on='Time',
        direction='forward'
    ).drop('Time', axis=1)
    # Replace the column instead of writing through .loc[:, 'DRS']: putting
    # booleans into the existing numeric column in place is an incompatible-
    # dtype assignment (deprecated in recent pandas) and can leave an object
    # column behind. Casting to float keeps the per-lap mean well defined.
    telemetry_with_laps['DRS'] = telemetry_with_laps['DRS'].map(map_drs).astype(float)
    telemetry_avg = telemetry_with_laps.groupby('LapNumber').mean()
    return telemetry_avg

In [72]:
def process_weather_data(gp, year):
    """Return a copy of the weather columns of interest for one race.

    Looks up ``data[year][gp]['weather']`` (notebook globals) and keeps only
    the columns listed in ``columns_weather``.
    """
    session_weather = data[year][gp]['weather']
    return session_weather.loc[:, columns_weather].copy()

In [85]:
def merge_data(laps, weather, telemetry):
    """Combine lap, weather and per-lap telemetry data into one table.

    Each lap is paired with the weather row whose ``Time`` is nearest to the
    lap's ``Time``; the result is then aligned with ``telemetry`` on
    ``LapNumber`` and ``LapTime`` is converted to seconds.

    Parameters
    ----------
    laps : DataFrame with ``Time``, ``LapNumber`` and a timedelta ``LapTime``
        column, sorted by ``Time``.
    weather : DataFrame with a ``Time`` column, sorted by ``Time``.
    telemetry : DataFrame indexed by ``LapNumber`` that contains a
        ``SessionTime`` column (dropped from the result).

    Returns
    -------
    pandas.DataFrame
        One row per lap number with ``LapNumber`` as a regular column and
        ``LapTime`` as float seconds. The inputs are not modified.
    """
    merged_data = (
        pd.merge_asof(laps, weather, on='Time', direction='nearest')
        .set_index('LapNumber')
        .drop('Time', axis=1)
    )
    # Column-wise concat aligns both tables on their LapNumber index.
    merged_data = pd.concat([merged_data, telemetry], axis=1).reset_index()
    merged_data = merged_data.drop('SessionTime', axis=1)
    # Replace the column rather than assigning through .loc[:, 'LapTime']:
    # writing floats into the existing timedelta column in place is an
    # incompatible-dtype assignment (deprecated in recent pandas). The
    # vectorised accessor also avoids a Python-level call per row.
    merged_data['LapTime'] = merged_data['LapTime'].dt.total_seconds()
    return merged_data

In [90]:
def preprocess_data(gp, year):
    """Run the full preprocessing chain for one race.

    Builds the lap table, then the weather table, then the per-lap telemetry
    averages (which need the lap table), and returns what ``merge_data``
    produces from the three.
    """
    laps = process_lap_data(gp, year)
    return merge_data(
        laps,
        process_weather_data(gp, year),
        process_telemetry_data(gp, year, laps),
    )

In [87]:
# Build the merged per-lap table for a single race.
prepro = preprocess_data('Abu Dhabi Grand Prix', 2021)

In [88]:
# Preview the first rows of the merged table.
prepro.head()

Unnamed: 0,LapNumber,Stint,SpeedI1,SpeedI2,SpeedFL,SpeedST,Compound,TyreLife,LapTime,Season,TrackTemp,WindSpeed,Throttle,Brake,DRS
0,1.0,1.0,296.0,249.0,295.0,214.0,2.0,1.0,78.399,2021,52.1,0.5,65.327007,0.207299,0.0
1,2.0,1.0,297.0,246.0,296.0,280.0,2.0,2.0,74.68,2021,51.8,0.4,64.527692,0.190769,0.0
2,3.0,1.0,301.0,250.0,298.0,285.0,2.0,3.0,74.471,2021,51.7,0.8,64.089231,0.2,0.0
3,4.0,1.0,,241.0,298.0,284.0,2.0,4.0,74.713,2021,51.9,0.5,62.656394,0.192604,0.0
4,5.0,1.0,303.0,244.0,299.0,287.0,2.0,5.0,74.66,2021,51.9,0.5,63.229584,0.195686,0.0


In [106]:
def select_speed_trap(data):
    """Pick the speed-trap column most strongly correlated with lap time.

    Computes the correlation matrix of ``LapTime`` and the four speed columns
    of ``data`` and returns the name of the speed column whose correlation
    with ``LapTime`` has the largest absolute value.
    """
    candidates = ['LapTime', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST']
    lap_time_corr = data[candidates].corr()['LapTime']
    # Position 0 is LapTime's correlation with itself, so it is skipped.
    strength = lap_time_corr.abs().iloc[1:]
    return strength.idxmax()

'SpeedI1'

In [None]:
# Persist the preprocessed table. `merged_data` only ever exists as a local
# variable inside the helper functions, so referencing it here raised NameError
# on a fresh run; the notebook-level result is `prepro`.
with open(ML_PATH / "tabular_data.pkl", "wb") as f:
    pickle.dump(prepro, f)