In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor


Load dataset

In [40]:
# Read the lap data CSV (path is relative to the notebook's working directory).
laps = pd.read_csv(
    filepath_or_buffer='data/2023_monza_race_laps.csv',
)

Encode categorical features

In [41]:
# Expand only the listed categorical columns into indicator columns.
laps_encoded = pd.get_dummies(
    data=laps,
    columns=['Driver', 'Team', 'Compound', 'TrackStatus'],
)

Separate features (x) and target (y)

In [42]:
# The target is the LapTime column; every other column of the encoded frame
# is used as a feature.
# NOTE(review): if the CSV holds columns derived from the lap time itself
# (e.g. sector times), they would leak the target — confirm the schema.
y = laps_encoded['LapTime']
x = laps_encoded.drop(columns=['LapTime'])

Split data and define models

In [43]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.2
)

# Candidate regressors keyed by a human-readable name. Insertion order is
# Random Forest first, then XGBoost.
models = {}
models['Random Forest'] = RandomForestRegressor(random_state=42, n_estimators=100)
models['XGBoost'] = XGBRegressor(random_state=42, learning_rate=0.1, n_estimators=100)

Train and evaluate models

In [44]:
# Fit every candidate on the training split, predict the held-out split, and
# print MAE, RMSE and R² for each one.
for name in models:
    model = models[name]
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # One print call; sep="\n" puts each metric on its own line.
    print(
        f"\n {name} Results:",
        f"MAE: {mae:.3f} sec",
        f"RMSE: {rmse:.3f} sec",
        f"R² Score: {r2:.3f}",
        sep="\n",
    )


 Random Forest Results:
MAE: 0.258 sec
RMSE: 0.396 sec
R² Score: 0.741

 XGBoost Results:
MAE: 0.259 sec
RMSE: 0.382 sec
R² Score: 0.759


Save XGBoost model

In [45]:
# Persist the fitted XGBoost regressor to disk. The call is left as the cell's
# last expression so the list of written paths is displayed.
joblib.dump(
    value=models['XGBoost'],
    filename='data/lap_time_model_xgb.joblib',
)

['data/lap_time_model_xgb.joblib']