In [16]:
import fastf1 as ff1
import pandas as pd
from fastf1 import plotting
from matplotlib import pyplot as plt
import tensorflow as tf

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import joblib

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [24]:
def check_gpu():
    """Print a short report on TensorFlow's GPU support.

    Three lines are printed: the number of physical GPU devices TensorFlow
    can see, whether this TensorFlow build was compiled with CUDA, and
    whether at least one GPU is available.
    """
    # Query the device list once and reuse it for both the count and the flag.
    gpus = tf.config.list_physical_devices('GPU')
    print("Num GPUs Available: ", len(gpus))
    print("Built with CUDA: ", tf.test.is_built_with_cuda())
    # tf.test.is_gpu_available() is deprecated; the device list is the
    # recommended replacement for checking GPU availability.
    print("GPU Available: ", bool(gpus))

def get_race_data_for_year(year=2023):
    """Collect one record per lap for every race session of a season.

    Races are requested by round number starting at 1. The loop stops at the
    first round that raises while being fetched, loaded or iterated (for
    example a round whose data is not available yet).

    Args:
        year: Season to download. Defaults to 2023.

    Returns:
        A list of dicts, one per lap, holding race metadata, sector times,
        tyre information, weather readings and the lap's DRS car data.
        Only races that were read completely contribute rows.
    """
    race_number = 1
    all_data = []
    while True:
        # Buffer the rows of the current race so that a failure halfway
        # through does not leave a partially recorded race in the result.
        race_rows = []
        try:
            session = ff1.get_session(year, race_number, 'R')
            session.load()
            event_name = session.event['EventName']
            for _, lap in session.laps.iterrows():
                weather = lap.get_weather_data()
                car = lap.get_car_data()
                race_rows.append({
                    'race_name': event_name,
                    'race_date': session.date,
                    'race_number': race_number,
                    'driver': lap['Driver'],
                    'lap_number': lap['LapNumber'],
                    'sector1': lap['Sector1Time'],
                    'sector2': lap['Sector2Time'],
                    'sector3': lap['Sector3Time'],
                    'tire_life': lap['TyreLife'],
                    'fresh_tire': lap['FreshTyre'],
                    'compound': lap['Compound'],
                    'accurate': lap['IsAccurate'],
                    'stint': lap['Stint'],
                    'air_temp': weather['AirTemp'],
                    'humidity': weather['Humidity'],
                    'pressure': weather['Pressure'],
                    'rainfall': weather['Rainfall'],
                    'track_temp': weather['TrackTemp'],
                    'wind_direction': weather['WindDirection'],
                    'wind_speed': weather['WindSpeed'],
                    # NOTE(review): get_car_data() presumably returns per-sample
                    # telemetry, so this may be a whole column rather than a
                    # scalar - confirm before using 'drs' as a feature.
                    'drs': car['DRS'],
                })
        except Exception as e:
            # Any failure is treated as "no more races for this season".
            print(f"Stopped at race number {race_number - 1}. Error: {e}")
            break

        all_data.extend(race_rows)
        print(f"Fetched data for {event_name}")
        race_number += 1
    return all_data
    
def process_dataframe(data):
    """Turn the raw per-lap records into a numeric, model-ready DataFrame.

    Steps:
      * encode the driver abbreviation as an integer ``driver_id``;
      * convert the three sector timedeltas to seconds, rounded to 3 decimals;
      * one-hot encode ``compound``;
      * drop rows that contain missing values;
      * cast the boolean flag columns to 0/1 integers.

    Args:
        data: List of per-lap dicts containing at least the keys ``driver``,
            ``sector1``..``sector3``, ``compound``, ``fresh_tire``,
            ``rainfall`` and ``accurate``.

    Returns:
        A new DataFrame with the transformations above applied.
    """
    df = pd.DataFrame(data)

    # Encode drivers as integer category codes.
    df['driver_id'] = df['driver'].astype('category').cat.codes

    # Sector times are timedeltas; express them in seconds with ms precision.
    for sector in ('sector1', 'sector2', 'sector3'):
        df[sector] = df[sector].dt.total_seconds().round(3)

    df = pd.get_dummies(df, columns=['compound'], prefix='compound')

    # A compound that never occurs in the data gets no dummy column from
    # get_dummies; add it as all zeros so the column set is stable.
    expected_compounds = ['compound_HARD', 'compound_INTERMEDIATE', 'compound_MEDIUM',
                          'compound_SOFT', 'compound_WET']
    for col in expected_compounds:
        if col not in df.columns:
            df[col] = 0

    # Drop incomplete rows before casting so a missing flag cannot break
    # the integer conversion.
    df = df.dropna()

    # Convert every True/False column (including any extra compound dummy)
    # to 1/0.
    dummy_cols = [col for col in df.columns if str(col).startswith('compound_')]
    flag_cols = ['fresh_tire', 'rainfall', 'accurate', *dummy_cols]
    return df.astype({col: int for col in flag_cols})

def train_and_evaluate(data, model, features, target, train_ratio=0.6):
    """Fit ``model`` on the earliest races and score it on the later ones.

    The rows are ordered by ``race_date`` and split on whole races: the first
    ``int(train_ratio * number_of_races)`` races form the training set and
    the remaining races form the test set.

    Args:
        data: DataFrame with ``race_date``, ``race_number``, the feature
            columns and the target column.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        features: List of feature column names.
        target: Name of the target column.
        train_ratio: Fraction of races used for training. Defaults to 0.6.

    Returns:
        The mean squared error of the predictions on the test races.

    Raises:
        ValueError: If the split would leave the training or the test set
            without any race.
    """
    # Sort chronologically so unique() yields the races in calendar order.
    data = data.sort_values(by='race_date')
    race_order = data['race_number'].unique()

    # Exactly n_train races go to training; the original "<= races[n_train]"
    # comparison put one race too many into the training set.
    n_train = int(train_ratio * len(race_order))
    if n_train <= 0 or n_train >= len(race_order):
        raise ValueError(
            f"Cannot split {len(race_order)} race(s) with train_ratio={train_ratio}: "
            "both the training and the test set need at least one race."
        )

    is_train = data['race_number'].isin(race_order[:n_train])
    train_df = data[is_train]
    test_df = data[~is_train]

    # Train on the early races, predict the later ones.
    model.fit(train_df[features], train_df[target])
    predictions = model.predict(test_df[features])

    return mean_squared_error(test_df[target], predictions)

def train_test_split_by_race(df,ratio):
    """Split a lap DataFrame chronologically on whole races.

    Rows are ordered by ``race_date``; the first ``int(ratio * n_races)``
    races go to the training frame and all later races to the test frame.

    Args:
        df: DataFrame with ``race_date`` and ``race_number`` columns.
        ratio: Fraction of races assigned to training, between 0 and 1.

    Returns:
        A ``(train_df, test_df)`` tuple. Either frame may be empty when
        ``ratio`` is 0 or 1.

    Raises:
        ValueError: If ``ratio`` is outside the range [0, 1].
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio}")

    ordered = df.sort_values(by='race_date')
    # unique() keeps first-appearance order, i.e. calendar order after sorting.
    race_order = ordered['race_number'].unique()

    # Select races by position instead of comparing against races[n_train]:
    # that comparison put one extra race in training and raised IndexError
    # when ratio was 1.
    n_train = int(ratio * len(race_order))
    in_train = ordered['race_number'].isin(race_order[:n_train])

    return ordered[in_train], ordered[~in_train]

# Data Acquisition
This section loads the race session data for every race of the 2023 season. The features collected for each lap include:
* Race Name
* Date
* Race Number (events are numbered chronologically)
* Driver (3 letter name)
* Lap Number
* Sector Times
* Tire - Life, if Fresh, Compound
* Weather - Air + Track Temp, Humidity, Pressure, Rainfall, Wind Speed/Direction

In [25]:
def main(cache_dir="D:\\f1predict\\data", year=2023):
    """Download one season of race laps and return them as a processed DataFrame.

    Args:
        cache_dir: Directory handed to the FastF1 cache. Defaults to the
            path this notebook was originally run with; pass your own
            location when running elsewhere.
        year: Season to load. Defaults to 2023.

    Returns:
        The DataFrame produced by ``process_dataframe`` for the season.
    """
    check_gpu()
    ff1.Cache.enable_cache(cache_dir)
    season_laps = get_race_data_for_year(year)
    # Return the result: it was previously discarded, which left the later
    # cells without any data to work on.
    return process_dataframe(season_laps)


if __name__ == '__main__':
    df = main()
    # Later cells read train_df / test_df; create them here so the notebook
    # runs top to bottom on a fresh kernel. The 0.6 ratio mirrors the split
    # used by train_and_evaluate.
    train_df, test_df = train_test_split_by_race(df, 0.6)

Num GPUs Available:  0
Built with CUDA:  False
GPU Available:  False


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
core           INFO 	Loading data for Saudi Arabian Grand Pri

Fetched data for Bahrain Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['11', '1', '14', '63', '44', '55', '16', '31', '10', '20', '22', '27', '24', '21', '81', '2', '4', '77', '23', '18']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Saudi Arabian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '14', '18', '11', '4', '27', '81', '24', '22', '77', '55', '10', '31', '21', '2', '20', '63', '23', '16']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Australian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['11', '1', '16', '14', '55', '44', '18', '63', '4', '22', '81', '23', '20', '10', '31', '2', '27', '77', '24', '21']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Azerbaijan Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '63', '55', '44', '16', '10', '31', '20', '22', '18', '77', '23', '27', '24', '4', '21', '81', '2']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Miami Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '31', '44', '63', '16', '10', '55', '4', '81', '77', '21', '24', '23', '22', '11', '27', '2', '20', '18']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Monaco Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '63', '11', '55', '18', '14', '31', '24', '10', '16', '22', '81', '21', '27', '23', '4', '20', '77', '2']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Spanish Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '44', '16', '55', '11', '23', '31', '18', '77', '81', '10', '4', '22', '27', '24', '20', '21', '63', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Canadian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '4', '14', '55', '63', '44', '18', '10', '23', '24', '2', '31', '77', '81', '21', '20', '22', '27']
core           INFO 	Loading data for British Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Austrian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for British Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '44', '81', '63', '16', '55', '14', '18', '23', '77', '3', '27', '22', '24', '20', '2', '31', '10']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Hungarian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '44', '14', '63', '4', '31', '18', '22', '10', '77', '24', '23', '20', '3', '2', '27', '55', '81']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Belgian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '10', '11', '55', '44', '4', '23', '81', '31', '18', '27', '40', '77', '22', '20', '63', '24', '16', '2']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Dutch Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '44', '23', '4', '14', '77', '40', '81', '2', '24', '10', '18', '27', '20', '31', '22']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Italian Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '44', '16', '1', '10', '81', '11', '40', '20', '23', '24', '27', '2', '14', '63', '77', '31', '22', '18']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Singapore Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '44', '55', '63', '14', '31', '10', '40', '22', '24', '27', '20', '23', '2', '18', '11', '77']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.1.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Fetched data for Japanese Grand Prix


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '4', '63', '16', '14', '31', '77', '24', '11', '18', '10', '23', '20', '22', '27', '40', '2', '44', '55']
core           INFO 	Loading data for United States Grand Prix - Race [v3.1.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Fetched data for Qatar Grand Prix


req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for position_data. Loading data...
_api           INFO 	Fetching position data...
req            INFO 	No cached data found for weather_data. Loading data...
_api           INFO 	Fetching weather data...
r

Stopped at race number 17. Error: The data you are trying to access has not been loaded yet. See `Session.load`


                race_name           race_date  race_number driver  lap_number  \
1      Bahrain Grand Prix 2023-03-05 15:00:00            1    VER         2.0   
2      Bahrain Grand Prix 2023-03-05 15:00:00            1    VER         3.0   
3      Bahrain Grand Prix 2023-03-05 15:00:00            1    VER         4.0   
4      Bahrain Grand Prix 2023-03-05 15:00:00            1    VER         5.0   
5      Bahrain Grand Prix 2023-03-05 15:00:00            1    VER         6.0   
...                   ...                 ...          ...    ...         ...   
18917    Qatar Grand Prix 2023-10-08 17:00:00           17    SAR        36.0   
18918    Qatar Grand Prix 2023-10-08 17:00:00           17    SAR        37.0   
18919    Qatar Grand Prix 2023-10-08 17:00:00           17    SAR        38.0   
18920    Qatar Grand Prix 2023-10-08 17:00:00           17    SAR        39.0   
18921    Qatar Grand Prix 2023-10-08 17:00:00           17    SAR        40.0   

       sector1  sector2  se

## Features
   
* Compound (str): Event-specific tyre compound name: SOFT, MEDIUM, HARD, INTERMEDIATE, WET (one-hot encoded)

* TyreLife (float): Laps driven on this tire (includes laps in other sessions for used sets of tires)

* FreshTyre (bool): Tyre had TyreLife=0 at stint start, i.e. was a new tire


* IsAccurate (bool): Indicates that the lap start and end time are synced correctly with other laps. Do not confuse this with the accuracy of the lap time or sector times. They are always considered to be accurate if they exist! If this value is True, the lap has passed a basic accuracy check for timing data. This does not guarantee accuracy, but laps marked as inaccurate need to be handled with caution. They might contain errors which cannot be spotted easily. Laps need to satisfy the following criteria to be marked as accurate:
  * not an inlap or outlap
  * set under green or yellow flag
  * not the first lap after a safety car period
  * has a value for the lap time and all sector times
  * the sum of the sector times matches the lap time
* Stint (float): Stint number

Converting all columns that use True/False into 1/0 so that the model can consume them as numeric features.

# Splitting the Data

In [47]:
# Column the model is asked to predict (or whichever is your target variable).
target = 'sector1'

candidate_features = ['driver_id', 'lap_number', 'sector1', 'sector2', 'sector3', 'tire_life', 'fresh_tire', 'accurate', 'stint',
                      'compound_HARD', 'compound_INTERMEDIATE', 'compound_MEDIUM', 'compound_SOFT', 'compound_WET',
                      'air_temp', 'humidity', 'pressure', 'track_temp']

# The target must never be one of the inputs: with 'sector1' among the
# features the model could simply copy its own answer (target leakage).
# NOTE(review): sector2/sector3 of the same lap remain as inputs - confirm
# they are really known at prediction time.
features = [col for col in candidate_features if col != target]

X_train = train_df[features]
y_train = train_df[target]
X_test = test_df[features]
y_test = test_df[target]

# Race Simulation

In [None]:
class RaceSimulator:
    """Replay race laps through a fitted model and total the predicted times.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        initial_conditions: Optional mapping of driver -> dict of starting
            conditions. Each dict is copied into ``drivers_state``.
        features: Optional list of column names, in the order the model was
            trained on. When omitted, the model's ``feature_names_in_``
            attribute is used if present; otherwise every column of the
            lap conditions is passed to the model.
    """

    def __init__(self, model, initial_conditions=None, features=None):
        # initial_conditions used to be read from an undefined global; it is
        # now an optional argument so RaceSimulator(model) works on its own.
        self.drivers_state = {driver: conditions.copy()
                              for driver, conditions in (initial_conditions or {}).items()}
        self.model = model
        self.features = list(features) if features is not None else None

    def _feature_columns(self):
        """Return the ordered input columns for the model, or None if unknown."""
        if self.features is not None:
            return self.features
        names = getattr(self.model, 'feature_names_in_', None)
        return list(names) if names is not None else None

    def _build_inputs(self, driver, conditions):
        """Build the model input frame for ``driver`` from a list of condition dicts."""
        # A fresh DataFrame is built, so the caller's dicts are never modified.
        frame = pd.DataFrame(list(conditions))
        frame['driver_id'] = driver
        columns = self._feature_columns()
        return frame[columns] if columns is not None else frame

    def predict_next_lap(self, driver, current_conditions):
        """Predict the time for one lap.

        Args:
            driver: Driver id written into the ``driver_id`` input column.
            current_conditions: Dict of lap conditions (left unmodified).

        Returns:
            The model's prediction for this lap as a float.
        """
        inputs = self._build_inputs(driver, [current_conditions])
        return float(self.model.predict(inputs)[0])

    def simulate_race_for_driver(self, driver, driver_conditions):
        """Sum the predicted times over all laps of one driver.

        Args:
            driver: Driver id.
            driver_conditions: Iterable of per-lap condition dicts.

        Returns:
            Total predicted time as a float, or 0 when there are no laps.
        """
        laps = list(driver_conditions)
        if not laps:
            return 0
        # Predict all laps in a single call instead of one call per lap.
        inputs = self._build_inputs(driver, laps)
        return float(sum(self.model.predict(inputs)))

    def simulate_race(self, test_df):
        """Simulate every driver present in ``test_df``.

        Args:
            test_df: DataFrame with a ``driver_id`` column and the columns
                the model needs as inputs.

        Returns:
            Dict mapping each driver id to its total predicted time.
        """
        results = {}
        for driver in test_df['driver_id'].unique():
            driver_data = test_df[test_df['driver_id'] == driver]
            driver_conditions = driver_data.to_dict(orient='records')
            results[driver] = self.simulate_race_for_driver(driver, driver_conditions)
        return results

In [None]:
# After train_and_evaluate
# Train and score the model, then replay the held-out laps through it.
# NOTE(review): `data` is not defined in any visible cell (and `target`
# only if the feature cell defines it) - confirm both exist before this
# cell runs.
# A fixed random_state makes the forest, and therefore the MSE and the
# simulated race times, reproducible across runs.
model = RandomForestRegressor(random_state=42)
mse = train_and_evaluate(data, model, features, target)

# Only the fitted model is handed to the simulator.
# NOTE(review): test_df should hold the same races train_and_evaluate kept
# out of training - verify the two splits agree.
simulator = RaceSimulator(model)
race_results = simulator.simulate_race(test_df)
print(race_results)