# FastF1 Data Processing

In [2]:
%pip install fastf1 pandas scikit-learn
import fastf1
import pandas as pd


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


# Input Format

- Each driver's lap (with its lap time) is one input vector. Reasoning:
  - Most driver laps are independent
  - Better handles missing data
- Filtering
  - TODO: consider filtering out pit stop laps (in-laps and out-laps)
- Features
  - Driver identification - one-hot encoding?
  - Lap number - normalize to be out of the total laps
  - Laps since last tyre change
  - Tyre compound - one-hot encoding?
  - Weather - one-hot encoding
  - Qualifying position
  - Driver standing
  - Constructor standing
  - Better than teammate boolean
  - Weather conditions

In [4]:
# event_schedule -> event -> session -> laps -> telemetry
from sklearn.preprocessing import OneHotEncoder

def _encode_compound(compound):
    """One-hot encode tyre compound labels into ``Compound_*`` float columns.

    Args:
        compound: Series of compound labels (one entry per lap).

    Returns:
        DataFrame indexed like ``compound`` with the float columns
        ``Compound_SOFT``, ``Compound_MEDIUM``, ``Compound_HARD``,
        ``Compound_INTER`` and ``Compound_WET``. Labels outside that set
        (including missing values) encode as an all-zero row.
    """
    # FastF1 documents the intermediate tyre as "INTERMEDIATE"; fold it onto
    # "INTER" so the output column keeps the name ``Compound_INTER``.
    aliases = {"INTERMEDIATE": "INTER"}
    categories = ["SOFT", "MEDIUM", "HARD", "INTER", "WET"]

    normalized = compound.map(lambda label: aliases.get(label, label))
    # A fixed categorical dtype guarantees one column per compound even when a
    # compound never appears in this race, and turns unknown labels into NaN
    # (-> all zeros) instead of raising.
    as_category = normalized.astype(pd.CategoricalDtype(categories=categories))
    # Passing a Series keeps the original lap index, so the later column-wise
    # concat lines up row for row.
    return pd.get_dummies(as_category, prefix="Compound", dtype=float)


def _encode_track_status(track_status):
    """Expand the track status code string into one float flag column per status.

    Each lap's status is treated as a string of digit codes; a flag is 1.0 when
    its digit occurs anywhere in that string, so several flags can be set at once.

    Args:
        track_status: Series of track status values (one entry per lap).

    Returns:
        DataFrame indexed like ``track_status`` with six ``TrackStatus_*``
        float columns.
    """
    flag_codes = {
        "TrackStatus_Clear": "1",
        "TrackStatus_YellowFlag": "2",
        "TrackStatus_SafetyCar": "4",
        "TrackStatus_RedFlag": "5",
        "TrackStatus_VirtualSafetyCarDeployed": "6",
        "TrackStatus_VirtualSafetyCarEnding": "7",
    }
    status_text = track_status.astype(str)
    return pd.DataFrame(
        {
            column: status_text.str.contains(code, regex=False, na=False).astype(float)
            for column, code in flag_codes.items()
        },
        index=track_status.index,
    )


def process_event(event):
    """Build a per-lap feature table for the race of ``event``.

    Loads the event's qualifying and race sessions, keeps a fixed subset of the
    race lap columns, one-hot encodes the tyre compound and the track status,
    and attaches each driver's fastest qualifying lap time.

    Args:
        event: Object exposing ``get_qualifying()`` and ``get_race()``; each
            returned session must provide ``load()`` and ``laps`` (qualifying
            additionally ``results`` with an ``Abbreviation`` column).

    Returns:
        One row per race lap with the columns ``LapTime``, ``Driver``,
        ``LapNumber``, ``Stint``, ``TyreLife``, ``Team``, ``Position``, the
        ``Compound_*`` and ``TrackStatus_*`` flags, and
        ``FastestQualifyingLapTime`` (``NaT`` when the driver has no
        qualifying lap time available).
    """
    qualifying = event.get_qualifying()
    qualifying.load()

    fastest_qualifying_lap_times = {}
    for driver in pd.unique(qualifying.results["Abbreviation"]):
        fastest_lap = qualifying.laps.pick_driver(driver).pick_fastest()
        # A driver may have no valid timed lap: tolerate both a missing lap
        # object and a lap whose time is NaN/NaT.
        lap_time = getattr(fastest_lap, "LapTime", None)
        fastest_qualifying_lap_times[driver] = pd.NaT if pd.isna(lap_time) else lap_time

    race = event.get_race()
    race.load()

    race_laps = race.laps[[
        "LapTime",
        "Driver", # TODO 1-hot encode/ is this needed?
        "LapNumber", # TODO does this need to be normalized
        "Stint",
        "Compound",
        "TyreLife",
        "Team", # TODO 1-hot encode/ is this needed?
        "TrackStatus",
        "Position"
        # Unsure of whether these will be useful
        # TODO circuit
        # TODO weather
        # TODO driver standing
        # TODO constructor standing
        # TODO better teammate
    ]]

    # Both encoded frames share race_laps' index, so axis=1 concat cannot
    # misalign rows or introduce NaN padding.
    features = pd.concat(
        [
            race_laps.drop(columns=["Compound", "TrackStatus"]),
            _encode_compound(race_laps["Compound"]),
            _encode_track_status(race_laps["TrackStatus"]),
        ],
        axis=1,
    )

    # Qualifying results: drivers absent from qualifying map to NaT instead of
    # raising KeyError.
    features["FastestQualifyingLapTime"] = features["Driver"].map(fastest_qualifying_lap_times)

    return features


# Build the per-lap feature table for the 2022 Bahrain event and save it as CSV.
event = fastf1.get_event(2022, 'Bahrain')
data = process_event(event)
# Written relative to the notebook's working directory; with to_csv's default
# arguments the DataFrame index is written as the first column.
data.to_csv("bahrain-2022.csv")


core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.3.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '55', '11', '44', '77', '20', '14', '63', '10', '31', '47', '4', '23', '24', '22', '27', '3', '18', '6']
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.3.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count

In [None]:
# for year in range(2022, 2023):
#     event_schedule = fastf1.get_event_schedule(year, include_testing=False)

#     event_round = 0
#     events = []
#     while True:
#         event_round += 1

#         try:
#             event = event_schedule.get_event_by_round(event_round)
#             events.append(event)
#         except:
#             break

#     total_events = len(events)