In [None]:
import os
import fastf1
import pandas as pd
from tqdm import tqdm

# Ensure the cache folder exists, then switch FastF1's cache on for that folder.
cache_dir = "cache"
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)


In [None]:
# Venues labelled as street circuits. Names that are not listed here fall back
# to 'permanent' wherever this mapping is queried with that default.
CIRCUIT_TYPES = dict.fromkeys(
    (
        'Monaco',
        'Baku',
        'Singapore',
        'Miami',
        'Las Vegas',
        'Melbourne',
        'Montreal',
    ),
    'street',
)

# Rounds to download for each season, as {year: [1, 2, ..., last_round]}.
# NOTE(review): 2024 stops at round 19 and 2025 at round 10 — confirm these
# cut-offs are intentional and not a stale snapshot of the calendar.
_last_round_by_year = {2022: 22, 2023: 22, 2024: 19, 2025: 10}
rounds_by_year = {
    season: list(range(1, last_round + 1))
    for season, last_round in _last_round_by_year.items()
}


In [None]:
def _seconds_or_none(value):
    """Return ``value.total_seconds()``, or ``None`` when ``value`` is null (NaT/NaN/None)."""
    return value.total_seconds() if pd.notnull(value) else None


# One dict per driver per qualifying session (that driver's fastest lap).
all_data = []

# Flatten {year: [rounds]} into (year, round) pairs so the progress bar
# advances once per session instead of once per season.
session_keys = [
    (year, round_num)
    for year, rounds in rounds_by_year.items()
    for round_num in rounds
]

for year, round_num in tqdm(session_keys, desc="Fetching qualifying sessions"):
    try:
        session = fastf1.get_session(year, round_num, 'Q')
        session.load()

        event_info = session.event
        circuit_name = event_info['EventName']
        location = event_info['Location']

        # CIRCUIT_TYPES is keyed by venue names ('Monaco', 'Baku', ...), so
        # match on Location first. The event name is kept as a fallback so any
        # key that happens to be spelled like an event title still works.
        # NOTE(review): assumes FastF1's Location strings match the keys
        # exactly (spelling/accents/venue naming) — verify against the data.
        circuit_type = CIRCUIT_TYPES.get(
            location, CIRCUIT_TYPES.get(circuit_name, 'permanent')
        )

        # Only the first weather sample of the session is used; an empty dict
        # makes every weather.get(...) below yield None.
        weather_data = session.weather_data
        weather = weather_data.iloc[0] if not weather_data.empty else {}

        # Buffer this session's rows and commit them only once the whole
        # session was processed, so a mid-session error cannot leave a
        # half-recorded session in all_data.
        session_rows = []

        # Iterate the groups directly instead of groupby(...).apply(...):
        # iteration always hands over the full group (including 'Driver'),
        # independent of how pandas treats grouping columns inside apply.
        for _, driver_laps in session.laps.groupby('Driver'):
            lap = driver_laps.pick_fastest()
            if lap is None:
                # No fastest lap available for this driver; nothing to record.
                continue

            session_rows.append({
                'Year': year,
                'Round': round_num,
                'GrandPrix': circuit_name,
                # NOTE(review): CircuitName duplicates the event name stored
                # in GrandPrix — kept as-is to preserve the output schema.
                'CircuitName': circuit_name,
                'CircuitType': circuit_type,
                'Location': location,
                'Country': event_info['Country'],
                'Driver': lap['Driver'],
                'Team': lap['Team'],
                'LapTime_seconds': _seconds_or_none(lap['LapTime']),
                'Sector1Time': _seconds_or_none(lap['Sector1Time']),
                'Sector2Time': _seconds_or_none(lap['Sector2Time']),
                'Sector3Time': _seconds_or_none(lap['Sector3Time']),
                'Tyre': lap['Compound'],
                'TyreLife': lap['TyreLife'],
                'FreshTyre': lap['FreshTyre'],
                'TrackStatus': lap['TrackStatus'],
                'IsAccurate': lap['IsAccurate'],
                'AirTemp': weather.get('AirTemp'),
                'Humidity': weather.get('Humidity'),
                'Pressure': weather.get('Pressure'),
                'Rainfall': weather.get('Rainfall'),
                'WindSpeed': weather.get('WindSpeed'),
                'WindDirection': weather.get('WindDirection'),
            })

        all_data.extend(session_rows)

    except Exception as e:
        # Best effort: report the failing session and carry on with the rest.
        print(f"Error in {year} Round {round_num}: {e}")


In [None]:
# Turn the collected lap records into a table, write it to a CSV in the
# working directory, and preview the first rows.
raw_csv_path = "qualifying_data_raw.csv"
df = pd.DataFrame(data=all_data)
df.to_csv(raw_csv_path, index=False)
df.head(n=5)


In [None]:
# Correcting street circuit labels

# Input dataset. The default is the original machine-specific path; set the
# QUALIFYING_LAPS_CSV environment variable to point at another copy so this
# cell can run on a different machine without editing the code.
# NOTE(review): this is not the "qualifying_data_raw.csv" written earlier in
# the notebook — confirm that reading a separate dataset here is intended.
input_csv = os.environ.get(
    "QUALIFYING_LAPS_CSV",
    r"E:\GTech\Courses\ML\ML_Project\My_files\Midterm_checkpoint\datasets\FastF1\Dataset_with_sprint_races\f1_qualifying_laps_with_circuit_data_2022_to_2025.csv",
)
df = pd.read_csv(input_csv)

# Location values that should be labelled as street circuits
street_circuits = [
    'Monaco', 'Baku', 'Singapore', 'Miami',
    'Las Vegas', 'Melbourne', 'Montreal'
]

# Overwrite CircuitType for every row whose Location is in the list; all other
# rows keep whatever value they already had.
is_street = df['Location'].isin(street_circuits)
df.loc[is_street, 'CircuitType'] = 'street'

df.to_csv("qualifying_data_final_without_p_including_sprint.csv", index=False)