In [33]:
import os

import pandas as pd

In [34]:
# Read the four raw tables used by the rest of the notebook. The paths are
# listed in the same order as the names they are unpacked into.
races, results, driver_standings, constructor_standings = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/races.csv",
        "data/results.csv",
        "data/driver_standings.csv",
        "data/constructor_standings.csv",
    )
)

# Column trimming is currently disabled, so every column of each file is kept.
# The previously used subsets are left here for reference:
# races = races[['raceId', 'year', 'round', 'circuitId']]
# results = results[['raceId', 'driverId', 'constructorId', 'grid', 'position']]
# driver_standings = driver_standings[['raceId', 'driverId', 'points', 'wins']]
# constructor_standings = constructor_standings[['raceId', 'constructorId', 'points', 'wins']]


In [35]:
def _previous_round_standings(standings, race_calendar, entity_col, label):
    """Re-key a standings table so each row can be joined to the following round.

    Parameters
    ----------
    standings : DataFrame with at least `raceId`, `entity_col`, `points`, `wins`.
    race_calendar : DataFrame with at least `raceId`, `year`, `round`.
    entity_col : name of the id column (`'driverId'` or `'constructorId'`).
    label : suffix used for the renamed value columns.

    Returns
    -------
    A new DataFrame: `standings` joined to `year`/`round` on `raceId`, sorted by
    year, round and entity, with an added `next_round = round + 1` column and
    `points`/`wins` renamed to `prev_points_<label>`/`prev_wins_<label>`.
    """
    return (
        standings
        .merge(race_calendar[['raceId', 'year', 'round']], on='raceId')
        .sort_values(['year', 'round', entity_col])
        # The standings recorded for round N are what is known going into round N + 1.
        .assign(next_round=lambda frame: frame['round'] + 1)
        .rename(columns={
            'points': 'prev_points_' + label,
            'wins': 'prev_wins_' + label,
        })
    )


def _attach_previous_standings(frame, previous, entity_col, label):
    """Left-join the previous-round points/wins from `previous` onto `frame`.

    `previous` must come from `_previous_round_standings` with the same
    `entity_col` and `label`. Rows of `frame` with no matching previous-round
    standings row (e.g. the first race of a season) get 0 for both value columns.
    The row order of `frame` is preserved and no helper key column is added.
    """
    value_cols = ['prev_points_' + label, 'prev_wins_' + label]
    # Expose `next_round` under the name `round` so the join can use plain `on=`
    # keys; this keeps the helper column out of the merged result.
    lookup = (
        previous[['year', 'next_round', entity_col] + value_cols]
        .rename(columns={'next_round': 'round'})
    )
    # NOTE(review): if `previous` ever held several rows for one
    # (year, next_round, entity) key, this merge would duplicate rows of `frame`.
    merged = frame.merge(lookup, on=['year', 'round', entity_col], how='left')
    merged[value_cols] = merged[value_cols].fillna(0)
    return merged


# Attach race metadata (year, round, circuitId, ...) to every result row.
results = results.merge(races, on="raceId", how="inner")

# Inner-join the standings recorded for the same race. Only result rows that
# have a matching driver AND constructor standings row survive. Overlapping
# column names coming from the standings tables receive the given suffix,
# while the columns already in `results` keep their names.
results = results.merge(
    driver_standings,
    on=["raceId", "driverId"],
    how="inner",
    suffixes=('', '_driver'),
)
results = results.merge(
    constructor_standings,
    on=["raceId", "constructorId"],
    how="inner",
    suffixes=('', '_constructor'),
)

# Standings tables keyed by the round they should be joined to (round + 1).
driver_previous = _previous_round_standings(
    driver_standings, races, 'driverId', 'driver'
)
constructor_previous = _previous_round_standings(
    constructor_standings, races, 'constructorId', 'constructor'
)

# Add prev_points_* / prev_wins_* columns; missing values become 0.
results = _attach_previous_standings(results, driver_previous, 'driverId', 'driver')
results = _attach_previous_standings(
    results, constructor_previous, 'constructorId', 'constructor'
)

In [36]:
# Columns written to the cleaned file: identifiers, grid/finishing position,
# circuit, and the previous-round standings columns.
kept_columns = [
    'raceId', 'year', 'driverId', 'constructorId', 'round', 'grid', 'position', 'circuitId',
    'prev_points_driver', 'prev_points_constructor', 'prev_wins_driver', 'prev_wins_constructor',
]

# Rename the previous-round columns to maintain compatibility with existing code.
results = results[kept_columns].rename(columns={
    'prev_points_driver': 'driver_points',
    'prev_points_constructor': 'constructor_points',
    'prev_wins_driver': 'driver_wins',
    'prev_wins_constructor': 'constructor_wins',
})

output_path = "cleaned/results2.csv"
# to_csv does not create missing directories, so make sure the target folder
# exists before writing (a no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results.to_csv(output_path, index=False)

# Earlier export (disabled), kept for reference:
# results = results[['raceId', 'year', 'driverId', 'constructorId', 'round', 'grid', 'position', 'circuitId', 'points_driver', 'points_constructor', 'wins', 'wins_constructor']]
# results.to_csv("cleaned/results.csv", index=False)
