In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
races_data = pd.read_csv('../../../data/processed/fully_integrated_data.csv')
lap_time_data = pd.read_csv("../../../data/processed/lap_times.csv")

# Merge datasets
merged_data = pd.merge(
    races_data,  # Changed from race_data to races_data
    lap_time_data,
    on=["raceId", "driverId"],
    how="inner"
)

if "Unnamed: 0" in merged_data.columns:
    merged_data = merged_data.drop(columns=["Unnamed: 0"])

merged_data.columns


Index(['raceId', 'season', 'raceNumber', 'circuitId', 'prixName', 'raceDate',
       'driverId', 'constructorId', 'driverStartGridPos', 'driverFinalGridPos',
       'driverFinalRank', 'driverRacePoints', 'driverLapCount',
       'driverFatestLapNumber', 'driverFastestLapTime',
       'driverFastestLapSpeed', 'constructorName', 'constructorNationality',
       'constructorChampionshipStandingPoints',
       'constructorChampionshipStandingPosition',
       'constructorChampionshipStandingWins', 'constructorRacePoints',
       'driverDateOfBirth', 'driverNationality',
       'driverChampionshipStandingPoints',
       'driverChampionshipStandingPosition', 'driverChampionshipStandingWins',
       'circuitName', 'circuitLocation', 'circuitCountry', 'lat', 'lng', 'alt',
       'driverRaceResultStatus', 'driverName', 'driverAge', 'race_time',
       'driverRaceLapNumber', 'driverRaceFinalPosition', 'driverLapTime',
       'driverLapTimeInMilliseconds'],
      dtype='object')

In [5]:

merged_data["TireDegradation"] = (
    merged_data.groupby(["raceId", "driverId"])["driverLapTimeInMilliseconds"]
    .diff() / merged_data.groupby("raceId")["driverLapTimeInMilliseconds"].transform("mean")
)

# Fill missing values with median
merged_data["TireDegradation"] = (
    merged_data.groupby(["raceId", "driverId"])["driverLapTimeInMilliseconds"]
    .diff()
    .fillna(method='bfill')  
) / merged_data.groupby("raceId")["driverLapTimeInMilliseconds"].transform("mean")
merged_data["TireDegradation"] = merged_data.groupby(
    ["circuitId"]
)["TireDegradation"].transform(lambda x: x.fillna(x.median()))

#normalize TireDegradation
merged_data["TireDegradation"] = (
    merged_data["TireDegradation"] - merged_data["TireDegradation"].mean()
) / merged_data["TireDegradation"].std()


  .fillna(method='bfill')


In [6]:
def create_state_space(df):
    
    df['total_laps'] = df.groupby('raceId')['driverLapCount'].transform('max')
    
    # Create normalized lap percentage and bin using FIXED thresholds
    df['lap_pct'] = df['driverRaceLapNumber'] / df['total_laps']
    df['current_lap_stage'] = pd.cut(
        df['lap_pct'],
        bins=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
        labels=[0, 1, 2, 3, 4],
        include_lowest=True
    )
    
    # Track position using final rank (ensure no NaN values)
    df['track_position'] = pd.cut(
        df['driverFinalRank'].fillna(20),  # Assume missing = backmarker
        bins=[0, 3.9, 10.9, 20],  # Exclusive right edges
        labels=[0, 1, 2]  # Leading, Midfield, Back
    )
    
    # Tire age calculation with proper grouping
    df['tire_age'] = df.groupby(['raceId', 'driverId']).cumcount() + 1
    
    # Identify pit stops using lap time spikes (customize threshold per circuit)
    pit_stop_threshold = df.groupby('circuitId')['driverLapTimeInMilliseconds'].transform(
        lambda x: x.quantile(0.95)
    )
    df['pit_flag'] = (df['driverLapTimeInMilliseconds'] > pit_stop_threshold).astype(int)
    
    
    df['tire_age'] = df.groupby(['raceId', 'driverId']).apply(
        lambda g: g['tire_age'].where(g['pit_flag'] == 0, 0).cumsum()
    ).reset_index(drop=True)
    
    df['tire_age'] = pd.cut(
        df['tire_age'],
        bins=[-1, 10, 20, 30, 50],  
        labels=[0, 1, 2, 3]
    )
    
    
    avg_lap = df.groupby('raceId')['driverLapTimeInMilliseconds'].transform('mean')
    std_lap = df.groupby('raceId')['driverLapTimeInMilliseconds'].transform('std')
    df['traffic_z'] = (df['driverLapTimeInMilliseconds'] - avg_lap) / std_lap
    df['traffic_density'] = pd.cut(
        df['traffic_z'],
        bins=[-np.inf, -1, 1, np.inf],
        labels=[0, 1, 2]  # Low, Medium, High traffic
    )
    

    df['pit_stop_loss'] = pd.qcut(
        df['driverLapTimeInMilliseconds'],
        q=[0, 0.3, 0.7, 1],
        labels=[0, 1, 2]  # Low, Medium, High
    )
    
    state_cols = [
        'TireDegradation',
        'current_lap_stage',
        'tire_age',
        'track_position',
        'traffic_density',
        'pit_stop_loss',
        'raceId',
    ]
    
    return df[state_cols].fillna(0).astype(int)

state_df = create_state_space(merged_data)
state_df.head()

  df['tire_age'] = df.groupby(['raceId', 'driverId']).apply(


Unnamed: 0,TireDegradation,current_lap_stage,tire_age,track_position,traffic_density,pit_stop_loss,raceId
0,0,0,0,0,1,1,1
1,0,0,0,0,1,1,1
2,0,0,0,0,1,1,1
3,0,0,0,0,1,1,1
4,0,0,1,0,1,1,1


In [7]:
print(state_df.isnull().sum())
print(state_df.describe(include='all'))

state_df.columns

TireDegradation      0
current_lap_stage    0
tire_age             0
track_position       0
traffic_density      0
pit_stop_loss        0
raceId               0
dtype: int64
       TireDegradation  current_lap_stage       tire_age  track_position  \
count    589081.000000      589081.000000  589081.000000   589081.000000   
mean          0.000716           1.903015       0.196937        1.237981   
std           0.969342           1.405782       0.666225        0.743334   
min         -49.000000           0.000000       0.000000        0.000000   
25%           0.000000           1.000000       0.000000        1.000000   
50%           0.000000           2.000000       0.000000        1.000000   
75%           0.000000           3.000000       0.000000        2.000000   
max          49.000000           4.000000       3.000000        2.000000   

       traffic_density  pit_stop_loss         raceId  
count    589081.000000  589081.000000  589081.000000  
mean          1.070591       0.

Index(['TireDegradation', 'current_lap_stage', 'tire_age', 'track_position',
       'traffic_density', 'pit_stop_loss', 'raceId'],
      dtype='object')

In [8]:
actions = [
    "PIT_NEXT_LAP", 
    "PIT_IN_2_LAPS", 
    "PIT_IN_3_LAPS", 
    "PIT_IN_4_LAPS", 
    "PIT_IN_5_LAPS", 
    "NO_PIT"
]


In [10]:
import numpy as np
import random
import logging
from collections import deque
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim

# Configure logging; set level to WARNING and save logs to file.
logging.basicConfig(level=logging.WARNING, filename='pit_stop_strategy.log')
logger = logging.getLogger(__name__)

# ==============================
# Environment for Pit Stop Strategy
# ==============================
class PitStopEnv(gym.Env):
    def __init__(self, max_laps=100):
        super(PitStopEnv, self).__init__()
        self.max_laps = max_laps
        
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float32)

        self.action_space = spaces.Discrete(6)
        self.actions = [
            "PIT_NEXT_LAP", 
            "PIT_IN_2_LAPS", 
            "PIT_IN_3_LAPS", 
            "PIT_IN_4_LAPS", 
            "PIT_IN_5_LAPS", 
            "NO_PIT"
        ]
        self.reset()
        
    def reset(self):
        self.tire_degradation = 0.0         # Tire degradation starts at 0.
        self.current_lap_stage = 1          # Start at lap stage 1.
        self.tire_age = 0.0                 # No tire age at the beginning.
        self.track_position = 0             # Starting track position (can be modified as needed).
        self.traffic_density = 0.0          # Assume no traffic density initially.
        self.pit_stop_loss = 0.0            # No pit stop loss at the start.
        self.raceId = 1                     # Set raceId to 1 for the episode.
        
        self.state = np.array([
            self.tire_degradation, 
            self.current_lap_stage, 
            self.tire_age, 
            self.track_position, 
            self.traffic_density, 
            self.pit_stop_loss, 
            float(self.raceId)
        ], dtype=np.float32)
        
        self.total_reward = 0.0
        self.done = False
        return self.state
    
    def step(self, action):
        action_name = self.actions[action]
        logger.info(f"Current state: {self.state}")
        reward = 0.0
        
        # If NO_PIT, give a small progress reward and update tire degradation and age.
        if action_name == "NO_PIT":
            reward += 0.1
            logger.info("Action: NO_PIT, rewarding progress: +0.1")
            self.tire_age += 1.0
            # Simulate gradual degradation when not pitting.
            self.tire_degradation += np.random.uniform(0.1, 0.3)
            self.pit_stop_loss = 0.0  # No pit loss when not pitting.
        else:
            # For pit actions, determine the pit delay.
            if action_name == "PIT_NEXT_LAP":
                pit_delay = 1
            else:
                pit_delay = int(action_name.split("_")[2])
            # Apply a penalty that increases slightly with delay.
            penalty = -0.5 - (pit_delay - 1) * 0.05
            reward += penalty
            logger.info(f"Action: {action_name}, applying penalty: {penalty}")
            # Reset tire age and degradation upon pitting.
            self.tire_age = 0.0
            self.tire_degradation = 0.0
            # Set pit_stop_loss proportional to pit delay.
            self.pit_stop_loss = pit_delay * 1.0
        
        # Update current lap stage.
        self.current_lap_stage += 1
        
        # Update track_position (simulate slight random change).
        self.track_position += np.random.choice([-1, 0, 1])
        # Update traffic_density randomly between 0 and 5.
        self.traffic_density = np.random.uniform(0, 5)
        
        # RaceId remains constant during an episode.
        new_state = np.array([
            self.tire_degradation, 
            self.current_lap_stage, 
            self.tire_age, 
            self.track_position, 
            self.traffic_density, 
            self.pit_stop_loss, 
            float(self.raceId)
        ], dtype=np.float32)
        
        if abs(new_state[1] - self.state[1]) > 100:
            logger.warning(f"Drastic lap stage change: {self.state[1]} -> {new_state[1]}")
        
        self.state = new_state
        
        # End the episode if current lap stage reaches max_laps.
        if self.current_lap_stage >= self.max_laps:
            self.done = True
        
        self.total_reward += reward
        
        info = {"total_reward": self.total_reward}
        return self.state, reward, self.done, info

# ==============================
# DQN Model in PyTorch
# ==============================
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 24)
        self.fc2 = nn.Linear(24, 24)
        self.fc3 = nn.Linear(24, action_size)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# ==============================
# DQN Agent Implementation in PyTorch
# ==============================
class DQNAgent:
    def __init__(self, state_size, action_size, device):
        self.state_size = state_size
        self.action_size = action_size
        self.device = device
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = DQN(state_size, action_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()
        
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            act_values = self.model(state_tensor)
        return torch.argmax(act_values[0]).item()
    
    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
            next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0).to(self.device)
            target = reward
            if not done:
                with torch.no_grad():
                    next_q = self.model(next_state_tensor)
                target = reward + self.gamma * torch.max(next_q).item()
            current_q = self.model(state_tensor)
            target_f = current_q.clone().detach()
            target_f[0][action] = target
            self.optimizer.zero_grad()
            output = self.model(state_tensor)
            loss = self.criterion(output, target_f)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# ==============================
# Training Loop in PyTorch
# ==============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = PitStopEnv(max_laps=100)
state_size = env.observation_space.shape[0]  # now 7
action_size = env.action_space.n             # 6 actions
agent = DQNAgent(state_size, action_size, device)
episodes = 1000
batch_size = 32

for e in range(episodes):
    state = env.reset()
    total_episode_reward = 0
    for t in range(env.max_laps):
        action = agent.act(state)
        next_state, reward, done, info = env.step(action)
        total_episode_reward += reward
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    if (e+1) % 100 == 0:
        print(f"Episode: {e+1}/{episodes}, Total Reward: {total_episode_reward:.2f}, Epsilon: {agent.epsilon:.2f}")

model_save_path = "pit_stop_strategy_model.pth"
torch.save(agent.model.state_dict(), model_save_path)
print(f"Training complete. Model saved to {model_save_path}")


Episode: 100/1000, Total Reward: -28.55, Epsilon: 0.61
Episode: 200/1000, Total Reward: -12.65, Epsilon: 0.37
Episode: 300/1000, Total Reward: -0.35, Epsilon: 0.22
Episode: 400/1000, Total Reward: 1.30, Epsilon: 0.13
Episode: 500/1000, Total Reward: -4.05, Epsilon: 0.08
Episode: 600/1000, Total Reward: 6.40, Epsilon: 0.05
Episode: 700/1000, Total Reward: 6.25, Epsilon: 0.03
Episode: 800/1000, Total Reward: 9.25, Epsilon: 0.02
Episode: 900/1000, Total Reward: 8.10, Epsilon: 0.01
Episode: 1000/1000, Total Reward: 9.90, Epsilon: 0.01
Training complete. Model saved to pit_stop_strategy_model.pth


### Observations

1. *Total Reward Trend:*
   - *Initial Episodes (1-100):* The total reward is highly negative (-28.50), indicating the agent is performing poorly. This is expected in the early stages of training, as the agent is exploring randomly (high ε).
   - *Mid-Training (200-400):* The reward improves significantly, moving from -12.05 to 1.35. This suggests the agent is learning better strategies.
   - *Later Episodes (500-1000):* The reward stabilizes around 7-10, indicating the agent has converged to a reasonably good policy.

2. *Epsilon Decay:*
   - *Epsilon (ε):* Starts at 0.61 (high exploration) and decays to 0.01 (low exploration) by the end of training. This is typical for ε-greedy exploration, where the agent shifts from exploration to exploitation as it learns.

3. *Final Performance:*
   - The final reward (9.90) is positive, which is a good sign. It means the agent is achieving its objectives (e.g., minimizing pit stops, maximizing lap efficiency).
