# RL for Autonomous Vehicle Tasks: Safety and Traffic Optimization

## Agents

In [57]:
# TODO: define agents here

## Scenarios

In [58]:
import gymnasium
import highway_env
from matplotlib import pyplot as plt
%matplotlib inline
from stable_baselines3 import DQN
import logging
import json
import os
import numpy as np

### Lane Changing

In [59]:
lane_changing_env = gymnasium.make('highway-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}

lane_changing_env.unwrapped.configure(config)
lane_changing_env.reset()

(array([[ 1.        ,  0.91139394,  0.25      ,  0.3125    ,  0.        ],
        [ 1.        ,  0.11569419, -0.25      , -0.02011949,  0.        ],
        [ 1.        ,  0.22436145,  0.5       , -0.02403726,  0.        ],
        [ 1.        ,  0.3249993 , -0.25      , -0.02391403,  0.        ],
        [ 1.        ,  0.42816332, -0.25      , -0.01396594,  0.        ]],
       dtype=float32),
 {'speed': 25,
  'crashed': False,
  'action': 7,
  'rewards': {'collision_reward': 0.0,
   'right_lane_reward': 0.3333333333333333,
   'high_speed_reward': 0.75,
   'on_road_reward': 1.0}})

### Roundabout

In [60]:
roundabout_env = gymnasium.make('roundabout-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}
roundabout_env.unwrapped.configure(config)
roundabout_env.reset()

(array([[ 1.0000000e+00,  2.0000000e-02,  4.4999999e-01,  0.0000000e+00,
         -5.3333336e-01],
        [ 1.0000000e+00, -2.8128991e-02,  1.9801202e-01,  8.8147110e-01,
          1.2521912e-01],
        [ 1.0000000e+00, -1.9783117e-01,  1.3587800e-01,  5.6359500e-01,
          8.2056445e-01],
        [ 1.0000000e+00,  1.0000000e+00, -2.0000000e-02, -1.0000000e+00,
          2.2204460e-16],
        [ 1.0000000e+00, -1.5475370e-01, -1.2669370e-01, -7.3425084e-01,
          8.9687204e-01]], dtype=float32),
 {'speed': 8,
  'crashed': False,
  'action': 8,
  'rewards': {'collision_reward': False,
   'high_speed_reward': 0.0,
   'lane_change_reward': False,
   'on_road_reward': True}})

### Overtaking

In [61]:
overtaking_env = gymnasium.make('highway-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}
overtaking_env.unwrapped.configure(config)
overtaking_env.reset()

(array([[ 1.        ,  0.8799433 ,  0.25      ,  0.3125    ,  0.        ],
        [ 1.        ,  0.09550186,  0.25      , -0.02825355,  0.        ],
        [ 1.        ,  0.20181546,  0.        , -0.03000795,  0.        ],
        [ 1.        ,  0.30996537,  0.        , -0.04900004,  0.        ],
        [ 1.        ,  0.41641402,  0.        , -0.03983819,  0.        ]],
       dtype=float32),
 {'speed': 25,
  'crashed': False,
  'action': 2,
  'rewards': {'collision_reward': 0.0,
   'right_lane_reward': 0.3333333333333333,
   'high_speed_reward': 0.75,
   'on_road_reward': 1.0}})

## Training and Testing

In [62]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def create_directory(path):
    """
    Create directory if it doesn't exist
    
    Args:
        path (str): Directory path to create
    """
    try:
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory created: {path}")
    except Exception as e:
        logger.error(f"Error creating directory {path}: {e}")

def save_pipeline_data(agent_name, stage, environment_name, data):
    """
    Save pipeline stage data
    
    Args:
        agent_name (str): Name of the RL agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        data (dict): Data to be saved
    """
    create_directory(f"results/{agent_name}")
    
    try:
        filename = f"results/{agent_name}/{environment_name}_data.json"
        with open(filename, 'w') as f:
            json.dump(data, f, indent=4)
        logger.info(f"Data saved for {stage} in {environment_name}")
    except Exception as e:
        logger.error(f"Error saving data: {e}")

In [63]:
def train(agent, environment, agent_name, stage, environment_name, timesteps=1000):
    """
    Train the agent in a specific environment and save the model
    
    Args:
        agent (sb3.BaseAlgorithm): RL agent to train
        environment (gym.Env): Environment to train in
        agent_name (str): Name of the agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        timesteps (int, optional): Number of training timesteps. Defaults to 100.
    
    Returns:
        Trained agent
    """
    try:
        # Set environment and learn
        agent.set_env(environment)
        training_results = agent.learn(total_timesteps=timesteps)
        
        # Create directories for model and results
        create_directory(f"models/{agent_name}/{stage}")
        
        # Save model
        agent.save(f"models/{agent_name}/{stage}/{environment_name}_model")
        
        # Prepare and save training information
        training_info = {
            'agent_name': agent_name,
            'environment_name': environment_name,
            'stage': stage,
            'total_timesteps': timesteps,
            'training_results': str(training_results)
        }
        save_pipeline_data(agent_name, stage, environment_name, training_info)
        
        logger.info(f"Training completed for {agent_name} in {environment_name}")
        return agent
    
    except Exception as e:
        logger.error(f"Training failed for {agent_name} in {environment_name}: {e}")
        raise



In [64]:
def test(agent, environment, agent_name, stage, environment_name, num_episodes=3):
    test_results = []
    total_rewards = []
    total_collisions = 0
    traffic_speeds = []

    for episode in range(num_episodes):
        try:
            obs, info = environment.reset()
            done = truncated = False
            episode_reward = 0
            episode_collisions = 0
            episode_speeds = []

            while not (done or truncated):
                # Predict action using the agent
                action, _states = agent.predict(obs, deterministic=True)
                obs, reward, done, truncated, info = environment.step(action)

                # Update reward
                episode_reward += reward
                
                # Check for collisions
                if info.get("crashed", False):
                    episode_collisions += 1

                # Gather traffic speeds
                current_speeds = [
                    vehicle.speed
                    for vehicle in environment.unwrapped.road.vehicles  # Use unwrapped
                    if vehicle != environment.unwrapped.vehicle  # Exclude agent vehicle
                ]
                episode_speeds.extend(current_speeds)

                # Optional rendering
                environment.render()

            # Collect data for this episode
            total_rewards.append(episode_reward)
            total_collisions += episode_collisions
            traffic_speeds.extend(episode_speeds)

            test_results.append({
                'episode': episode,
                'total_reward': episode_reward,
                'collisions': episode_collisions,
                'avg_speed': np.mean(episode_speeds) if episode_speeds else 0,
                'speed_variance': np.var(episode_speeds) if episode_speeds else 0
            })
        
        except Exception as e:
            logger.error(f"Test episode {episode + 1} failed in {environment_name}: {e}")

    # Calculate overall KPIs
    avg_reward = np.mean(total_rewards)
    avg_speed = np.mean(traffic_speeds) if traffic_speeds else 0
    speed_variance = np.var(traffic_speeds) if traffic_speeds else 0

    logger.info(f"Test completed in {environment_name} - "
                f"Avg Reward: {avg_reward}, Total Collisions: {total_collisions}, "
                f"Avg Traffic Speed: {avg_speed}, Speed Variance: {speed_variance}")

    # Save test results and KPIs
    save_pipeline_data(agent_name, stage, environment_name, {
        'test_results': test_results,
        'kpis': {
            'average_reward': avg_reward,
            'total_collisions': total_collisions,
            'average_speed': avg_speed,
            'speed_variance': speed_variance
        },
        'environment': environment_name,
        'stage': stage
    })

    return {
        'test_results': test_results,
        'kpis': {
            'average_reward': avg_reward,
            'total_collisions': total_collisions,
            'average_speed': avg_speed,
            'speed_variance': speed_variance
        }
    }

In [65]:
def rl_pipeline(initial_agent, agent_name, lane_changing_env, roundabout_env, overtaking_env):
    """
    Sequential environment training pipeline
    
    Args:
        initial_agent (sb3.BaseAlgorithm): Initial RL agent
        agent_name (str): Name of the agent
        lane_changing_env (gym.Env): Lane changing environment
        roundabout_env (gym.Env): Roundabout environment
        overtaking_env (gym.Env): Overtaking environment
    
    Returns:
        Trained agent
    """
    performance = {}

    logger.info("Sequential Environment Pipeline Started")
    
    # Stage 1: Lane Changing
    logger.info("Stage 1: Lane Changing Environment")
    
    # Train in Lane Changing
    lane_changing_agent = train(
        initial_agent, lane_changing_env, 
        agent_name, 'lane_changing', 'lane_changing'
    )
    
    # Test in Lane Changing
    performance["lane_changing"] = test(
        lane_changing_agent, lane_changing_env, 
        agent_name, 'pos_training_test', 'lane_changing'
    )
    
    # Stage 2: Roundabout
    logger.info("Stage 2: Roundabout Environment")
    
    # First, test the lane changing agent in roundabout
    performance["lane_changing_roundabout"] = test(
        lane_changing_agent, roundabout_env, 
        agent_name, 'pre_training_test', 'roundabout'
    )
    
    # Then train in Roundabout
    roundabout_agent = train(
        lane_changing_agent, roundabout_env, 
        agent_name, 'roundabout', 'roundabout'
    )
    
    # Test in Roundabout
    performance["roundabout"] = test(
        roundabout_agent, roundabout_env, 
        agent_name, 'pos_train_test', 'roundabout'
    )
    
    # Stage 3: Overtaking
    logger.info("Stage 3: Overtaking Environment")
    
    # First, test the roundabout agent in overtaking
    performance["roundabout_overtaking"] = test(
        roundabout_agent, overtaking_env, 
        agent_name, 'pre_training_test', 'overtaking'
    )
    
    # Then train in Overtaking
    overtaking_agent = train(
        roundabout_agent, overtaking_env, 
        agent_name, 'overtaking', 'overtaking'
    )
    
    # Test in Overtaking
    performance["overtaking"] = test(
        overtaking_agent, overtaking_env, 
        agent_name, 'pos_training_test', 'overtaking'
    )
    
    logger.info("Sequential Environment Pipeline Completed")

    # Save the final agent
    create_directory(f"models/{agent_name}/final")
    overtaking_agent.save(f"models/{agent_name}/{agent_name}_final_model")
    
    return overtaking_agent, performance

- ### Agent 1

In [66]:
# Execute the pipeline to agent 1

# a1_results = pipeline(...)

# Display Results

agent_DQN = DQN('MlpPolicy', lane_changing_env,
        policy_kwargs=dict(net_arch=[256, 256]),
        learning_rate = 1e-4,
        buffer_size=15000,
        learning_starts=200,
        batch_size=64,
        gamma=0.8,
        train_freq=1,
        gradient_steps=1,
        target_update_interval=50,
        verbose=1)

final_agent_DQN, performance = rl_pipeline(agent_DQN, 'agent_DQN', lane_changing_env, roundabout_env, overtaking_env)

2024-11-23 19:07:21,116 - INFO - Sequential Environment Pipeline Started
2024-11-23 19:07:21,117 - INFO - Stage 1: Lane Changing Environment


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40       |
|    ep_rew_mean      | 5.25     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1        |
|    time_elapsed     | 95       |
|    total_timesteps  | 160      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40       |
|    ep_rew_mean      | 3.06     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1        |
|    time_elapsed     | 193      |
|    total_timesteps  | 320      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             |

2024-11-23 19:17:05,504 - INFO - Directory created: models/agent_DQN/lane_changing
2024-11-23 19:17:05,519 - INFO - Directory created: results/agent_DQN
2024-11-23 19:17:05,520 - INFO - Data saved for lane_changing in lane_changing
2024-11-23 19:17:05,521 - INFO - Training completed for agent_DQN in lane_changing
2024-11-23 19:17:54,862 - INFO - Test completed in lane_changing - Avg Reward: 20.79755539860387, Total Collisions: 2, Avg Traffic Speed: 20.792449203201404, Speed Variance: 1.8339659257507437
2024-11-23 19:17:54,863 - INFO - Directory created: results/agent_DQN
2024-11-23 19:17:54,864 - INFO - Data saved for pos_training_test in lane_changing
2024-11-23 19:17:54,864 - INFO - Stage 2: Roundabout Environment
2024-11-23 19:18:38,076 - INFO - Test completed in roundabout - Avg Reward: 7.0, Total Collisions: 0, Avg Traffic Speed: 16.05936435139665, Speed Variance: 3.849586578585318
2024-11-23 19:18:38,076 - INFO - Directory created: results/agent_DQN
2024-11-23 19:18:38,077 - INFO

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | 3.72     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 11       |
|    time_elapsed     | 11       |
|    total_timesteps  | 122      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26       |
|    ep_rew_mean      | 2.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 11       |
|    time_elapsed     | 18       |
|    total_timesteps  | 208      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.176    |
|    n_updates        | 807      |
----------------------------------
-------------

2024-11-23 19:20:03,076 - INFO - Directory created: models/agent_DQN/roundabout
2024-11-23 19:20:03,091 - INFO - Directory created: results/agent_DQN
2024-11-23 19:20:03,092 - INFO - Data saved for roundabout in roundabout
2024-11-23 19:20:03,093 - INFO - Training completed for agent_DQN in roundabout
2024-11-23 19:20:37,091 - INFO - Test completed in roundabout - Avg Reward: 9.818181818181818, Total Collisions: 1, Avg Traffic Speed: 12.076996558429755, Speed Variance: 34.41200189550674
2024-11-23 19:20:37,093 - INFO - Directory created: results/agent_DQN
2024-11-23 19:20:37,094 - INFO - Data saved for pos_train_test in roundabout
2024-11-23 19:20:37,095 - INFO - Stage 3: Overtaking Environment
2024-11-23 19:21:44,014 - INFO - Test completed in overtaking - Avg Reward: 28.962457275002382, Total Collisions: 3, Avg Traffic Speed: 20.56155841407277, Speed Variance: 2.0081564468740827
2024-11-23 19:21:44,015 - INFO - Directory created: results/agent_DQN
2024-11-23 19:21:44,016 - INFO - Dat

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31       |
|    ep_rew_mean      | 3.26     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1        |
|    time_elapsed     | 66       |
|    total_timesteps  | 124      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.5     |
|    ep_rew_mean      | 6.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1        |
|    time_elapsed     | 135      |
|    total_timesteps  | 252      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.117    |
|    n_updates        | 1651     |
----------------------------------
-------------

2024-11-23 19:30:38,398 - INFO - Directory created: models/agent_DQN/overtaking
2024-11-23 19:30:38,414 - INFO - Directory created: results/agent_DQN
2024-11-23 19:30:38,415 - INFO - Data saved for overtaking in overtaking
2024-11-23 19:30:38,415 - INFO - Training completed for agent_DQN in overtaking
2024-11-23 19:31:21,058 - INFO - Test completed in overtaking - Avg Reward: 18.86010745501706, Total Collisions: 2, Avg Traffic Speed: 20.53784970423615, Speed Variance: 2.1475260098291247
2024-11-23 19:31:21,059 - INFO - Directory created: results/agent_DQN
2024-11-23 19:31:21,060 - INFO - Data saved for pos_training_test in overtaking
2024-11-23 19:31:21,061 - INFO - Sequential Environment Pipeline Completed
2024-11-23 19:31:21,062 - INFO - Directory created: models/agent_DQN/final


- ### Agent N

## Performance

In [67]:
print(performance)

{'lane_changing': {'test_results': [{'episode': 0, 'total_reward': 37.33333333333332, 'collisions': 0, 'avg_speed': 20.916033971711805, 'speed_variance': 1.251580378247554}, {'episode': 1, 'total_reward': 6.73594074074074, 'collisions': 1, 'avg_speed': 20.00223958045685, 'speed_variance': 3.781444083911583}, {'episode': 2, 'total_reward': 18.323392121737548, 'collisions': 1, 'avg_speed': 20.861363515278416, 'speed_variance': 1.9346776859169637}], 'kpis': {'average_reward': 20.79755539860387, 'total_collisions': 2, 'average_speed': 20.792449203201404, 'speed_variance': 1.8339659257507437}}, 'lane_changing_roundabout': {'test_results': [{'episode': 0, 'total_reward': 9.09090909090909, 'collisions': 0, 'avg_speed': 16.929400876682518, 'speed_variance': 1.7146135297702738}, {'episode': 1, 'total_reward': 8.272727272727273, 'collisions': 0, 'avg_speed': 16.020709335532096, 'speed_variance': 6.10137651563923}, {'episode': 2, 'total_reward': 3.6363636363636362, 'collisions': 0, 'avg_speed': 1

In [68]:
# lane changing performance
lane_changing_performance = performance["lane_changing"]

print("Lane Changing Performance")
print("Average Reward: ", lane_changing_performance["kpis"]["average_reward"])
print("Total Collisions: ", lane_changing_performance["kpis"]["total_collisions"])
print("Average Speed: ", lane_changing_performance["kpis"]["average_speed"])
print("Speed Variance: ", lane_changing_performance["kpis"]["speed_variance"])
print("\n")

Lane Changing Performance
Average Reward:  20.79755539860387
Total Collisions:  2
Average Speed:  20.792449203201404
Speed Variance:  1.8339659257507437




In [69]:
# lane changing roundabout performance
lane_changing_roundabout_performance = performance["lane_changing_roundabout"]

print("Lane Changing Roundabout Performance")
print("Average Reward: ", lane_changing_roundabout_performance["kpis"]["average_reward"])
print("Total Collisions: ", lane_changing_roundabout_performance["kpis"]["total_collisions"])
print("Average Speed: ", lane_changing_roundabout_performance["kpis"]["average_speed"])
print("Speed Variance: ", lane_changing_roundabout_performance["kpis"]["speed_variance"])
print("\n")

Lane Changing Roundabout Performance
Average Reward:  7.0
Total Collisions:  0
Average Speed:  16.05936435139665
Speed Variance:  3.849586578585318




In [70]:
# roundabout performance
roundabout_performance = performance["roundabout"]

print("Roundabout Performance")
print("Average Reward: ", roundabout_performance["kpis"]["average_reward"])
print("Total Collisions: ", roundabout_performance["kpis"]["total_collisions"])
print("Average Speed: ", roundabout_performance["kpis"]["average_speed"])
print("Speed Variance: ", roundabout_performance["kpis"]["speed_variance"])
print("\n")

Roundabout Performance
Average Reward:  9.818181818181818
Total Collisions:  1
Average Speed:  12.076996558429755
Speed Variance:  34.41200189550674




In [71]:
# roundabout overtaking performance
roundabout_overtaking_performance = performance["roundabout_overtaking"]

print("Roundabout Overtaking Performance")
print("Average Reward: ", roundabout_overtaking_performance["kpis"]["average_reward"])
print("Total Collisions: ", roundabout_overtaking_performance["kpis"]["total_collisions"])
print("Average Speed: ", roundabout_overtaking_performance["kpis"]["average_speed"])
print("Speed Variance: ", roundabout_overtaking_performance["kpis"]["speed_variance"])
print("\n")

Roundabout Overtaking Performance
Average Reward:  28.962457275002382
Total Collisions:  3
Average Speed:  20.56155841407277
Speed Variance:  2.0081564468740827




In [None]:
# overtaking performance
overtaking_performance = performance["overtaking"]

print("Overtaking Performance")
print("Average Reward: ", overtaking_performance["kpis"]["average_reward"])
print("Total Collisions: ", overtaking_performance["kpis"]["total_collisions"])
print("Average Speed: ", overtaking_performance["kpis"]["average_speed"])
print("Speed Variance: ", overtaking_performance["kpis"]["speed_variance"])
print("\n")

Overtaking Performance
Average Reward:  18.86010745501706
Total Collisions:  2
Average Speed:  20.53784970423615
Speed Variance:  2.1475260098291247




: 

## Analysis

## Conclusion