# RL for Autonomous Vehicle Tasks: Safety and Traffic Optimization

## Agents

In [None]:
# TODO: define agents here

## Scenarios

In [129]:
import gymnasium
import highway_env
from matplotlib import pyplot as plt
%matplotlib inline
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
import logging
import json
import os
import numpy as np

### Lane Changing

In [4]:
lane_changing_env = gymnasium.make('highway-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}

lane_changing_env.unwrapped.configure(config)
lane_changing_env.reset()

(array([[ 1.        ,  0.9026946 ,  0.75      ,  0.3125    ,  0.        ],
        [ 1.        ,  0.09810572, -0.75      , -0.03643593,  0.        ],
        [ 1.        ,  0.2060501 , -0.5       , -0.01889157,  0.        ],
        [ 1.        ,  0.3165085 , -0.75      , -0.01931337,  0.        ],
        [ 1.        ,  0.43092677,  0.        , -0.01856169,  0.        ]],
       dtype=float32),
 {'speed': 25,
  'crashed': False,
  'action': 6,
  'rewards': {'collision_reward': 0.0,
   'right_lane_reward': 1.0,
   'high_speed_reward': 0.75,
   'on_road_reward': 1.0}})

### Roundabout

In [5]:
roundabout_env = gymnasium.make('roundabout-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}
roundabout_env.unwrapped.configure(config)
roundabout_env.reset()

(array([[ 1.0000000e+00,  2.0000000e-02,  4.4999999e-01,  0.0000000e+00,
         -5.3333336e-01],
        [ 1.0000000e+00, -4.1614782e-02,  1.9562262e-01,  7.7160704e-01,
          1.6414389e-01],
        [ 1.0000000e+00, -1.9581525e-01,  1.3876739e-01,  6.6368437e-01,
          9.3652779e-01],
        [ 1.0000000e+00,  1.0000000e+00, -2.0000000e-02, -1.0000000e+00,
          2.2204460e-16],
        [ 1.0000000e+00, -1.7001514e-01, -1.0533209e-01, -5.4396957e-01,
          8.7801415e-01]], dtype=float32),
 {'speed': 8,
  'crashed': False,
  'action': 8,
  'rewards': {'collision_reward': False,
   'high_speed_reward': 0.0,
   'lane_change_reward': False,
   'on_road_reward': True}})

### Overtaking

In [6]:
overtaking_env = gymnasium.make('highway-v0', render_mode='rgb_array')
config = {
    "vehicles_count": 50,  # Number of vehicles in the environment
    "controlled_vehicles": 1,  # Number of vehicles controlled by the agent
    "duration": 40,  # Duration of each episode
    "reverse_penalty": -10,  # Large penalty for reversing
    "drifting_penalty": -5,  # Penalty for drifting or sharp turns
    "collision_reward": -2,  # Stronger penalty for collisions
    "off_road_penalty": -10,  # Penalize for going off-road
    "lane_change_reward": 0.1,  # Reward for staying in lane
    "reward_speed_range": [10, 30],  # Reward only within this speed range
    "simulation_frequency": 15,  # Lower simulation speed to avoid erratic behavior
    "policy_frequency": 1,  # Fewer policy updates per second
    "screen_width": 600,
    "screen_height": 400,
    "offscreen_rendering": False,
    "show_trajectories": True,
    "action": {
        "type": "DiscreteAction"  # Discrete control (steer left, right, accelerate)
    },
}
overtaking_env.unwrapped.configure(config)
overtaking_env.reset()

(array([[ 1.        ,  0.8956426 ,  0.        ,  0.3125    ,  0.        ],
        [ 1.        ,  0.09184396,  0.75      , -0.04517423,  0.        ],
        [ 1.        ,  0.19276139,  0.25      , -0.01343443,  0.        ],
        [ 1.        ,  0.29735085,  0.25      , -0.02435903,  0.        ],
        [ 1.        ,  0.40706015,  0.        , -0.0261277 ,  0.        ]],
       dtype=float32),
 {'speed': 25,
  'crashed': False,
  'action': 7,
  'rewards': {'collision_reward': 0.0,
   'right_lane_reward': 0.0,
   'high_speed_reward': 0.75,
   'on_road_reward': 1.0}})

## Training and Testing

In [7]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def create_directory(path):
    """
    Create directory if it doesn't exist
    
    Args:
        path (str): Directory path to create
    """
    try:
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory created: {path}")
    except Exception as e:
        logger.error(f"Error creating directory {path}: {e}")

def save_pipeline_data(agent_name, stage, environment_name, data):
    """
    Save pipeline stage data
    
    Args:
        agent_name (str): Name of the RL agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        data (dict): Data to be saved
    """
    create_directory(f"results/{agent_name}")
    
    try:
        filename = f"results/{agent_name}/{environment_name + "_" + stage}_data.json"
        with open(filename, 'w') as f:
            json.dump(data, f, indent=4)
        logger.info(f"Data saved for {stage} in {environment_name}")
    except Exception as e:
        logger.error(f"Error saving data: {e}")

In [134]:
def train(agent, environment, agent_name, stage, environment_name, timesteps=100):
    """
    Train the agent in a specific environment and save the model
    
    Args:
        agent (sb3.BaseAlgorithm): RL agent to train
        environment (gym.Env): Environment to train in
        agent_name (str): Name of the agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        timesteps (int, optional): Number of training timesteps. Defaults to 100.
    
    Returns:
        Trained agent
    """
    try:
        # Wrap environment with Monitor
        log_dir = f"logs/{agent_name}"
        os.makedirs(log_dir, exist_ok=True)
        env = Monitor(environment, log_dir)
            
        # Set environment and learn
        agent.set_env(env)
        agent.learn(total_timesteps=timesteps)

        # Get training results
        training_results = {
            'rewards': env.get_episode_rewards(),
        }
        
        # Create directories for model and results
        create_directory(f"models/{agent_name}")
        
        # Save model
        agent.save(f"models/{agent_name}/{environment_name}_model")
        
        # Prepare and save training information
        training_info = {
            'agent_name': agent_name,
            'environment_name': environment_name,
            'stage': stage,
            'total_timesteps': timesteps,
            'training_results': training_results
        }
        save_pipeline_data(agent_name, "train", environment_name, training_info)
        
        logger.info(f"Training completed for {agent_name} in {environment_name}")
        return agent
    
    except Exception as e:
        logger.error(f"Training failed for {agent_name} in {environment_name}: {e}")
        raise



In [10]:
def test(agent, environment, agent_name, stage, environment_name, num_episodes=1):
    test_results = []
    total_rewards = []
    total_collisions = 0
    traffic_speeds = []

    for episode in range(num_episodes):
        try:
            obs, info = environment.reset()
            done = truncated = False
            episode_reward = 0
            episode_collisions = 0
            episode_speeds = []

            while not (done or truncated):
                # Predict action using the agent
                action, _states = agent.predict(obs, deterministic=True)
                obs, reward, done, truncated, info = environment.step(action)

                # Update reward
                episode_reward += reward
                
                # Check for collisions
                if info.get("crashed", False):
                    episode_collisions += 1

                # Gather traffic speeds
                current_speeds = [
                    vehicle.speed
                    for vehicle in environment.unwrapped.road.vehicles  # Use unwrapped
                    if vehicle != environment.unwrapped.vehicle  # Exclude agent vehicle
                ]
                episode_speeds.extend(current_speeds)

                # Optional rendering
                environment.render()

            environment.close()

            # Collect data for this episode
            total_rewards.append(episode_reward)
            total_collisions += episode_collisions
            traffic_speeds.extend(episode_speeds)

            test_results.append({
                'episode': episode,
                'total_reward': episode_reward,
                'collisions': episode_collisions,
                'avg_speed': np.mean(episode_speeds) if episode_speeds else 0,
                'speed_variance': np.var(episode_speeds) if episode_speeds else 0
            })
        
        except Exception as e:
            logger.error(f"Test episode {episode + 1} failed in {environment_name}: {e}")

    # Calculate overall KPIs
    avg_reward = np.mean(total_rewards)
    avg_speed = np.mean(traffic_speeds) if traffic_speeds else 0
    speed_variance = np.var(traffic_speeds) if traffic_speeds else 0

    logger.info(f"Test completed in {environment_name} - "
                f"Avg Reward: {avg_reward}, Total Collisions: {total_collisions}, "
                f"Avg Traffic Speed: {avg_speed}, Speed Variance: {speed_variance}")

    # Save test results and KPIs
    save_pipeline_data(agent_name, stage, environment_name, {
        'test_results': test_results,
        'kpis': {
            'average_reward': avg_reward,
            'total_collisions': total_collisions,
            'average_speed': avg_speed,
            'speed_variance': speed_variance
        },
        'environment': environment_name,
        'stage': stage
    })

    return {
        'test_results': test_results,
        'kpis': {
            'average_reward': avg_reward,
            'total_collisions': total_collisions,
            'average_speed': avg_speed,
            'speed_variance': speed_variance
        }
    }

In [11]:
def rl_pipeline(initial_agent, agent_name, lane_changing_env, roundabout_env, overtaking_env):
    """
    Sequential environment training pipeline
    
    Args:
        initial_agent (sb3.BaseAlgorithm): Initial RL agent
        agent_name (str): Name of the agent
        lane_changing_env (gym.Env): Lane changing environment
        roundabout_env (gym.Env): Roundabout environment
        overtaking_env (gym.Env): Overtaking environment
    
    Returns:
        Trained agent
    """
    performance = {}

    logger.info("Sequential Environment Pipeline Started")
    
    # Stage 1: Lane Changing
    logger.info("Stage 1: Lane Changing Environment")
    
    # Train in Lane Changing
    lane_changing_agent = train(
        initial_agent, lane_changing_env, 
        agent_name, 'lane_changing', 'lane_changing'
    )
    
    # Test in Lane Changing
    performance["lane_changing"] = test(
        lane_changing_agent, lane_changing_env, 
        agent_name, 'pos_training_test', 'lane_changing'
    )
    
    # Stage 2: Roundabout
    logger.info("Stage 2: Roundabout Environment")
    
    # First, test the lane changing agent in roundabout
    performance["lane_changing_roundabout"] = test(
        lane_changing_agent, roundabout_env, 
        agent_name, 'pre_training_test', 'roundabout'
    )
    
    # Then train in Roundabout
    roundabout_agent = train(
        lane_changing_agent, roundabout_env, 
        agent_name, 'roundabout', 'roundabout'
    )
    
    # Test in Roundabout
    performance["roundabout"] = test(
        roundabout_agent, roundabout_env, 
        agent_name, 'pos_training_test', 'roundabout'
    )
    
    # Stage 3: Overtaking
    logger.info("Stage 3: Overtaking Environment")
    
    # First, test the roundabout agent in overtaking
    performance["roundabout_overtaking"] = test(
        roundabout_agent, overtaking_env, 
        agent_name, 'pre_training_test', 'overtaking'
    )
    
    # Then train in Overtaking
    overtaking_agent = train(
        roundabout_agent, overtaking_env, 
        agent_name, 'overtaking', 'overtaking'
    )
    
    # Test in Overtaking
    performance["overtaking"] = test(
        overtaking_agent, overtaking_env, 
        agent_name, 'pos_training_test', 'overtaking'
    )
    
    logger.info("Sequential Environment Pipeline Completed")

    # Save the final agent
    create_directory(f"models/{agent_name}/final")
    overtaking_agent.save(f"models/{agent_name}/{agent_name}_final_model")
    
    return overtaking_agent, performance

- ### Agent 1

In [135]:
# Execute the pipeline to agent 1

# a1_results = pipeline(...)

# Display Results

agent_DQN = DQN('MlpPolicy', lane_changing_env,
        policy_kwargs=dict(net_arch=[256, 256]),
        learning_rate = 1e-4,
        buffer_size=15000,
        learning_starts=200,
        batch_size=64,
        gamma=0.8,
        train_freq=1,
        gradient_steps=1,
        target_update_interval=50,
        verbose=1)

final_agent_DQN, performance = rl_pipeline(agent_DQN, 'agent_DQN', lane_changing_env, roundabout_env, overtaking_env)

2024-11-27 14:09:21,498 - INFO - Sequential Environment Pipeline Started
2024-11-27 14:09:21,499 - INFO - Stage 1: Lane Changing Environment


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a DummyVecEnv.


2024-11-27 14:09:56,778 - INFO - Directory created: models/agent_DQN
2024-11-27 14:09:56,785 - INFO - Directory created: results/agent_DQN
2024-11-27 14:09:56,786 - INFO - Data saved for train in lane_changing
2024-11-27 14:09:56,786 - INFO - Training completed for agent_DQN in lane_changing
2024-11-27 14:10:15,733 - INFO - Test completed in lane_changing - Avg Reward: 17.116009871911153, Total Collisions: 0, Avg Traffic Speed: 20.929875051604423, Speed Variance: 1.218203343970156
2024-11-27 14:10:15,734 - INFO - Directory created: results/agent_DQN
2024-11-27 14:10:15,736 - INFO - Data saved for pos_training_test in lane_changing
2024-11-27 14:10:15,738 - INFO - Stage 2: Roundabout Environment
2024-11-27 14:10:19,628 - INFO - Test completed in roundabout - Avg Reward: 2.7727272727272725, Total Collisions: 1, Avg Traffic Speed: 14.570962810740308, Speed Variance: 2.9089537218699038
2024-11-27 14:10:19,630 - INFO - Directory created: results/agent_DQN
2024-11-27 14:10:19,632 - INFO - Da

Wrapping the env in a DummyVecEnv.


2024-11-27 14:10:25,090 - INFO - Directory created: models/agent_DQN
2024-11-27 14:10:25,096 - INFO - Directory created: results/agent_DQN
2024-11-27 14:10:25,097 - INFO - Data saved for train in roundabout
2024-11-27 14:10:25,098 - INFO - Training completed for agent_DQN in roundabout
2024-11-27 14:10:35,337 - INFO - Test completed in roundabout - Avg Reward: 11.09090909090909, Total Collisions: 0, Avg Traffic Speed: 15.41346263291739, Speed Variance: 1.9876690466336384
2024-11-27 14:10:35,338 - INFO - Directory created: results/agent_DQN
2024-11-27 14:10:35,340 - INFO - Data saved for pos_training_test in roundabout
2024-11-27 14:10:35,341 - INFO - Stage 3: Overtaking Environment
2024-11-27 14:10:54,516 - INFO - Test completed in overtaking - Avg Reward: 6.3999999999999995, Total Collisions: 0, Avg Traffic Speed: 20.734136222984855, Speed Variance: 1.507574867797725
2024-11-27 14:10:54,518 - INFO - Directory created: results/agent_DQN
2024-11-27 14:10:54,520 - INFO - Data saved for p

Wrapping the env in a DummyVecEnv.


2024-11-27 14:11:29,172 - INFO - Directory created: models/agent_DQN
2024-11-27 14:11:29,179 - INFO - Directory created: results/agent_DQN
2024-11-27 14:11:29,179 - INFO - Data saved for train in overtaking
2024-11-27 14:11:29,180 - INFO - Training completed for agent_DQN in overtaking
2024-11-27 14:11:50,866 - INFO - Test completed in overtaking - Avg Reward: 8.975132223632718, Total Collisions: 0, Avg Traffic Speed: 20.67047268649021, Speed Variance: 1.120284505734788
2024-11-27 14:11:50,867 - INFO - Directory created: results/agent_DQN
2024-11-27 14:11:50,868 - INFO - Data saved for pos_training_test in overtaking
2024-11-27 14:11:50,869 - INFO - Sequential Environment Pipeline Completed
2024-11-27 14:11:50,870 - INFO - Directory created: models/agent_DQN/final


- ### Agent N

## Performance

In [119]:
# Display results
def display_results(agent_name, type="table"):
    # Load results
    results = {}
    for stage in ["lane_changing_pos_training_test", "roundabout_pre_training_test", "roundabout_pos_training_test", "overtaking_pre_training_test", "overtaking_pos_training_test"]:
        with open(f"results/{agent_name}/{stage}_data.json", 'r') as f:
            results[stage] = json.load(f)

    if type == "list":
        print(f"--- {agent_name} ---\n")
        for stage, data in results.items():
            print(f"> {stage}")
            print(f"    - Average Reward: {data['kpis']['average_reward']}")
            print(f"    - Total Collisions: {data['kpis']['total_collisions']}")
            print(f"    - Average Speed: {data['kpis']['average_speed']}")
            print(f"    - Speed Variance: {data['kpis']['speed_variance']}")
            print("")
        print("--------------------\n")
    elif type == "table":
        table = []
        for stage, data in results.items():
            table.append([stage, data['kpis']['average_reward'], data['kpis']['total_collisions'], data['kpis']['average_speed'], data['kpis']['speed_variance']])
        table = np.array(table)

        print("|".join([f"{"["+str(agent_name)+"]":<20} {str("Stage"):>10} ", "Average Reward".center(20), "Total Collisions".center(20), "Average Speed".center(20), "Speed Variance".center(20)]))
        print("=" * 117)
        for row in table:
            formatted_row = [
                f"{str(row[0]):>31} ",    # Stage (left aligned)
                f"{str(row[1]):^20}",    # Average Reward (center aligned)
                f"{str(row[2]):^20}",    # Total Collisions (center aligned)
                f"{str(row[3]):^20}",    # Average Speed (center aligned)
                f"{str(row[4]):^20}",     # Speed Variance (center aligned)
            ]
            print("|".join(formatted_row))
            print("-" * 117)
        print("\n")
        

In [136]:
display_results("agent_DQN")

[agent_DQN]               Stage |   Average Reward   |  Total Collisions  |   Average Speed    |   Speed Variance   
lane_changing_pos_training_test | 17.116009871911153 |         0          | 20.929875051604423 | 1.218203343970156  
---------------------------------------------------------------------------------------------------------------------
   roundabout_pre_training_test | 2.7727272727272725 |         1          | 14.570962810740308 | 2.9089537218699038 
---------------------------------------------------------------------------------------------------------------------
   roundabout_pos_training_test | 11.09090909090909  |         0          | 15.41346263291739  | 1.9876690466336384 
---------------------------------------------------------------------------------------------------------------------
   overtaking_pre_training_test | 6.3999999999999995 |         0          | 20.734136222984855 | 1.507574867797725  
-------------------------------------------------------------

## Analysis

## Conclusion