# RL for Autonomous Vehicle Tasks: Safety and Traffic Optimization

## Agents

In [1]:
# TODO: define agents here

## Scenarios

In [2]:
import gymnasium
import highway_env
from matplotlib import pyplot as plt
%matplotlib inline
from stable_baselines3 import DQN
import logging
import json
import os



### Lane Changing

In [3]:
lane_changing_env = gymnasium.make('highway-v0', render_mode='rgb_array')

### Roundabout

In [4]:
roundabout_env = gymnasium.make('roundabout-v0', render_mode='rgb_array')

### Overtaking

In [5]:
overtaking_env = gymnasium.make('highway-v0', render_mode='rgb_array')

## Training and Testing

In [None]:
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def create_directory(path):
    """
    Create directory if it doesn't exist
    
    Args:
        path (str): Directory path to create
    """
    try:
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory created: {path}")
    except Exception as e:
        logger.error(f"Error creating directory {path}: {e}")

def save_pipeline_data(agent_name, stage, environment_name, data):
    """
    Save pipeline stage data
    
    Args:
        agent_name (str): Name of the RL agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        data (dict): Data to be saved
    """
    create_directory(f"results/{agent_name}")
    
    try:
        filename = f"results/{agent_name}/{environment_name}_data.json"
        with open(filename, 'w') as f:
            json.dump(data, f, indent=4)
        logger.info(f"Data saved for {stage} in {environment_name}")
    except Exception as e:
        logger.error(f"Error saving data: {e}")

In [None]:
def test(agent, environment, agent_name, stage, environment_name, num_episodes=3):
    """
    Test the agent in a specific environment and record results
    
    Args:
        agent (sb3.BaseAlgorithm): RL agent to test
        environment (gym.Env): Environment to test in
        agent_name (str): Name of the agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        num_episodes (int, optional): Number of test episodes. Defaults to 3.
    
    Returns:
        List of test episode data
    """
    test_results = []
    
    for episode in range(num_episodes):
        try:
            obs, info = environment.reset()
            done = truncated = False
            episode_data = {
                'episode': episode,
                'total_reward': 0,
                'steps': [],
                'final_info': None
            }
            
            while not (done or truncated):
                action, _states = agent.predict(obs, deterministic=True)
                obs, reward, done, truncated, info = environment.step(action)
                
                episode_data['total_reward'] += reward
                episode_data['steps'].append({
                    'observation': obs.tolist() if hasattr(obs, 'tolist') else str(obs),
                    'action': action.tolist() if hasattr(action, 'tolist') else str(action),
                    'reward': reward
                })
                
                # Optional rendering (comment out if not needed)
                environment.render()
            
            episode_data['final_info'] = info
            test_results.append(episode_data)
            environment.close()
            logger.info(f"Test episode {episode + 1} completed in {environment_name}")
        
        except Exception as e:
            logger.error(f"Test episode {episode + 1} failed in {environment_name}: {e}")
    
    # Save test results
    save_pipeline_data(agent_name, stage, environment_name, {
        'test_results': test_results,
        'environment': environment_name,
        'stage': stage
    })
    
    return test_results

def train(agent, environment, agent_name, stage, environment_name, timesteps=100):
    """
    Train the agent in a specific environment and save the model
    
    Args:
        agent (sb3.BaseAlgorithm): RL agent to train
        environment (gym.Env): Environment to train in
        agent_name (str): Name of the agent
        stage (str): Current pipeline stage
        environment_name (str): Name of the environment
        timesteps (int, optional): Number of training timesteps. Defaults to 100.
    
    Returns:
        Trained agent
    """
    try:
        # Set environment and learn
        agent.set_env(environment)
        training_results = agent.learn(total_timesteps=timesteps)
        
        # Create directories for model and results
        create_directory(f"models/{agent_name}/{stage}")
        
        # Save model
        agent.save(f"models/{agent_name}/{stage}/{environment_name}_model")
        
        # Prepare and save training information
        training_info = {
            'agent_name': agent_name,
            'environment_name': environment_name,
            'stage': stage,
            'total_timesteps': timesteps,
            'training_results': str(training_results)
        }
        save_pipeline_data(agent_name, stage, environment_name, training_info)
        
        logger.info(f"Training completed for {agent_name} in {environment_name}")
        return agent
    
    except Exception as e:
        logger.error(f"Training failed for {agent_name} in {environment_name}: {e}")
        raise

def rl_pipeline(initial_agent, agent_name, lane_changing_env, roundabout_env, overtaking_env):
    """
    Sequential environment training pipeline
    
    Args:
        initial_agent (sb3.BaseAlgorithm): Initial RL agent
        agent_name (str): Name of the agent
        lane_changing_env (gym.Env): Lane changing environment
        roundabout_env (gym.Env): Roundabout environment
        overtaking_env (gym.Env): Overtaking environment
    
    Returns:
        Trained agent
    """
    logger.info("Sequential Environment Pipeline Started")
    
    # Stage 1: Lane Changing
    logger.info("Stage 1: Lane Changing Environment")
    
    # Train in Lane Changing
    lane_changing_agent = train(
        initial_agent, lane_changing_env, 
        agent_name, 'lane_changing', 'lane_changing'
    )
    
    # Test in Lane Changing
    test(
        lane_changing_agent, lane_changing_env, 
        agent_name, 'pos_training_test', 'lane_changing'
    )
    
    # Stage 2: Roundabout
    logger.info("Stage 2: Roundabout Environment")
    
    # First, test the lane changing agent in roundabout
    test(
        lane_changing_agent, roundabout_env, 
        agent_name, 'pre_training_test', 'roundabout'
    )
    
    # Then train in Roundabout
    roundabout_agent = train(
        lane_changing_agent, roundabout_env, 
        agent_name, 'roundabout', 'roundabout'
    )
    
    # Test in Roundabout
    test(
        roundabout_agent, roundabout_env, 
        agent_name, 'pos_train_test', 'roundabout'
    )
    
    # Stage 3: Overtaking
    logger.info("Stage 3: Overtaking Environment")
    
    # First, test the roundabout agent in overtaking
    test(
        roundabout_agent, overtaking_env, 
        agent_name, 'pre_training_test', 'overtaking'
    )
    
    # Then train in Overtaking
    overtaking_agent = train(
        roundabout_agent, overtaking_env, 
        agent_name, 'overtaking', 'overtaking'
    )
    
    # Test in Overtaking
    test(
        overtaking_agent, overtaking_env, 
        agent_name, 'pos_training_test', 'overtaking'
    )
    
    logger.info("Sequential Environment Pipeline Completed")

    # Save the final agent
    create_directory(f"models/{agent_name}/final")
    overtaking_agent.save(f"models/{agent_name}/{agent_name}_final_model")
    
    return overtaking_agent

- ### Agent 1

In [8]:
# Execute the pipeline to agent 1

# a1_results = pipeline(...)

# Display Results

agent_DQN = DQN('MlpPolicy', lane_changing_env,
        policy_kwargs=dict(net_arch=[256, 256]),
        learning_rate=5e-4,
        buffer_size=15000,
        learning_starts=200,
        batch_size=32,
        gamma=0.8,
        train_freq=1,
        gradient_steps=1,
        target_update_interval=50,
        verbose=1)

final_agent_DQN = rl_pipeline(agent_DQN, 'agent_DQN', lane_changing_env, roundabout_env, overtaking_env)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


2024-11-21 10:10:05,998 - INFO - Sequential Environment Pipeline Started
2024-11-21 10:10:05,998 - INFO - Stage 1: Lane Changing Environment


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7        |
|    ep_rew_mean      | 5.3      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 3        |
|    time_elapsed     | 9        |
|    total_timesteps  | 28       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 5.75     |
|    ep_rew_mean      | 4.16     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 2        |
|    time_elapsed     | 15       |
|    total_timesteps  | 46       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.25     |
|    ep_rew_mean      | 5.35     |
|    explorat

2024-11-21 10:10:39,352 - INFO - Directory created: models/agent_DQN/stage1_lane_changing
2024-11-21 10:10:39,358 - INFO - Directory created: results/agent_DQN
2024-11-21 10:10:39,359 - INFO - Data saved for stage1_lane_changing in lane_changing
2024-11-21 10:10:39,360 - INFO - Training completed for agent_DQN in lane_changing
2024-11-21 10:10:50,952 - INFO - Test episode 1 completed in lane_changing
2024-11-21 10:11:00,075 - INFO - Test episode 2 completed in lane_changing
2024-11-21 10:11:10,181 - INFO - Test episode 3 completed in lane_changing
2024-11-21 10:11:10,182 - INFO - Directory created: results/agent_DQN
2024-11-21 10:11:10,188 - ERROR - Error saving data: Object of type ndarray is not JSON serializable
2024-11-21 10:11:10,189 - INFO - Stage 2: Roundabout Environment
2024-11-21 10:11:13,106 - INFO - Test episode 1 completed in roundabout
2024-11-21 10:11:16,127 - INFO - Test episode 2 completed in roundabout
2024-11-21 10:11:19,081 - INFO - Test episode 3 completed in round

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11       |
|    ep_rew_mean      | 9.67     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 27       |
|    time_elapsed     | 1        |
|    total_timesteps  | 44       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 9.12     |
|    ep_rew_mean      | 7.99     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 27       |
|    time_elapsed     | 2        |
|    total_timesteps  | 73       |
----------------------------------


2024-11-21 10:11:22,775 - INFO - Directory created: models/agent_DQN/stage2_roundabout
2024-11-21 10:11:22,781 - INFO - Directory created: results/agent_DQN
2024-11-21 10:11:22,781 - INFO - Data saved for stage2_roundabout in roundabout
2024-11-21 10:11:22,782 - INFO - Training completed for agent_DQN in roundabout
2024-11-21 10:11:25,724 - INFO - Test episode 1 completed in roundabout
2024-11-21 10:11:28,668 - INFO - Test episode 2 completed in roundabout
2024-11-21 10:11:30,089 - INFO - Test episode 3 completed in roundabout
2024-11-21 10:11:30,092 - INFO - Directory created: results/agent_DQN
2024-11-21 10:11:30,095 - ERROR - Error saving data: Object of type ndarray is not JSON serializable
2024-11-21 10:11:30,097 - INFO - Stage 3: Overtaking Environment
2024-11-21 10:11:45,428 - INFO - Test episode 1 completed in overtaking
2024-11-21 10:11:47,918 - INFO - Test episode 2 completed in overtaking
2024-11-21 10:11:49,715 - INFO - Test episode 3 completed in overtaking
2024-11-21 10:1

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20       |
|    ep_rew_mean      | 15.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 3        |
|    time_elapsed     | 26       |
|    total_timesteps  | 80       |
----------------------------------


2024-11-21 10:12:22,382 - INFO - Directory created: models/agent_DQN/stage3_overtaking
2024-11-21 10:12:22,388 - INFO - Directory created: results/agent_DQN
2024-11-21 10:12:22,388 - INFO - Data saved for stage3_overtaking in overtaking
2024-11-21 10:12:22,389 - INFO - Training completed for agent_DQN in overtaking
2024-11-21 10:12:29,303 - INFO - Test episode 1 completed in overtaking
2024-11-21 10:12:31,069 - INFO - Test episode 2 completed in overtaking
2024-11-21 10:12:38,564 - INFO - Test episode 3 completed in overtaking
2024-11-21 10:12:38,565 - INFO - Directory created: results/agent_DQN
2024-11-21 10:12:38,575 - ERROR - Error saving data: Object of type ndarray is not JSON serializable
2024-11-21 10:12:38,576 - INFO - Sequential Environment Pipeline Completed


- ### Agent N

## Performance

## Analysis

## Conclusion