# Multi-Car Racing PPO - Evaluation & Training

This notebook allows you to:
1. **Evaluate** a trained model (upload your `.zip` file) and see stats/video.
2. **Train** a new PPO agent from scratch.

## Setup Instructions

1. Make sure GPU is enabled: Runtime -> Change runtime type -> GPU
2. Run the **Install Dependencies** cell first, then the **Setup Environment and Wrapper** cell (both paths below depend on it).
3. Choose your path: **Evaluation** or **Training** below.

In [None]:
# Install dependencies
!pip install -q "numpy>=1.22.0,<1.23.0"
!pip install -q gym==0.17.3
!pip install -q stable-baselines3[extra]==1.8.0
!pip install -q matplotlib>=3.7.0 opencv-python>=4.8.0 tensorboard>=2.13.0 pyyaml>=6.0 pyglet==1.5.27 moviepy
!pip install -q git+https://github.com/igilitschenski/multi_car_racing.git

# Fix for Colab display
!apt-get install -y xvfb python-opengl > /dev/null 2>&1

print("All dependencies installed successfully!")

In [None]:
# Setup Environment and Wrapper (Run this cell!)
import gym
import gym_multi_car_racing
import numpy as np
import cv2
import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage
from IPython.display import Video, display

class SingleAgentWrapper(gym.Wrapper):
    """Wrap MultiCarRacing to expose a single-agent view.

    When the wrapped env's spaces carry a leading axis of size 1 (a 4-D
    observation space and/or a 2-D action space), that axis is removed from
    the spaces this wrapper advertises, stripped from observations and
    rendered frames, and re-added to actions before they are forwarded.
    Spaces that do not match this pattern are left as inherited from ``env``.
    """

    def __init__(self, env):
        """Derive single-agent observation/action spaces from ``env``.

        Args:
            env: The environment to wrap; its ``observation_space`` and
                ``action_space`` are inspected for a leading axis of size 1.
        """
        super().__init__(env)
        obs_space = env.observation_space
        act_space = env.action_space

        # `shape` can be absent or None on some spaces; treat that as shapeless
        # instead of failing on len(None).
        obs_shape = getattr(obs_space, "shape", None) or ()
        act_shape = getattr(act_space, "shape", None) or ()

        if len(obs_shape) == 4 and obs_shape[0] == 1:
            self.observation_space = gym.spaces.Box(
                low=obs_space.low[0],
                high=obs_space.high[0],
                shape=obs_shape[1:],
                dtype=obs_space.dtype
            )
        if len(act_shape) == 2 and act_shape[0] == 1:
            self.action_space = gym.spaces.Box(
                low=act_space.low[0],
                high=act_space.high[0],
                shape=act_shape[1:],
                dtype=act_space.dtype
            )

    @staticmethod
    def _unwrap_obs(obs):
        """Return ``obs[0]`` for a 4-D array with leading size 1 or a 1-item list/tuple; otherwise ``obs``."""
        shape = getattr(obs, "shape", None)
        if shape is not None and len(shape) == 4 and shape[0] == 1:
            return obs[0]
        if isinstance(obs, (list, tuple)) and len(obs) == 1:
            return obs[0]
        return obs

    @staticmethod
    def _unwrap_reward(reward):
        """Return ``float(reward[0])`` for a list/tuple or an array whose first axis has size 1; otherwise ``reward``."""
        if isinstance(reward, (list, tuple)):
            return float(reward[0])
        shape = getattr(reward, "shape", None)
        if shape is not None and len(shape) > 0 and shape[0] == 1:
            return float(reward[0])
        return reward

    def reset(self, **kwargs):
        """Reset the wrapped env and return the unwrapped observation.

        Only the observation is returned (the wrapped ``reset`` is expected to
        return just ``obs``, as noted for Gym 0.17.3).
        """
        return self._unwrap_obs(self.env.reset(**kwargs))

    def step(self, action):
        """Forward ``action`` to the wrapped env and unwrap the result.

        Args:
            action: Single-agent action. If the wrapped action space is 2-D,
                the action is reshaped to ``(1, -1)`` first; lists and tuples
                are accepted as well as arrays.

        Returns:
            ``(obs, reward, done, info)`` with ``obs`` and ``reward`` unwrapped;
            ``done`` and ``info`` are passed through untouched.
        """
        env_act_shape = getattr(self.env.action_space, "shape", None) or ()
        if len(env_act_shape) == 2:
            # np.asarray so that plain Python sequences do not fail on .reshape
            action = np.asarray(action).reshape(1, -1)
        obs, reward, done, info = self.env.step(action)
        return self._unwrap_obs(obs), self._unwrap_reward(reward), done, info

    def render(self, mode='human', **kwargs):
        """Render the wrapped env, dropping a leading size-1 axis from a 4-D result."""
        out = self.env.render(mode=mode, **kwargs)
        shape = getattr(out, "shape", None)
        if shape is not None and len(shape) == 4 and shape[0] == 1:
            out = out[0]
        return out

def create_eval_env():
    """Build a one-car 'MultiCarRacing-v0' env wrapped in SingleAgentWrapper."""
    env_options = dict(
        num_agents=1,
        direction='CCW',
        use_random_direction=True,
        backwards_flag=True,
        h_ratio=0.25,
        use_ego_color=False,
    )
    multi_car_env = gym.make('MultiCarRacing-v0', **env_options)
    return SingleAgentWrapper(multi_car_env)

print("Environment classes defined.")

## Option 1: Evaluate Uploaded Model

Use this section to upload your trained model (`.zip`) and see how it performs.

In [None]:
from google.colab import files
import os

# Forget any filename left over from an earlier run of this cell; otherwise a
# re-run with no upload would leave the evaluation cell pointing at the old model.
globals().pop('model_filename', None)

print("Please upload your model file (.zip file, e.g., final_model.zip)")
uploaded = files.upload()

if uploaded:
    uploaded_names = list(uploaded.keys())
    # Prefer a .zip when several files were uploaded; fall back to the first file.
    zip_names = [name for name in uploaded_names if name.lower().endswith('.zip')]
    model_filename = zip_names[0] if zip_names else uploaded_names[0]
    print(f"\nModel uploaded: {model_filename}")
else:
    print("No file uploaded.")

In [None]:
# Run Evaluation
# Loads the uploaded model, plays n_episodes deterministic episodes, records
# every rendered frame to an mp4 and reports per-episode and mean reward.
if 'model_filename' in locals():
    print(f"Loading model: {model_filename}...")

    # Load model
    model = PPO.load(model_filename)

    # Create evaluation environment
    eval_env = DummyVecEnv([create_eval_env])
    eval_env = VecTransposeImage(eval_env)

    # Config
    n_episodes = 3
    video_path = 'eval_video.mp4'
    video_fps = 30

    print(f"Evaluating for {n_episodes} episodes...")

    episode_rewards = []
    # Opened lazily from the first rendered frame, so no throwaway env is
    # needed just to learn the frame size.
    video_writer = None

    try:
        for val_ep in range(n_episodes):
            obs = eval_env.reset()
            done = False
            total_reward = 0.0

            while not done:
                action, _ = model.predict(obs, deterministic=True)
                # The vectorized env returns length-1 arrays; index explicitly
                # rather than relying on truthiness of a one-element array.
                obs, rewards, dones, infos = eval_env.step(action)
                total_reward += float(rewards[0])
                done = bool(dones[0])

                # Render frame
                # Access the underlying env to get the render
                frame = eval_env.envs[0].render(mode='rgb_array')
                if frame is not None:
                    if video_writer is None:
                        height, width = frame.shape[:2]
                        video_writer = cv2.VideoWriter(
                            video_path,
                            cv2.VideoWriter_fourcc(*'mp4v'),
                            video_fps,
                            (width, height),
                        )
                    # Convert RGB to BGR for OpenCV
                    video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

            episode_rewards.append(total_reward)
            print(f"Episode {val_ep+1}: Reward = {total_reward:.2f}")
    finally:
        # Always finalize the video file and free the env, even if an episode
        # raised or the cell was interrupted.
        if video_writer is not None:
            video_writer.release()
        eval_env.close()

    print(f"\nMean Reward: {np.mean(episode_rewards):.2f} ± {np.std(episode_rewards):.2f}")

    if video_writer is not None:
        print(f"Video saved to {video_path}")

        # Display Video
        print("\n Displaying Video:")
        display(Video(video_path, embed=True, html_attributes='controls loop autoplay'))
    else:
        print("No frames were rendered; no video was written.")
else:
    print("Please upload a model first!")

## Option 2: Train New Model

Run the cells below to train a new model from scratch.

In [None]:
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.utils import set_random_seed
import torch

# Initialize Config
seed = 42
total_timesteps = 100000  # Adjust as needed
checkpoint_freq = 50000   # env steps between checkpoint saves
set_random_seed(seed)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Directories
os.makedirs('models', exist_ok=True)
os.makedirs('logs', exist_ok=True)

# Create Env
env = DummyVecEnv([create_eval_env])  # Re-use the creator
env = VecTransposeImage(env)

# Create Model
# `seed` is passed to PPO as well: set_random_seed() above only seeds the
# global Python/NumPy/PyTorch generators, while the model-level seed is what
# gets forwarded to the environment and action space.
model = PPO(
    policy='CnnPolicy',
    env=env,
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,
    vf_coef=0.5,
    max_grad_norm=0.5,
    seed=seed,
    device=device,
    verbose=1,
    tensorboard_log='./logs'
)

print(f"Training on {device}...")

# Callback: periodically writes ppo_racecar_* checkpoints under ./models
checkpoint_callback = CheckpointCallback(
    save_freq=checkpoint_freq,
    save_path='./models',
    name_prefix='ppo_racecar'
)

# Train
model.learn(
    total_timesteps=total_timesteps,
    callback=checkpoint_callback,
    progress_bar=True
)

# Saved in the same .zip format the evaluation section loads with PPO.load
model.save("final_model")
print("Training complete.")

In [None]:
# Visualize with TensorBoard
# Loads the TensorBoard notebook extension and opens it on ./logs, the same
# directory the training cell passes to PPO as tensorboard_log.
%load_ext tensorboard
%tensorboard --logdir ./logs