In [2]:
import torch
import os
import supersuit as ss
from stable_baselines3 import PPO
from ctf_env import CaptureTheFlagPZ

# --- Load Model ---
# Set load_model to True to continue training the checkpoint stored at model_path.
# If that file does not exist, a new model is trained instead.
load_model = True
model_path = "ctf_champion.zip"


# --- Training Budget ---
# 30 is only a smoke-test value. The recorded output of this cell shows that
# 32768 steps were still collected for it (presumably one full rollout).
# 3 million timesteps takes 444 min (8h) to train on a CPU
total_timesteps = 30


# --- Pick Device ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
print(torch.cuda.is_available())
print(torch.version.cuda)


# --- Grid Hyper Parameters ---
grid_size = 21
center_walls = 2 # The higher the number the fewer the walls
mirrored_walls = 15 # The lower the number the fewer walls
# Example
# First Training  (Easy) grid_size 12x12, center_walls 6, mirrored_walls 2
# Second Training (Medium) grid_size 16x16, center_walls 4, mirrored_walls 8
# Third Training  (Difficult/Default) grid_size 21x21, center_walls 2, mirrored_walls 15


# --- Setup ---
env = CaptureTheFlagPZ(
    render_mode="rgb_array",
    grid_size=grid_size,
    center_walls=center_walls,
    mirrored_walls=mirrored_walls
)

# Observation preprocessing: downscale to 84x84, collapse the colour channels,
# then stack the last 3 frames.
env = ss.resize_v1(env, x_size=84, y_size=84)
env = ss.color_reduction_v0(env, mode='full')
env = ss.frame_stack_v1(env, 3)

# Convert the PettingZoo env into a vector env and run 4 copies of it
# in the current process (num_cpus=0) behind a stable_baselines3 interface.
vec_env = ss.pettingzoo_env_to_vec_env_v1(env)
vec_env = ss.concat_vec_envs_v1(vec_env, num_vec_envs=4, num_cpus=0, base_class='stable_baselines3')

print(f"Observation Space: {vec_env.observation_space.shape}")
# Should be (84, 84, 3) -> 84x84 pixels, 3 stacked frames


# --- Load or Create Model ---
if load_model and os.path.exists(model_path):
    print(f"Loading existing model from {model_path}...")
    model = PPO.load(model_path, env=vec_env, device=device)

else:
    print("Training a new model...")
    model = PPO(
        "CnnPolicy",
        vec_env,
        verbose=1,
        batch_size=512,
        learning_rate=1e-4,
        ent_coef=0.01,
        n_steps=2048,
        device=device
    )


# --- Train Model ---
print("Starting Training...")
model.learn(total_timesteps=total_timesteps)
print("Training Finished!")

# Save to the same path the model is loaded from, so editing model_path
# cannot make the load and save locations drift apart.
model.save(model_path)


Using device: cpu
False
None
Observation Space: (84, 84, 3)
Loading existing model from ctf_champion.zip...
Wrapping the env in a VecTransposeImage.




Starting Training...




------------------------------
| time/              |       |
|    fps             | 285   |
|    iterations      | 1     |
|    time_elapsed    | 114   |
|    total_timesteps | 32768 |
------------------------------
Training Finished!


In [9]:
import torch

# Report the installed PyTorch build and, when a GPU is usable, which CUDA
# runtime and device PyTorch will run on.
cuda_ready = torch.cuda.is_available()

print("PyTorch version:", torch.__version__)
print("CUDA available:", cuda_ready)

if cuda_ready:
    active_gpu = torch.cuda.current_device()
    print("CUDA version:", torch.version.cuda)
    print("GPU name:", torch.cuda.get_device_name(active_gpu))

PyTorch version: 2.9.1+cu128
CUDA available: True
CUDA version: 12.8
GPU name: NVIDIA GeForce RTX 5070 Ti


In [2]:
import supersuit as ss
from stable_baselines3 import PPO
from ctf_env import CaptureTheFlagPZ

# 1. Load the previous champion (Gen 1).
# It is loaded on the CPU regardless of GPU availability (the original note says
# this is "to avoid errors").
old_champion = PPO.load("ctf_champion", device="cpu")

# 2. Build the training environment with default grid settings and the same
# preprocessing chain used for training: 84x84 resize -> colour reduction -> 3-frame stack.
env = ss.frame_stack_v1(
    ss.color_reduction_v0(
        ss.resize_v1(
            CaptureTheFlagPZ(render_mode="rgb_array"),
            x_size=84,
            y_size=84,
        ),
        mode='full',
    ),
    3,
)

# 3. The "Gauntlet" wrapper (not implemented yet).
# Goal: Blue's actions come from the old model while Red's actions come from the
# model being trained. That needs either a custom rollout loop or a single-agent
# Gym-style wrapper in which the opponent is stepped as part of the environment.
# NOTE(review): the original note suggested a library like 'shimmy' for that
# conversion -- confirm it actually supports this before relying on it.
# Neither old_champion nor env is used further in this cell.
print("To train Gen 2 vs Gen 1, we need to convert the environment so 'Blue' is just a part of the game (like a moving wall).")

To train Gen 2 vs Gen 1, we need to convert the environment so 'Blue' is just a part of the game (like a moving wall).


# Advanced Training
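Train the same model over several consecutive sessions, each with its own PPO hyperparameters and grid settings (progressing from an easy grid to the default one).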

In [None]:
import torch
import supersuit as ss
from stable_baselines3 import PPO
from ctf_env import CaptureTheFlagPZ

# --- Model Location ---
# Session N saves its checkpoint to {base_folder}/{model_name}/{model_name}_v{N}.zip.
# Each session after the first resumes from the checkpoint saved by the one before it.
base_folder = "models"
model_name = "bob"

# --- Pick Device ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
print(torch.cuda.is_available())
print(torch.version.cuda)


# --- PPO Hyper Parameters ---
# training_sets controls how many training sessions are run.
# Every list below must provide one entry per session.
training_sets=3
learning_rate=[1e-4, 5e-5, 1e-5]
ent_coef=[0.01, 0.01, 0.02]
total_timesteps = [1_000_000, 1_000_000, 1_000_000]
# Smoke-test alternative:
#total_timesteps = [30, 30, 30]

# --- Grid Hyper Parameters ---
grid_size = [12,16,21]
center_walls = [6,4,2] # The higher the number the fewer the walls
mirrored_walls = [2,6,15] # The lower the number the fewer walls
# Example
# First Training  (Easy) grid_size 12x12, center_walls 6, mirrored_walls 2
# Second Training (Medium) grid_size 16x16, center_walls 4, mirrored_walls 8
# Third Training  (Difficult/Default) grid_size 21x21, center_walls 2, mirrored_walls 15
# NOTE(review): the example lists mirrored_walls 8 for the medium session while
# the list above uses 6 -- confirm which value is intended.


# --- Validate Configuration ---
# Fail before any training starts, not with an IndexError in a late session
# after the earlier ones have already run.
per_session_settings = {
    "learning_rate": learning_rate,
    "ent_coef": ent_coef,
    "total_timesteps": total_timesteps,
    "grid_size": grid_size,
    "center_walls": center_walls,
    "mirrored_walls": mirrored_walls,
}
for setting_name, setting_values in per_session_settings.items():
    if len(setting_values) < training_sets:
        raise ValueError(
            f"{setting_name} has {len(setting_values)} entries "
            f"but training_sets is {training_sets}"
        )

# Checkpoint path without the version suffix; it is the same for every session.
model_path = f"{base_folder}/{model_name}/{model_name}"

for i in range(training_sets):
    old_suffix = f"_v{i}.zip"
    new_suffix = f"_v{i+1}.zip"

    env = CaptureTheFlagPZ(
        render_mode="rgb_array",
        grid_size=grid_size[i],
        center_walls=center_walls[i],
        mirrored_walls=mirrored_walls[i]
    )

    # --- Setup ---
    # Observations are resized to 84x84 whatever the grid size is, so one
    # policy can be carried across sessions.
    env = ss.resize_v1(env, x_size=84, y_size=84)
    env = ss.color_reduction_v0(env, mode='full')
    env = ss.frame_stack_v1(env, 3)

    vec_env = ss.pettingzoo_env_to_vec_env_v1(env)
    vec_env = ss.concat_vec_envs_v1(vec_env, num_vec_envs=4, num_cpus=0, base_class='stable_baselines3')


    # --- Create Model ---
    if i == 0:
        print("Training a new model...")
        model = PPO(
            "CnnPolicy",
            vec_env,
            verbose=1,
            batch_size=512,
            learning_rate=learning_rate[i],
            ent_coef=ent_coef[i],
            n_steps=2048,
            device=device
        )

    else:
        previous_checkpoint = model_path + old_suffix
        # Print the full file name, including the version suffix, so the log
        # shows which checkpoint was actually resumed.
        print(f"Loading existing model from {previous_checkpoint}...")
        # custom_objects replaces the stored hyperparameters with this session's values.
        model = PPO.load(
            previous_checkpoint,
            env=vec_env,
            device=device,
            custom_objects={
                "learning_rate": learning_rate[i],
                "ent_coef": ent_coef[i]
            }
        )


    # --- Train Model ---
    print(f"\n\nStarting Training Session {i+1}...")
    model.learn(total_timesteps=total_timesteps[i])
    print(f"Training Session {i+1} Finished!")

    model.save(model_path+new_suffix)
    # 3 million timesteps takes 444 min (8h) to train on a CPU

    # The next session builds a fresh environment, so release this one
    # once its checkpoint is safely on disk.
    vec_env.close()


Using device: cpu
False
None
Training a new model...
Using cpu device
Wrapping the env in a VecTransposeImage.




Starting Training Session 0...




------------------------------
| time/              |       |
|    fps             | 413   |
|    iterations      | 1     |
|    time_elapsed    | 79    |
|    total_timesteps | 32768 |
------------------------------
Training Session 0 Finished!
Loading existing model from models/bob/bob...




Wrapping the env in a VecTransposeImage.
Starting Training Session 1...
------------------------------
| time/              |       |
|    fps             | 214   |
|    iterations      | 1     |
|    time_elapsed    | 152   |
|    total_timesteps | 32768 |
------------------------------
Training Session 1 Finished!
Loading existing model from models/bob/bob...
Wrapping the env in a VecTransposeImage.
Starting Training Session 2...
------------------------------
| time/              |       |
|    fps             | 242   |
|    iterations      | 1     |
|    time_elapsed    | 134   |
|    total_timesteps | 32768 |
------------------------------
Training Session 2 Finished!
