# E-puck Multi-Robot Simulation Demo

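This notebook demonstrates the E-puck multi-robot simulation with visualization: it rolls out a random policy, repeats the rollout with a CBF safety filter, briefly trains a MAPPO policy, and compares the three runs.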

**Features:**
- Multi-robot formation control
- Safety constraint visualization
- Animated trajectory display

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ustbmicl-ros2epucksRL/safe-rl-manifold-suite/blob/master/Epuck_Colab_Demo.ipynb)

## 1. Installation

In [None]:
# Clone repository
!git clone https://github.com/ustbmicl-ros2epucksRL/safe-rl-manifold-suite.git
%cd safe-rl-manifold-suite

# Install dependencies
!pip install torch numpy matplotlib gymnasium hydra-core omegaconf -q
!pip install -e . -q

print("Installation complete!")

## 2. Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML, display, Image

# Import COSMOS
from cosmos.envs.webots_wrapper import EpuckSimEnv
from cosmos.envs.epuck_visualizer import EpuckVisualizer, run_episode_with_visualization
from cosmos.algos.mappo import MAPPO
from cosmos.safety.cosmos_filter import CBFFilter

print("Libraries imported!")

## 3. Create Environment

In [None]:
# Build the simulated E-puck environment from an explicit settings mapping.
env_settings = dict(
    num_agents=4,
    arena_size=1.0,
    max_steps=200,
    dt=0.064,
)
env = EpuckSimEnv(**env_settings)

# Report the sizes the environment exposes.
print(
    f"Environment created!",
    f"  - Number of agents: {env.num_agents}",
    f"  - Observation dim: {env.get_obs_dim()}",
    f"  - Action dim: {env.get_act_dim()}",
    f"  - Arena size: {env._arena_size}m x {env._arena_size}m",
    sep="\n",
)

## 4. Visualize Single Frame

In [None]:
# Put the environment into its seeded initial state.
obs, share_obs, info = env.reset(seed=42)

# Visualizer with both the show_sensors and show_goals options enabled.
visualizer_options = dict(show_sensors=True, show_goals=True)
vis = EpuckVisualizer(env, **visualizer_options)

# Draw the initial state on an 8x8-inch figure.
plt.figure(figsize=(8, 8))
vis.render(env)
plt.show()

## 5. Run Episode with Random Policy

In [None]:
# Roll out one episode with policy=None (random policy) and save the
# animation to random_policy.gif.
random_run_options = dict(
    policy=None,  # Random policy
    max_steps=100,
    render_interval=10,
    save_animation=True,
    output_path="random_policy.gif",
)
result = run_episode_with_visualization(env, **random_run_options)

# Summary of the rollout.
print(
    f"\nEpisode finished!",
    f"  - Total reward: {result['total_reward']:.2f}",
    f"  - Total cost: {result['total_cost']:.2f}",
    f"  - Steps: {result['steps']}",
    sep="\n",
)

In [None]:
# Display animation: load random_policy.gif (the output_path used for the
# random-policy rollout). The Image object is the cell's last expression, so
# the notebook renders it inline.
Image(filename="random_policy.gif")

## 6. Run with CBF (Control Barrier Function) Safety Filter

In [None]:
# Build the CBF safety filter from the arena size and agent count of `env`;
# safety_cfg is passed as None.
cbf_env_cfg = {'arena_size': env._arena_size, 'num_agents': env.num_agents}
cbf = CBFFilter(env_cfg=cbf_env_cfg, safety_cfg=None)

# Dedicated, seeded generator so the sampled actions are the same every time
# this cell and the rollout below are re-run (the global NumPy RNG is never
# seeded in this notebook, which made the "Random + CBF" run irreproducible).
_policy_rng = np.random.default_rng(123)

# Symmetric bound for the uniformly sampled raw actions.
_RANDOM_ACTION_LIMIT = 0.8


def safe_random_policy(obs):
    """Random policy with CBF safety filtering.

    Args:
        obs: Current observations. Not used by this policy; it is accepted so
            the function has the same one-argument signature as
            ``trained_policy``.

    Returns:
        A tuple ``(safe_actions, None)``: ``safe_actions`` is the result of
        ``cbf.project`` applied to the sampled actions, and ``None`` fills the
        slot in which ``trained_policy`` returns log-probabilities.
    """
    # One uniform sample per agent and action dimension.
    action_shape = (env.num_agents, env.get_act_dim())
    actions = _policy_rng.uniform(
        -_RANDOM_ACTION_LIMIT, _RANDOM_ACTION_LIMIT, action_shape
    )

    # Apply CBF safety filter using the environment's current constraint info.
    constraint_info = env.get_constraint_info()
    safe_actions = cbf.project(actions, constraint_info)

    return safe_actions, None

# Re-seed the environment before the filtered rollout.
env.reset(seed=123)

# Roll out one episode under safe_random_policy and save the animation to
# safe_policy.gif.
safe_run_options = dict(
    policy=safe_random_policy,
    max_steps=150,
    render_interval=10,
    save_animation=True,
    output_path="safe_policy.gif",
)
result_safe = run_episode_with_visualization(env, **safe_run_options)

# Summary of the filtered rollout.
print(
    f"\nSafe episode finished!",
    f"  - Total reward: {result_safe['total_reward']:.2f}",
    f"  - Total cost (collisions): {result_safe['total_cost']:.2f}",
    f"  - Steps: {result_safe['steps']}",
    sep="\n",
)

In [None]:
# Display safe animation: load safe_policy.gif (the output_path used for the
# CBF-filtered rollout). As the cell's last expression, the Image is rendered
# inline by the notebook.
Image(filename="safe_policy.gif")

## 7. Train MAPPO Policy

In [None]:
# Hyperparameters handed to MAPPO through its `cfg` argument.
mappo_cfg = {
    'actor_lr': 3e-4,
    'critic_lr': 3e-4,
    'gamma': 0.99,
    'gae_lambda': 0.95,
    'clip_param': 0.2,
    'ppo_epochs': 10,
    'num_mini_batch': 4,
}

# Instantiate MAPPO on the CPU, sized from the environment's dimensions.
mappo = MAPPO(
    obs_dim=env.get_obs_dim(),
    share_obs_dim=env.get_share_obs_dim(),
    act_dim=env.get_act_dim(),
    num_agents=env.num_agents,
    cfg=mappo_cfg,
    device='cpu',
)

# Report the number of parameters in the actor and the critic.
print("MAPPO created!")
print(f"  - Actor parameters: {sum(p.numel() for p in mappo.actor.parameters())}")
print(f"  - Critic parameters: {sum(p.numel() for p in mappo.critic.parameters())}")

In [None]:
# Quick training loop: collect one episode per iteration with the CBF filter
# in the loop, then run one MAPPO update on the collected rollout.
from cosmos.buffers import RolloutBuffer
# tqdm.auto selects the notebook widget when it is available and falls back to
# a plain-text bar otherwise, so the cell does not hard-require ipywidgets.
from tqdm.auto import tqdm

# A single constant drives both the buffer capacity and the per-episode step
# limit, so the two values cannot drift apart.
episode_length = 200
num_episodes = 50
log_interval = 10  # print the episode reward every `log_interval` episodes

# Create buffer. gamma / gae_lambda repeat the values given in the MAPPO cfg;
# keep them in sync if either is changed.
buffer = RolloutBuffer(
    episode_length=episode_length,
    num_agents=env.num_agents,
    obs_dim=env.get_obs_dim(),
    share_obs_dim=env.get_share_obs_dim(),
    act_dim=env.get_act_dim(),
    gamma=0.99,
    gae_lambda=0.95
)

# Training
episode_rewards = []

for episode in tqdm(range(num_episodes), desc="Training"):
    # The episode index doubles as the reset seed.
    obs, share_obs, _ = env.reset(seed=episode)
    buffer.set_first_obs(obs, share_obs)

    ep_reward = 0

    for step in range(episode_length):
        # Get actions from policy
        actions, log_probs = mappo.get_actions(obs)
        values = mappo.get_values(share_obs)

        # Apply CBF safety filter
        constraint_info = env.get_constraint_info()
        safe_actions = cbf.project(actions, constraint_info)

        # Step environment with the filtered actions
        next_obs, next_share, rewards, costs, dones, infos, truncated = env.step(safe_actions)

        # Store in buffer. mask is 1.0 for agents that are not done, else 0.0.
        # NOTE(review): the buffer receives the unfiltered `actions`, not
        # `safe_actions` -- presumably so they match `log_probs`; confirm.
        masks = (~dones).astype(np.float32).reshape(-1, 1)
        buffer.insert(next_obs, next_share, actions, log_probs, values,
                      rewards, costs, masks)

        ep_reward += rewards.sum()
        obs, share_obs = next_obs, next_share

        # NOTE(review): assumes `truncated` is a scalar flag; a multi-element
        # array here would make `or` raise -- confirm against env.step.
        if dones.all() or truncated:
            break

    # Update policy: bootstrap from the value of the last observation.
    last_values = mappo.get_values(share_obs)
    buffer.compute_returns_and_advantages(last_values)
    mappo.update(buffer)
    buffer.after_update()

    episode_rewards.append(ep_reward)

    if (episode + 1) % log_interval == 0:
        print(f"Episode {episode+1}: Reward = {ep_reward:.2f}")

In [None]:
# Training curve: total reward of each training episode, in order.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(episode_rewards)
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.set_title('Training Progress')
ax.grid(True, alpha=0.3)
plt.show()

## 8. Evaluate Trained Policy

In [None]:
def trained_policy(obs):
    """Deterministic MAPPO actions passed through the CBF filter.

    Args:
        obs: Observations forwarded to ``mappo.get_actions``.

    Returns:
        A tuple ``(safe_actions, log_probs)``: the result of ``cbf.project``
        on the MAPPO actions, and the log-probabilities that
        ``mappo.get_actions`` returned alongside the unfiltered actions.
    """
    raw_actions, action_log_probs = mappo.get_actions(obs, deterministic=True)
    filtered_actions = cbf.project(raw_actions, env.get_constraint_info())
    return filtered_actions, action_log_probs

# Re-seed the environment before the evaluation rollout.
env.reset(seed=999)

# Roll out one episode under trained_policy and save the animation to
# trained_policy.gif.
eval_run_options = dict(
    policy=trained_policy,
    max_steps=200,
    render_interval=10,
    save_animation=True,
    output_path="trained_policy.gif",
)
result_trained = run_episode_with_visualization(env, **eval_run_options)

# Summary of the evaluation rollout.
print(
    f"\nTrained policy evaluation:",
    f"  - Total reward: {result_trained['total_reward']:.2f}",
    f"  - Total cost: {result_trained['total_cost']:.2f}",
    f"  - Steps: {result_trained['steps']}",
    sep="\n",
)

In [None]:
# Display trained policy animation: load trained_policy.gif (the output_path
# used for the evaluation rollout). As the cell's last expression, the Image
# is rendered inline by the notebook.
Image(filename="trained_policy.gif")

## 9. Compare Results

In [None]:
# Comparison table: one row per rollout, one column per reported metric.
import pandas as pd

# Row label -> result dict of the corresponding rollout (insertion order is
# the row order of the table).
runs_by_policy = {
    'Random': result,
    'Random + CBF': result_safe,
    'MAPPO + CBF': result_trained,
}

# Column heading -> key in each result dict. The 'Collisions' column shows
# the 'total_cost' entry.
metric_keys = {
    'Total Reward': 'total_reward',
    'Collisions': 'total_cost',
    'Steps': 'steps',
}

comparison = pd.DataFrame({
    'Policy': list(runs_by_policy),
    **{
        heading: [run[key] for run in runs_by_policy.values()]
        for heading, key in metric_keys.items()
    },
})

banner = "="*50
print("\n" + banner)
print("Results Comparison")
print(banner)
display(comparison)

## 10. Cleanup

In [None]:
# Close the environment; no cell after this one uses `env`.
env.close()
print("Environment closed.")