In [1]:
# imports from libraries
import gymnasium as gym
import torch
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

# imports from modules
from agents.dqn_agent import Agent
from utils.scheduler import HyperparameterScheduler

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# helper function for preprocessing (same as training)
def preprocess(obs):
    """Convert a raw colour frame into a batched grayscale tensor on `device`.

    Args:
        obs: Array-like image with the colour channels on axis 2 (H, W, C),
            such as the observation returned by `env.reset()` / `env.step()`.

    Returns:
        torch.Tensor: float32 tensor of shape (1, 1, H, W) holding the
        per-pixel mean over the channel axis, placed on `device`.
    """
    # Take the channel mean first and cast afterwards, so the values are the
    # same ones the previous torch.FloatTensor(...) conversion produced.
    gray = np.mean(obs, axis=2).astype(np.float32)
    # Add the batch and channel axes on the NumPy side and wrap the array
    # directly: building a tensor from a Python list of ndarrays is the slow
    # path in PyTorch (it warns about it).
    return torch.from_numpy(gray[np.newaxis, np.newaxis]).to(device)

# function to render environment
def render_episode(env, agent, model_path):
    """Load a checkpoint into the agent and play one rendered episode.

    Args:
        env: Environment exposing `reset`, `step`, `render` and `close`.
            It is always closed before this function returns or raises.
        agent: Agent whose `policy_net` receives the checkpoint weights and
            whose `select_action(state, eval_mode=True)` chooses each action.
        model_path: Path to a checkpoint holding a 'model_state_dict' entry.

    Returns:
        The sum of the rewards collected until the episode ends
        (either `done` or `truncated` reported by `env.step`).
    """
    try:
        # load the trained model
        # NOTE(review): torch.load unpickles the file, so only load checkpoints
        # from a trusted source (consider weights_only=True if the checkpoint
        # contains nothing but tensors and plain containers).
        checkpoint = torch.load(model_path, map_location=device)
        agent.policy_net.load_state_dict(checkpoint['model_state_dict'])
        agent.policy_net.eval()

        obs, _ = env.reset()
        # preprocess() already places the tensor on `device`.
        state = preprocess(obs)
        total_reward = 0
        done = False
        truncated = False

        while not (done or truncated):
            clear_output(wait=True)
            env.render()

            # Inference only: no gradients are needed to pick an action.
            with torch.no_grad():
                action = agent.select_action(state, eval_mode=True)

            obs, reward, done, truncated, _ = env.step(action)
            state = preprocess(obs)
            total_reward += reward
    finally:
        # Release the environment even when loading or the rollout fails.
        env.close()
    return total_reward

# Build a rendered environment and a fresh agent that will receive the
# trained weights.
env = gym.make(
    'CarRacing-v3',
    continuous=True,
    render_mode='human',
)
agent = Agent(
    state_shape=(1, 96, 96),
    n_actions=5,
    scheduler=HyperparameterScheduler(),
)

# Checkpoint to visualise.
model_path = 'experiment_20241204_230204/best_model.pth'

# Play a single episode; failures are reported instead of raised, and the
# environment is closed in every case.
try:
    reward = render_episode(env, agent, model_path)
    print(f"Episode finished with reward: {reward:.2f}")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    env.close()

Episode finished with reward: 896.61


In [2]:
def test_multiple_episodes(env, agent, model_path, num_episodes=10):
    """Load a checkpoint into the agent and evaluate it for several episodes.

    Args:
        env: Environment exposing `reset` and `step`; it is not closed here.
        agent: Agent whose `policy_net` receives the checkpoint weights and
            whose `select_action(state, eval_mode=True)` chooses each action.
        model_path: Path to a checkpoint holding a 'model_state_dict' entry.
        num_episodes: Number of episodes to play.

    Returns:
        list: One total reward per episode, in the order they were played.
    """
    # map_location keeps loading consistent with the other loaders in this
    # notebook and lets a checkpoint saved on a GPU be opened on a CPU-only
    # machine.
    # NOTE(review): torch.load unpickles the file; use trusted checkpoints only.
    checkpoint = torch.load(model_path, map_location=device)
    agent.policy_net.load_state_dict(checkpoint['model_state_dict'])
    agent.policy_net.eval()

    rewards = []
    for episode in range(num_episodes):
        obs, _ = env.reset()
        state = preprocess(obs)
        episode_reward = 0
        done = False
        truncated = False

        while not (done or truncated):
            # Inference only: no gradients are needed to pick an action.
            with torch.no_grad():
                action = agent.select_action(state, eval_mode=True)
            obs, reward, done, truncated, _ = env.step(action)
            state = preprocess(obs)
            episode_reward += reward

        rewards.append(episode_reward)
        print(f"Episode {episode + 1}/{num_episodes}: Reward = {episode_reward:.2f}")

    return rewards

In [3]:
def plot_test_rewards(rewards):
    """Plot the test rewards as a line chart and print their mean and spread.

    Args:
        rewards: Sequence of episode rewards; it is passed unchanged to the
            plotting call and to `np.mean` / `np.std`.
    """
    _, ax = plt.subplots(figsize=(10, 5))
    ax.plot(rewards)
    ax.set_title('Test Episode Rewards')
    ax.set_xlabel('Episode')
    ax.set_ylabel('Reward')
    ax.grid(True)
    plt.show()

    # Summary statistics are printed below the figure.
    mean_reward = np.mean(rewards)
    print(f"Average reward: {mean_reward:.2f}")
    reward_std = np.std(rewards)
    print(f"Standard deviation: {reward_std:.2f}")

In [None]:
# test multiple episodes
# A fresh rendered environment is created for each episode and closed again
# afterwards, even if the episode fails.
rewards = []
for _ in range(10):
    env = gym.make('CarRacing-v3', continuous=True, render_mode='human')
    try:
        # test_multiple_episodes returns a list with one reward per episode,
        # so extend() keeps `rewards` a flat list of numbers rather than a
        # list of one-element lists.
        rewards.extend(test_multiple_episodes(env, agent, model_path, num_episodes=1))
    finally:
        env.close()

# plot the results
plot_test_rewards(rewards)

In [4]:
import pandas as pd
from datetime import datetime

def test_model(env, agent, model_path=None, num_episodes=100):
    """
    Evaluate the agent for several episodes and log one CSV row per episode.

    When `model_path` is given, the checkpoint's 'model_state_dict' is loaded
    into `agent.policy_net` and the run is labelled "trained"; otherwise the
    agent is evaluated with whatever weights it currently has and the run is
    labelled "dummy".

    Results are written to `results_<model_type>_<timestamp>.csv` in the
    current working directory with the columns
    `episode,reward,steps,model_type`. The file is flushed after every row so
    partial results survive an interrupted run.

    Args:
        env: Environment exposing `reset` and `step`; it is not closed here.
        agent: Agent providing `policy_net` and
            `select_action(state, eval_mode=True)`.
        model_path: Optional checkpoint path.
        num_episodes: Number of episodes to run.

    Returns:
        str: Name of the CSV file that was written.
    """
    # A single flag drives both the label and the loading, so the
    # `model_type` column always matches the weights that were evaluated.
    has_checkpoint = bool(model_path)
    model_type = "trained" if has_checkpoint else "dummy"

    if has_checkpoint:
        # NOTE(review): torch.load unpickles the file; use trusted checkpoints only.
        checkpoint = torch.load(model_path, map_location=device)
        agent.policy_net.load_state_dict(checkpoint['model_state_dict'])

    agent.policy_net = agent.policy_net.to(device)
    agent.policy_net.eval()

    # Open CSV file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f'results_{model_type}_{timestamp}.csv'

    with open(filename, 'w') as f:
        f.write('episode,reward,steps,model_type\n')  # CSV header
        f.flush()  # Ensure header is written

        for episode in range(num_episodes):
            try:
                obs, _ = env.reset()
                # preprocess() already places the tensor on `device`.
                state = preprocess(obs)
                episode_reward = 0
                done = False
                truncated = False
                steps = 0

                while not (done or truncated):
                    try:
                        with torch.no_grad():
                            action = agent.select_action(state, eval_mode=True)
                        obs, reward, done, truncated, _ = env.step(action)
                        state = preprocess(obs)
                        episode_reward += reward
                        steps += 1

                        # Safety net against episodes that never end: stop once
                        # the step counter exceeds 1000 (i.e. after 1001 steps).
                        if steps > 1000:  # Adjust this number as needed
                            truncated = True

                    except Exception as e:
                        # Best effort: abandon this episode but still record
                        # the reward and steps accumulated so far.
                        print(f"Error during episode step: {e}")
                        break

                # Write results directly to CSV and flush to ensure writing
                f.write(f'{episode+1},{episode_reward},{steps},{model_type}\n')
                f.flush()

                if (episode + 1) % 10 == 0:
                    print(f"Episode {episode + 1}/{num_episodes}: Reward = {episode_reward:.2f}")

            except Exception as e:
                print(f"Error in episode {episode + 1}: {e}")
                # Write error case to CSV (reward and steps recorded as 0)
                f.write(f'{episode+1},0,0,{model_type}\n')
                f.flush()

    print(f"Results saved to {filename}")
    return filename

In [5]:
# Test both models
# No render_mode is passed here, unlike the rendered cells above.
env = gym.make('CarRacing-v3', continuous=True)
agent = Agent(state_shape=(1, 96, 96), n_actions=5, scheduler=HyperparameterScheduler())

try:
    # Test dummy model
    # This must run first: the same `agent` object is reused below, and
    # loading the checkpoint overwrites its policy network weights.
    print("\nTesting dummy model...")
    test_model(env, agent, model_path=None, num_episodes=100)

    # Test trained model
    print("\nTesting trained model...")
    model_path = 'experiment_20241204_230204/best_model.pth'
    test_model(env, agent, model_path=model_path, num_episodes=100)
finally:
    # Close the environment even if one of the evaluation runs raises.
    env.close()


Testing dummy model...
Episode 10/100: Reward = -93.29
Episode 20/100: Reward = -93.85
Episode 30/100: Reward = -92.83
Episode 40/100: Reward = -93.22
Episode 50/100: Reward = -93.46
Episode 60/100: Reward = -92.57
Episode 70/100: Reward = -92.78
Episode 80/100: Reward = -92.93
Episode 90/100: Reward = -93.10
Episode 100/100: Reward = -93.15
Results saved to results_dummy_20241207_135353.csv

Testing trained model...


  checkpoint = torch.load(model_path, map_location=device)


Episode 10/100: Reward = 922.70
Episode 20/100: Reward = 890.35
Episode 30/100: Reward = 927.80
Episode 40/100: Reward = 933.80
Episode 50/100: Reward = 939.70
Episode 60/100: Reward = 855.48
Episode 70/100: Reward = 920.20
Episode 80/100: Reward = 938.60
Episode 90/100: Reward = 935.10
Episode 100/100: Reward = 932.70
Results saved to results_trained_20241207_141618.csv


In [None]:
# `results_df` is not defined in any cell shown above, so it is rebuilt here
# from the CSV files written by test_model. This lets the cell run on a fresh
# kernel as long as those files exist in the working directory.
from pathlib import Path


def _load_latest_results(model_type):
    """Read the newest results_<model_type>_<timestamp>.csv in the working directory."""
    candidates = sorted(Path('.').glob(f'results_{model_type}_*.csv'))
    if not candidates:
        raise FileNotFoundError(
            f"No results_{model_type}_*.csv file found; run test_model first."
        )
    # Timestamps use the %Y%m%d_%H%M%S format, so the lexicographically last
    # file name is also the most recent one.
    return pd.read_csv(candidates[-1])


results_df = pd.concat(
    [_load_latest_results('dummy'), _load_latest_results('trained')],
    ignore_index=True,
)
dummy_results = results_df[results_df['model_type'] == 'dummy']
trained_results = results_df[results_df['model_type'] == 'trained']

fig, (ax_box, ax_line) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: reward distribution per model.
ax_box.boxplot([dummy_results['reward'], trained_results['reward']])
# Tick labels are set separately because the `labels=` keyword of boxplot is
# deprecated in recent Matplotlib releases.
ax_box.set_xticklabels(['Dummy', 'Trained'])
ax_box.set_title('Reward Distribution')
ax_box.set_ylabel('Reward')

# Right panel: reward of every episode, one line per model.
ax_line.plot(dummy_results['episode'], dummy_results['reward'], label='Dummy')
ax_line.plot(trained_results['episode'], trained_results['reward'], label='Trained')
ax_line.set_title('Rewards Over Episodes')
ax_line.set_xlabel('Episode')
ax_line.set_ylabel('Reward')
ax_line.legend()

fig.tight_layout()
plt.show()