### Imports

In [5]:
import random

import gymnasium as gym
from gymnasium.wrappers import RecordVideo
import ale_py
import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import numpy as np

from atari_rl.rl.agent import Agent
from atari_rl.rl.utils import prepost_frame
from atari_rl.rl.frame_stacker import FrameStacker
from atari_rl.il.expert_dataset import ExpertDataset, StateAction
from atari_rl.il.expert_dataset_wrapper import ExpertDatasetWrapper

### Hyperparameters

In [6]:
# Game parameters
GAME_NAME = "MsPacman-v5"
RL_ALGORITHM = "IL"
NUM_ACTIONS = 5

# Agent parameters
IMAGE_SIZE = 84
FRAME_STACK_SIZE = 4
FRAME_SKIP_SIZE = 4

# Imitation Learning parameters
LEARNING_RATE = 1e-4
EXPERT_NAME = "expert_dataset_21668"
LOADED_SIZE = 6
BATCH_SIZE = 2
EPOCHS = 200

# Evaluation parameters
MODEL_NAME = "DQN_MsPacman-v5_290_10700.pt"
SAVE_MODEL = True
LOAD_MODEL = False

MAX_STEP_PER_EPISODE = 10000
NUM_EPISODES_EVAL = 10
EPSILON_EVAL = 0.05
USE_DETERMINISTIC_EVAL = False
USE_EPSILON_EVAL = True
TEMPERATURE_EVAL = 1

### Prepost

In [7]:
obs_shape = (FRAME_STACK_SIZE, IMAGE_SIZE, IMAGE_SIZE)
expert_dataset = ExpertDataset(obs_shape, NUM_ACTIONS, expert_name=EXPERT_NAME)

In [8]:
# Load portion of the expert dataset
dfsample = expert_dataset.sample(LOADED_SIZE)
dfsample

[StateAction(state=array([[[0.4862745 , 0.4862745 , 0.3529412 , ..., 0.3529412 ,
          0.4392157 , 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.39607844, 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.4       , 0.4862745 ],
         ...,
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.4       , 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.39607844, 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.3529412 , ..., 0.3529412 ,
          0.4392157 , 0.4862745 ]],
 
        [[0.4862745 , 0.4862745 , 0.3529412 , ..., 0.3529412 ,
          0.4392157 , 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.39607844, 0.4862745 ],
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
          0.4       , 0.4862745 ],
         ...,
         [0.4862745 , 0.4862745 , 0.22352941, ..., 0.22352941,
      

In [12]:
dataset = ExpertDatasetWrapper(dfsample)
dataset

<atari_rl.il.expert_dataset_wrapper.ExpertDatasetWrapper at 0x168278f6c00>

In [14]:
dataloader = DataLoader(dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4)
dataloader

<torch.utils.data.dataloader.DataLoader at 0x168288da630>

In [17]:
for idx, data in enumerate(dataloader):
    print(data[0].shape, data[1].shape)
    break

torch.Size([2, 4, 84, 84]) torch.Size([2])


### Traiin

In [18]:
if torch.cuda.is_available():
    print("Training optimized with CUDA")
    device = torch.device("cuda")
else:
    print("Training with CPU")
    device = torch.device("cpu")

Training optimized with CUDA


In [19]:
agent = Agent(obs_shape,
    NUM_ACTIONS,
    device)

In [43]:
for idx, data in enumerate(dataloader):
    outputs = agent.policy_net(data[0].to(device))
    labels = data[1].to(device)
    break
outputs

tensor([[-0.0285, -0.0145, -0.0344, -0.0043, -0.0267],
        [-0.0278, -0.0146, -0.0353, -0.0046, -0.0268]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [44]:
# outputs = torch.argmax(outputs, dim=1)
outputs

tensor([[-0.0285, -0.0145, -0.0344, -0.0043, -0.0267],
        [-0.0278, -0.0146, -0.0353, -0.0046, -0.0268]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [45]:
labels

tensor([3, 4], device='cuda:0')

In [46]:
criterion = torch.nn.CrossEntropyLoss()
criterion(outputs, labels)

tensor(1.6033, device='cuda:0', grad_fn=<NllLossBackward0>)