In [6]:
import torch

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

# Project-local modules; presumably they live next to this notebook, since
# they are imported before sys.path is modified -- TODO confirm.
from DeepQAgent import DeepQAgent
from TicTacToeGame import TicTacToeGame, OPPONENT_LEVEL

import os
import sys
# Temporarily expose the parent of the current working directory on sys.path
# so that `Utils` can be imported. The statement order below matters: the
# import must happen between the append and the remove.
sys.path.append(os.path.abspath('..'))

from Utils import (
    train_agent,
    test_agent,
    set_seed
)

# Undo the temporary sys.path change now that `Utils` has been imported.
sys.path.remove(os.path.abspath('..'))

# Seed passed to the project's set_seed helper (imported from Utils);
# presumably it seeds the relevant RNGs for reproducibility -- TODO confirm.
SEED = 100
set_seed(SEED)

# DeepQ parameters
# Most of these are forwarded to the DeepQAgent constructor in supply_model();
# their exact semantics are defined by DeepQAgent, which is not shown here.
BATCH_SIZE     = 128
# More episodes when a GPU is available, fewer on CPU. Not used by any cell
# visible in this part of the notebook.
NUM_EPISODES   = 2000 if torch.cuda.is_available() else 100
# presumably one input / one action per board cell (3x3 = 9) -- TODO confirm
STATE_SPACE    = 9
ACTION_SPACE   = 9
HIDDEN_SIZE    = 128
# presumably the initial exploration rate -- TODO confirm
EPSILON        = 1.0
# presumably the reward discount factor -- TODO confirm
GAMMA          = 0.99
# Learning rate handed to the Adam optimizer in supply_model().
LEARNING_RATE  = 0.001
DROPOUT        = 0.25
# Forwarded as `train_start`; meaning is defined by DeepQAgent -- TODO confirm.
TRAIN_START    = 1500
# Forwarded as `negative_slope`; presumably a LeakyReLU slope -- TODO confirm.
NEGATIVE_SLOPE = 0.01
# NOTE(review): the evaluation cell below passes OPPONENT_LEVEL.NAIVE directly
# rather than this constant.
COMPUTER_LEVEL = OPPONENT_LEVEL.NAIVE

# save path
# Directory and base file name (without the ".pt" suffix) of the checkpoint.
MODEL_PATH = "../../../trained_models/ReinforcementLearning/TicTacToeV2"
MODEL_NAME = "TicTacToe-Baseline-Untrained"

def get_full_model_path():
  """Return the checkpoint file path: MODEL_PATH joined with MODEL_NAME plus '.pt'."""
  checkpoint_file = f"{MODEL_NAME}.pt"
  return os.path.join(MODEL_PATH, checkpoint_file)

def supply_model(load_if_exists: bool = True):
  """Build a DeepQAgent together with its optimizer and loss function.

  The agent is constructed from the module-level hyperparameter constants.
  When `load_if_exists` is true and a checkpoint file exists at the path
  returned by get_full_model_path(), the agent's parameters are loaded from
  that file via `agent.load_model`.

  Args:
    load_if_exists: If True, load saved parameters when the checkpoint file
      is present. If False, always return a freshly constructed agent.

  Returns:
    A tuple `(agent, optimizer, criterion)`: the DeepQAgent, an Adam
    optimizer over `agent.parameters()` using LEARNING_RATE, and a
    `torch.nn.SmoothL1Loss` instance.
  """
  agent = DeepQAgent(
      device         = DEVICE,
      epsilon        = EPSILON,
      gamma          = GAMMA,
      state_space    = STATE_SPACE,
      action_space   = ACTION_SPACE,
      hidden_size    = HIDDEN_SIZE,
      dropout        = DROPOUT,
      train_start    = TRAIN_START,
      batch_size     = BATCH_SIZE,
      negative_slope = NEGATIVE_SLOPE
  )

  if load_if_exists:
    # Resolve the path BEFORE testing for its existence. The previous code
    # read `full_model_path` in the condition and only assigned it inside the
    # branch, which raised UnboundLocalError whenever load_if_exists was True.
    full_model_path = get_full_model_path()
    if os.path.exists(full_model_path):
      print("Loading Model Parameters...")
      agent.load_model(filepath=full_model_path)

  # The optimizer is created after any load; it binds to the agent's
  # parameter objects as returned by agent.parameters().
  optimizer = torch.optim.Adam(agent.parameters(), lr=LEARNING_RATE)
  criterion = torch.nn.SmoothL1Loss() # Huber Loss

  return agent, optimizer, criterion

cuda


Below is an untrained model whose parameters were initialized randomly, so its play is effectively random. Its results against the naive opponent serve as the baseline.

In [2]:
# Build a fresh agent without loading any checkpoint; the optimizer and
# criterion returned by supply_model() are not needed for evaluation.
baseline, _, _ = supply_model(load_if_exists=False)
# NOTE(review): the meaning of the second positional argument (None) is
# defined by TicTacToeGame and is not visible here -- TODO confirm.
environment = TicTacToeGame(DEVICE, None, OPPONENT_LEVEL.NAIVE, start_as_X=False)
# Evaluate the agent; the recorded output shows 10000 iterations followed by
# win / draw / loss rates.
test_agent(baseline, environment, 10000)

100%|██████████| 10000/10000 [00:50<00:00, 198.97it/s]


Win rate:  55.75%
Draw rate: 8.81%
Loss rate: 35.44%





In [3]:
# Persist the untrained baseline's parameters. The call's return value (the
# saved file path, per the recorded output) is displayed as the cell result.
# NOTE(review): the recorded output path lacks the "ReinforcementLearning"
# segment present in MODEL_PATH, so the output looks stale -- re-run to refresh.
baseline.save_model(MODEL_PATH, MODEL_NAME)

Model saved to '../../../trained_models/TicTacToeV2\TicTacToe-Baseline-Untrained.pt'.


'../../../trained_models/TicTacToeV2\\TicTacToe-Baseline-Untrained.pt'