In [None]:
## This notebook is used to test the random and static players discussed in the blog post.
## Random players select an action randomly at each time step with uniform distribution.
## Static players always select the "collect" action and therefore never move.
## (Nx, Ny) are the dimensions of the board, typically set to either (4,4) or (8,8).
## N_test is the number of test episodes, which by default is set to 1024.
## Test results are given as total reward per episode, averaged over the test set.
## Simply running this notebook from beginning to end prints all the test results
## given in the blog post.

In [None]:
import torch
import numpy as np
import pandas as pd
import os
import gc
from torch import nn
import copy
import matplotlib.pyplot as plt

from data_factory import *
from model_factory import *
from rl import *
from test import *
from visual import *

In [None]:
# --- Baseline evaluation on a 4 x 4 board ---
# Board dimensions and number of test episodes for this run.
Nx = 4
Ny = 4
N_test = 1024

# Build the test simulator for an Nx-by-Ny board and initialise N_test games.
simulator = TestSimulator(Nx, Ny)
simulator.init_games(N_test)

# Baseline 1: dummy player constructed in "random" mode.
# The simulator's test result is converted to a plain Python number via .item().
random_player = DummyWrapper(mode="random")
random_player_test_result = simulator.test(random_player).item()

# Baseline 2: dummy player constructed in "collect" mode.
# Evaluated after the random player, on the same simulator instance.
static_player = DummyWrapper(mode="collect")
static_player_test_result = simulator.test(static_player).item()

# Report the run configuration followed by each player's test result.
for template, values in (
    ("Board Dimensions: %d x %d", (Nx, Ny)),
    ("Number of Test Samples: %d", (N_test,)),
    ("Random Player Test Result: %f", (random_player_test_result,)),
    ("Static Player Test Result: %f", (static_player_test_result,)),
):
    print(template % values)

In [None]:
# --- Baseline evaluation on an 8 x 8 board ---
# Board dimensions and number of test episodes for this run.
Nx = 8
Ny = 8
N_test = 1024

# Build a fresh test simulator for the larger board and initialise N_test games.
simulator = TestSimulator(Nx, Ny)
simulator.init_games(N_test)

# Baseline 1: dummy player constructed in "random" mode.
# The simulator's test result is converted to a plain Python number via .item().
random_player = DummyWrapper(mode="random")
random_player_test_result = simulator.test(random_player).item()

# Baseline 2: dummy player constructed in "collect" mode.
# Evaluated after the random player, on the same simulator instance.
static_player = DummyWrapper(mode="collect")
static_player_test_result = simulator.test(static_player).item()

# Report the run configuration followed by each player's test result.
for template, values in (
    ("Board Dimensions: %d x %d", (Nx, Ny)),
    ("Number of Test Samples: %d", (N_test,)),
    ("Random Player Test Result: %f", (random_player_test_result,)),
    ("Static Player Test Result: %f", (static_player_test_result,)),
):
    print(template % values)