In [None]:
## This notebook makes all the plots and figures presented in the blog post.
## Simply running the notebook from beginning to end remakes all the plots
## and saves them in blog/plots.

In [None]:
import copy
import gc
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn

from data_factory import *
from model_factory import *
from rl import *
from test import *
from visual import *



In [None]:
# Robustness comparison: validation score of PPO, actor-critic and Q-learning
# runs at three learning rates, one panel per learning rate.
Nx, Ny = 4, 4
epochs_total = 120 * 8
epochs_per_valid = 8
# One x value every `epochs_per_valid` epochs, from 0 through `epochs_total`
# inclusive. Each plotted "score_valid" series must have this same length.
x_axis = np.arange(0, epochs_total+1, epochs_per_valid)
ylim_lower, ylim_upper = -64, 0
xlim_lower, xlim_upper = 0, x_axis[-1]

# Every run is loaded by name before its loss_dict is read below.
qlearn_LR4 = QLearn(Nx=Nx, Ny=Ny, name="QLearn_QValueWide_Size4LR4", Q_class = QValueWide)
qlearn_LR4.model_factory.load()
qlearn_LR3 = QLearn(Nx=Nx, Ny=Ny, name="QLearn_QValueWide_Size4LR3", Q_class = QValueWide)
qlearn_LR3.model_factory.load()
qlearn_LR2 = QLearn(Nx=Nx, Ny=Ny, name="QLearn_QValueWide_Size4LR2", Q_class = QValueWide)
qlearn_LR2.model_factory.load()

ac_LR4 = AC(Nx=Nx, Ny=Ny, name="AC_ACValueWide_Size4LR4", actor_critic_class = ACValueWide)
ac_LR4.model_factory.load()
ac_LR3 = AC(Nx=Nx, Ny=Ny, name="AC_ACValueWide_Size4LR3", actor_critic_class = ACValueWide)
ac_LR3.model_factory.load()
ac_LR2 = AC(Nx=Nx, Ny=Ny, name="AC_ACValueWide_Size4LR2", actor_critic_class = ACValueWide)
ac_LR2.model_factory.load()

ppo_LR4 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR4", actor_critic_class = ACValueWide)
ppo_LR4.model_factory.load()
ppo_LR3 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR3", actor_critic_class = ACValueWide)
ppo_LR3.model_factory.load()
ppo_LR2 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR2", actor_critic_class = ACValueWide)
ppo_LR2.model_factory.load()

fig, ax = plt.subplots(1, 3, figsize=(16,4))

# Curves are drawn in this order in every panel, so each algorithm keeps the
# same colour across panels.
algorithm_labels = ("PPO", "Actor-critic", "Q-learning")
# (panel title, runs in the same order as `algorithm_labels`), left to right.
runs_by_panel = (
    ("LR = 1e-4", (ppo_LR4, ac_LR4, qlearn_LR4)),
    ("LR = 1e-3", (ppo_LR3, ac_LR3, qlearn_LR3)),
    ("LR = 1e-2", (ppo_LR2, ac_LR2, qlearn_LR2)),
)

for panel_ax, (panel_title, panel_runs) in zip(ax, runs_by_panel):
    for curve_label, trained_run in zip(algorithm_labels, panel_runs):
        panel_ax.plot(x_axis, trained_run.model_factory.loss_dict["score_valid"], label = curve_label)
    panel_ax.set_xlabel("Epoch")
    panel_ax.set_xlim(xlim_lower, xlim_upper)
    panel_ax.set_ylim(ylim_lower, ylim_upper)
    panel_ax.set_title(panel_title)

# The y label and legend are shown once, on the left-most panel only.
ax[0].set_ylabel("Total Reward Per Episode")
ax[0].legend()

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/robustness_comparison.png")


In [None]:
# Width/depth comparison: validation score of PPO runs that differ only in
# the actor-critic class, split into a width panel and a depth panel.
Nx, Ny = 4, 4
epochs_total = 120 * 8
epochs_per_valid = 8
# One x value every `epochs_per_valid` epochs, from 0 through `epochs_total`
# inclusive. Each plotted "score_valid" series must have this same length.
x_axis = np.arange(0, epochs_total+1, epochs_per_valid)
ylim_lower, ylim_upper = -64, 0
xlim_lower, xlim_upper = 0, x_axis[-1]

# Every run is loaded by name before its loss_dict is read below.
ppo = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValue_Size4LR3", actor_critic_class = ACValue)
ppo.model_factory.load()

ppo_wide = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR3", actor_critic_class = ACValueWide)
ppo_wide.model_factory.load()

ppo_very_wide = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueVeryWide_Size4LR3", actor_critic_class = ACValueVeryWide)
ppo_very_wide.model_factory.load()

ppo_deep = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueDeep_Size4LR3", actor_critic_class = ACValueDeep)
ppo_deep.model_factory.load()

ppo_very_deep = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueVeryDeep_Size4LR3", actor_critic_class = ACValueVeryDeep)
ppo_very_deep.model_factory.load()

fig, ax = plt.subplots(1, 2, figsize=(12,4))

# One entry per panel, left to right; each entry lists (legend label, run) in
# drawing order. The baseline run appears first in both panels.
curves_by_panel = (
    (("Baseline", ppo), ("2x Width", ppo_wide), ("4x Width", ppo_very_wide)),
    (("Baseline", ppo), ("+4 Layers", ppo_deep), ("+8 Layers", ppo_very_deep)),
)

for panel_ax, panel_curves in zip(ax, curves_by_panel):
    for curve_label, trained_run in panel_curves:
        panel_ax.plot(x_axis, trained_run.model_factory.loss_dict["score_valid"], label = curve_label)
    panel_ax.set_xlabel("Epoch")
    panel_ax.legend()
    panel_ax.set_xlim(xlim_lower, xlim_upper)
    panel_ax.set_ylim(ylim_lower, ylim_upper)

# Both panels use the same y limits, so only the left one carries the y label.
ax[0].set_ylabel("Total Reward Per Episode")

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/width_depth.png")


In [None]:
# Multiple-actors comparison: validation score of three PPO runs plotted
# against the number of episodes instead of epochs.
Nx, Ny = 4, 4
episodes_total = 120 * 8 * 64
episodes_per_valid = 8 * 64
# One x value every `episodes_per_valid` episodes, from 0 through
# `episodes_total` inclusive. Each plotted "score_valid" series must have this
# same length.
# NOTE(review): the same episode axis (built from a factor of 64) is used for
# all three runs -- confirm the N16 and N4 runs were validated at these same
# episode counts.
x_axis = np.arange(0, episodes_total+1, episodes_per_valid)
ylim_lower, ylim_upper = -64, 0
xlim_lower, xlim_upper = 0, x_axis[-1]

# Every run is loaded by name before its loss_dict is read below.
ppo_N64 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR3", actor_critic_class = ACValueWide)
ppo_N64.model_factory.load()

ppo_N16 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR3N16", actor_critic_class = ACValueWide)
ppo_N16.model_factory.load()

ppo_N4 = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size4LR3N4", actor_critic_class = ACValueWide)
ppo_N4.model_factory.load()

fig, ax = plt.subplots(1, 1, figsize=(6,4))

# (legend label, run) in drawing order.
actor_curves = (
    ("64 actors/epoch", ppo_N64),
    ("16 actors/epoch", ppo_N16),
    ("4 actors/epoch", ppo_N4),
)
for curve_label, trained_run in actor_curves:
    ax.plot(x_axis, trained_run.model_factory.loss_dict["score_valid"], label = curve_label)

ax.set_xlabel("Episode")
ax.set_ylabel("Total Reward Per Episode")
ax.legend()

ax.set_xlim(xlim_lower, xlim_upper)
ax.set_ylim(ylim_lower, ylim_upper)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/multiple_actors.png")


In [None]:
# 8x8 comparison: validation score of the PPO, actor-critic and Q-learning
# runs created with Nx = Ny = 8, on a single panel.
Nx, Ny = 8, 8
epochs_total = 80 * 8
epochs_per_valid = 8
# One x value every `epochs_per_valid` epochs, from 0 through `epochs_total`
# inclusive. Each plotted "score_valid" series must have this same length.
x_axis = np.arange(0, epochs_total+1, epochs_per_valid)
ylim_lower, ylim_upper = -64, 0
xlim_lower, xlim_upper = 0, x_axis[-1]

# Every run is loaded by name before its loss_dict is read below.
qlearn = QLearn(Nx=Nx, Ny=Ny, name="QLearn_QValueWide_Size8LR3", Q_class = QValueWide)
qlearn.model_factory.load()

ac = AC(Nx=Nx, Ny=Ny, name="AC_ACValueWide_Size8LR3", actor_critic_class = ACValueWide)
ac.model_factory.load()

ppo = PPO(Nx=Nx, Ny=Ny, name="PPO_ACValueWide_Size8LR3", actor_critic_class = ACValueWide)
ppo.model_factory.load()

fig, ax = plt.subplots(1, 1, figsize=(6,4))

# (legend label, run) in drawing order.
algorithm_curves = (
    ("PPO", ppo),
    ("Actor-critic", ac),
    ("Q-learning", qlearn),
)
for curve_label, trained_run in algorithm_curves:
    ax.plot(x_axis, trained_run.model_factory.loss_dict["score_valid"], label = curve_label)

ax.set_xlabel("Epoch")
ax.set_ylabel("Total Reward Per Episode")
ax.legend()

ax.set_xlim(xlim_lower, xlim_upper)
ax.set_ylim(ylim_lower, ylim_upper)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/size8.png")


In [None]:
# Load the pickled game "game_size4_0", plot its history with Visual and save
# the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 4, 4
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size4_0.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history(n_cols=9)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size4_0.png")

In [None]:
# Load the pickled game "game_size4_1", plot its history with Visual and save
# the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 4, 4
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size4_1.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history(n_cols=9)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size4_1.png")

In [None]:
# Load the pickled game "game_size4_2", plot its history with Visual and save
# the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 4, 4
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size4_2.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history(n_cols=9)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size4_2.png")

In [None]:
# Load the pickled game "game_size8_0", plot its history with Visual (default
# layout) and save the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 8, 8
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size8_0.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size8_0.png")

In [None]:
# Load the pickled game "game_size8_1", plot its history with Visual (default
# layout) and save the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 8, 8
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size8_1.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size8_1.png")

In [None]:
# Load the pickled game "game_size8_2", plot its history with Visual (default
# layout) and save the resulting figure.
# Nx, Ny are not read in this cell; they are still assigned as before.
Nx, Ny = 8, 8
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load saved games that come from a trusted source.
# The `with` block closes the file handle as soon as the game is loaded.
with open("blog/saved_games/game_size8_2.p", "rb") as game_file:
    game = pickle.load(game_file)
visual = Visual(game)
fig, ax = visual.plot_history()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("blog/plots", exist_ok=True)
fig.savefig("blog/plots/game_size8_2.png")