# Navigation

---

In [None]:
from unityagents import UnityEnvironment
import numpy as np
from agent import Agent
from train_dqn import train_dqn
import torch
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Start the Unity environment from the local "Banana.app" build with graphics disabled.
env = UnityEnvironment(
    file_name="Banana.app",
    no_graphics=True,
)

In [None]:
# Take the first brain name the environment lists and fetch the matching brain object.
# brain_name is used below to index the result of env.reset(); brain is read for the
# action-space size.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Reset the environment in training mode and keep the info entry for our brain.
env_info = env.reset(train_mode=True)[brain_name]

# State dimensionality: the length of the first observation vector after the reset.
state = env_info.vector_observations[0]
state_size = len(state)

# Number of actions, as reported by the brain.
action_size = brain.vector_action_space_size

In [None]:
# Hyperparameters, grouped by the call that consumes them further down.

# --- Passed to Agent(...) ---
SEED = 0                # seed value handed to the agent
FC_SIZE = 12            # presumably the fully-connected layer width -- confirm in agent.py
LR = 1e-3               # presumably the optimizer learning rate -- confirm in agent.py
BUFFER_SIZE = 10 ** 5   # presumably the replay buffer capacity -- confirm in agent.py
BATCH_SIZE = 64         # presumably the minibatch size -- confirm in agent.py
UPDATE_EVERY = 4        # presumably steps between learning updates -- confirm in agent.py

# --- Passed to train_dqn(...) ---
GAMMA = 0.99            # presumably the discount factor -- confirm in train_dqn.py
TAU = 1e-3              # presumably the soft-update rate -- confirm in train_dqn.py
EPS_DECAY = 0.99        # presumably the per-episode epsilon multiplier -- confirm in train_dqn.py
EPS_MIN = 0.01          # presumably the epsilon floor -- confirm in train_dqn.py

In [None]:
# Build the agent used for training from the environment dimensions and the
# hyperparameters defined above (positional order as expected by Agent).
agent = Agent(
    state_size,
    action_size,
    FC_SIZE,
    SEED,
    LR,
    BUFFER_SIZE,
    BATCH_SIZE,
    UPDATE_EVERY,
)

In [None]:
# Run train_dqn with train=True and keep the returned scores for plotting.
scores = train_dqn(
    1000,  # first positional argument; presumably the number of episodes -- confirm in train_dqn.py
    agent,
    env,
    GAMMA,
    TAU,
    EPS_DECAY,
    EPS_MIN,
    train=True,
)

In [None]:
# Training curve: one point per entry in the scores returned by train_dqn.
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
# The x-axis is labelled 'Episodes' (not 'Epochs') so it agrees with the evaluation
# plot, which draws the same kind of data. The trailing ';' hides the Text repr.
plt.xlabel('Episodes');

### Evaluation

In [None]:
# Load the saved network weights from "checkpoint.pth".
# map_location="cpu" remaps all storages to the CPU, so a checkpoint written from a
# GPU session can still be loaded on a machine without CUDA.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
pretrained_state_dict = torch.load("checkpoint.pth", map_location="cpu")

In [None]:
# Create a fresh agent for evaluation with the same dimensions and hyperparameters;
# this rebinds `agent`, replacing the previously created instance.
agent = Agent(
    state_size,
    action_size,
    FC_SIZE,
    SEED,
    LR,
    BUFFER_SIZE,
    BATCH_SIZE,
    UPDATE_EVERY,
)

In [None]:
# Copy the weights loaded from the checkpoint into the agent's qnetwork_local model.
# As the last expression in the cell, the value returned by load_state_dict is displayed.
agent.qnetwork_local.load_state_dict(pretrained_state_dict)

In [None]:
# Run train_dqn with train=False using the agent that holds the loaded weights,
# and keep the returned scores for the evaluation plot.
scores = train_dqn(
    100,  # first positional argument; presumably the number of episodes -- confirm in train_dqn.py
    agent,
    env,
    GAMMA,
    TAU,
    EPS_DECAY,
    EPS_MIN,
    train=False,
)

In [None]:
# Evaluation scores with two dashed horizontal reference lines.
# The lines span 0..len(scores) instead of a hard-coded 100, so they stay aligned
# with the curve if the number of evaluated episodes changes.
plt.plot(np.arange(len(scores)), scores)
# Green: fixed reference level of 13 (presumably the target score -- confirm).
plt.hlines(13, 0, len(scores), colors='green', linestyles='dashed')
# Red: mean of the evaluation scores.
plt.hlines(np.mean(scores), 0, len(scores), colors='red', linestyles='dashed')
plt.ylabel('Score')
plt.xlabel('Episodes');  # trailing ';' hides the Text repr

In [None]:
# Close the Unity environment now that training and evaluation are done.
env.close()

--------