In [None]:
import os

import torch
from bayes_opt import BayesianOptimization
from unityagents import UnityEnvironment

from p1_agent import DoubleDqnAgent
from utils import watch, interact, plot
from workspace_utils import active_session
%matplotlib inline

In [None]:
# Location of the Unity Banana build. Set the BANANA_ENV_PATH environment
# variable to point at a different install; when it is unset, the original
# hard-coded location is used, so existing setups keep working.
BANANA_ENV_PATH = os.environ.get(
    "BANANA_ENV_PATH",
    "/home/yacine/udacity/deep-reinforcement-learning/p1_navigation/Banana_Linux/Banana.x86_64",
)
env = UnityEnvironment(file_name=BANANA_ENV_PATH)

In [None]:
# Get the default brain: take the first name listed by the environment,
# then look up the brain object stored under that name.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Reset the environment with train_mode=True and keep the info for our brain.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the problem dimensions first ...
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]  # first entry of the observations
state_size = len(state)

# ... then report them: agent count, action count, a sample state, its length.
print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
# Device selection: CUDA is used only when the user opts in *and*
# torch reports that CUDA is available in this environment.
enable_gpu = False  # user switch
gpu_available = torch.cuda.is_available()  # environment check
train_on_gpu = enable_gpu and gpu_available

device = torch.device('cuda' if train_on_gpu else 'cpu')

# Display the chosen device as the cell output.
device

In [None]:
# Build a fresh agent (no training has been run on it) and watch it act.
untrained_agent = DoubleDqnAgent(
    state_size=state_size,
    action_size=action_size,
    seed=0,
    device=device,
)
watch(untrained_agent, env, brain_name)

In [None]:
# Create the agent with its default hyper-parameters and run the
# interaction loop; `interact` returns the scores and the best average reward.
agent = DoubleDqnAgent(
    state_size=state_size,
    action_size=action_size,
    seed=0,
    device=device,
)
scores, best_avg_reward = interact(
    agent,
    env,
    brain_name,
    min_score=100.0,
)

In [None]:
# Hand the scores returned by `interact` to the plotting helper from utils.
plot(scores)

In [None]:
# Restore the saved weights into the agent's local Q-network, then watch it act.
# map_location=device remaps the checkpoint's tensors onto the selected device
# while loading, so a checkpoint written on a GPU still loads on a CPU-only machine.
agent.qnetwork_local.load_state_dict(torch.load('top_model.pth', map_location=device))
watch(agent, env, brain_name)

In [None]:
# Hyper-parameter tuning, using Bayesian optimization.

def avg_reward_objective(lr, gamma):
    """Objective maximized by the optimizer: run a fresh agent and score it.

    The function has its own name so it does not collide with the
    ``best_avg_reward`` float that other cells bind from ``interact``.

    Args:
        lr: value forwarded to ``DoubleDqnAgent`` as ``lr``.
        gamma: value forwarded to ``DoubleDqnAgent`` as ``gamma``.

    Returns:
        The second value returned by ``interact`` (the best average reward).
    """
    candidate = DoubleDqnAgent(
        state_size=state_size,
        action_size=action_size,
        seed=0,
        device=device,
        lr=lr,
        gamma=gamma,
    )
    # min_score=100.0: see if we can go beyond the required min average
    # score of 13.0 for submission.
    _, best_reward = interact(candidate, env, brain_name, min_score=100.0)
    return best_reward

# Search ranges given to the optimizer for each hyper-parameter.
pbounds = {'lr': (1e-4, 0.01), 'gamma': (0.8, 1.0)}

optimizer = BayesianOptimization(
    f=avg_reward_objective,
    pbounds=pbounds,
    random_state=1
)

# Queue a hand-picked point; with lazy=True it is not evaluated here but
# on the next call to optimizer.maximize.
optimizer.probe(
    params={'lr': 5e-4, 'gamma': 0.99},
    lazy=True,
)

In [None]:
# Run the search with init_points=3 and n_iter=25.
# Optional: wrap this call in `with active_session():` (imported from
# workspace_utils) — presumably meant to keep a hosted workspace alive during
# the long run; TODO confirm before enabling.
optimizer.maximize(init_points=3, n_iter=25)

In [None]:
# Print the optimizer's best result so far; its "params" entry holds the
# corresponding lr and gamma.
print(optimizer.max)

In [None]:
# Build a new agent from the best hyper-parameters found by the optimizer.
best_params = optimizer.max["params"]
best_lr, best_gamma = best_params["lr"], best_params["gamma"]
agent = DoubleDqnAgent(
    state_size=state_size,
    action_size=action_size,
    seed=0,
    device=device,
    lr=best_lr,
    gamma=best_gamma,
)

In [None]:
# Run the interaction loop for the agent built from the tuned hyper-parameters.
# min_score is not passed here, so `interact` uses its own default (the earlier
# runs passed min_score=100.0).
# Optional: wrap in `with active_session():` — presumably for long runs on a
# hosted workspace; TODO confirm.
scores, best_avg_reward = interact(agent, env, brain_name)
# Show the best average reward as the cell output.
best_avg_reward

In [None]:
# Hand the scores from the tuned agent's run to the plotting helper from utils.
plot(scores)

In [None]:
# Load the trained model.
enable_gpu = False  # Up to the user
gpu_available = torch.cuda.is_available()  # Checks the environment
train_on_gpu = enable_gpu and gpu_available
device = torch.device('cuda' if train_on_gpu else 'cpu')

# Pass the device chosen above to torch.load: map_location remaps the
# checkpoint's tensors while loading, so a checkpoint saved on a GPU can be
# restored on a CPU-only machine (previously `device` was computed but unused).
agent.qnetwork_local.load_state_dict(torch.load('top_model_1.pth', map_location=device))
watch(agent, env, brain_name)

In [None]:
# Close the Unity environment now that the notebook is done with it.
env.close()