In [1]:
import gym
from bayes_opt import BayesianOptimization

from agent import Agent
from monitor import interact

In [2]:
# Shared Taxi-v3 environment; interact_wrapper below reads this module-level
# name directly instead of receiving it as an argument.
env = gym.make('Taxi-v3')
def interact_wrapper(eps, eps_decay, alpha, gamma=0.9, num_episodes=20000):
    """Objective function for the optimizer: train a fresh agent and score it.

    A new ``Agent`` is built from the given hyperparameters on every call and
    run against the module-level ``env`` through ``interact``.

    Args:
        eps: forwarded to ``Agent(eps=...)``.
        eps_decay: forwarded to ``Agent(eps_decay=...)``.
        alpha: forwarded to ``Agent(alpha=...)``.
        gamma: forwarded to ``Agent(gamma=...)``; defaults to 0.9, which is
            the value used whenever the caller does not supply it.
        num_episodes: third positional argument handed to ``interact``
            (presumably the number of training episodes - confirm against
            monitor.interact). Defaults to 20000, the previously hard-coded
            value, so existing callers behave exactly as before.

    Returns:
        The second element of the pair returned by ``interact`` (bound here as
        ``best_avg_reward``), used as the value to maximize.
    """
    agent = Agent(eps=eps, eps_decay=eps_decay, alpha=alpha, gamma=gamma)
    # Only the second returned value is needed as the optimization target;
    # the first one is deliberately discarded.
    _, best_avg_reward = interact(env, agent, num_episodes)
    return best_avg_reward

In [3]:
# Search box: one (lower, upper) pair per hyperparameter being tuned.
# 'gamma' is not listed, so interact_wrapper falls back to its default for it.
pbounds = {
    'eps': (0.5, 1.0),
    'eps_decay': (0.5, 0.9),
    'alpha': (0.1, 0.2),
}

# random_state is pinned so the optimizer's own sampling is repeatable.
optimizer = BayesianOptimization(f=interact_wrapper, pbounds=pbounds, random_state=47)

In [4]:
# Register a hand-picked starting point. lazy=True only queues it; the
# evaluation happens on the next maximize() call.
optimizer.probe(params={'eps': 0.87, 'eps_decay': 0.5, 'alpha': 0.11}, lazy=True)

In [5]:
# Run the search: 10 initial points followed by 100 optimization iterations.
# Every evaluation calls interact_wrapper, so this cell is long-running.
optimizer.maximize(init_points=10, n_iter=100)

|   iter    |  target   |   alpha   |    eps    | eps_decay |
-------------------------------------------------------------
20000 episodes passed

| [0m 1       [0m | [0m 8.94    [0m | [0m 0.11    [0m | [0m 0.87    [0m | [0m 0.5     [0m |
20000 episodes passed

| [0m 2       [0m | [0m 8.75    [0m | [0m 0.1113  [0m | [0m 0.9872  [0m | [0m 0.7915  [0m |
20000 episodes passed

| [0m 3       [0m | [0m 8.92    [0m | [0m 0.1351  [0m | [0m 0.8538  [0m | [0m 0.8198  [0m |
20000 episodes passed

| [0m 4       [0m | [0m 8.76    [0m | [0m 0.1646  [0m | [0m 0.7073  [0m | [0m 0.7824  [0m |
20000 episodes passed

| [0m 5       [0m | [0m 8.74    [0m | [0m 0.1247  [0m | [0m 0.628   [0m | [0m 0.5096  [0m |
20000 episodes passed

| [0m 6       [0m | [0m 8.71    [0m | [0m 0.1099  [0m | [0m 0.6502  [0m | [0m 0.7563  [0m |
20000 episodes passed

| [0m 7       [0m | [0m 8.75    [0m | [0m 0.1322  [0m | [0m 0.5927  [0m | [0m 0.8669  [0m |
2

In [7]:
# Report the best target/parameter combination found so far.
print('Best result: {}'.format(optimizer.max))
# The '{}' placeholder is required: without it str.format() silently drops
# its argument and the number of evaluated points is never shown.
print('Points checked: {}'.format(len(optimizer.space)))

Best result: {'target': 9.19, 'params': {'alpha': 0.18602786467835314, 'eps': 0.8317873591544797, 'eps_decay': 0.5064616875543634}}
Points checked: 
