In [1]:
import sys
import os.path as osp
import gym.spaces
import itertools
import numpy as np
import random
import tensorflow                as tf
import tensorflow.contrib.layers as layers
from collections import namedtuple
from dqn_utils import *
from gym import wrappers
from atari_wrappers import *


OptimizerSpec = namedtuple("OptimizerSpec", ["constructor", "kwargs", "lr_schedule"])

In [2]:
def get_session():
    tf.reset_default_graph()
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1)
    session = tf.Session(config=tf_config)
    return session

In [3]:
def atari_model(img_in, num_actions, scope, reuse=False):
    # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            out = layers.fully_connected(out, num_outputs=512,         activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)

        return out

In [4]:
num_timesteps = 1000000
num_iterations = float(num_timesteps) / 4.0

lr_multiplier = 1.0
lr_schedule = PiecewiseSchedule([
                                     (0,                   1e-4 * lr_multiplier),
                                     (num_iterations / 10, 1e-4 * lr_multiplier),
                                     (num_iterations / 2,  5e-5 * lr_multiplier),
                                ],
                                outside_value=5e-5 * lr_multiplier)
optimizer = OptimizerSpec(
    constructor=tf.train.AdamOptimizer,
    kwargs=dict(epsilon=1e-4),
    lr_schedule=lr_schedule
)

In [5]:
benchmark = gym.benchmark_spec('Atari40M')

# Change the index to select a different game.
task = benchmark.tasks[3]

# Run training
seed = 0 # Use a seed of zero (you may want to randomize the seed!)
env = gym.make(task.env_id)
# set_global_seeds(seed)
# env.seed(seed)

expt_dir = '/tmp/hw3_vid_dir2/'
env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
env = wrap_deepmind(env)



session = get_session()
# tf.global_variables_initializer().run()

[2018-06-21 01:29:16,983] Making new env: PongNoFrameskip-v4
[2018-06-21 01:29:17,118] Clearing 4 monitor files from previous run (because force=True was provided)


In [6]:
env = env
q_func = atari_model
optimizer_spec = optimizer
session =  session
exploration=LinearSchedule(1000000, 0.1)
stopping_criterion=None
replay_buffer_size=1000000
batch_size=32
gamma=0.99
learning_starts=50000
learning_freq=4
frame_history_len=4
target_update_freq=10000
grad_norm_clipping=10
"""Run Deep Q-learning algorithm.

You can specify your own convnet using q_func.

All schedules are w.r.t. total number of steps taken in the environment.

Parameters
----------
env: gym.Env
    gym environment to train on.
q_func: function
    Model to use for computing the q function. It should accept the
    following named arguments:
        img_in: tf.Tensor
            tensorflow tensor representing the input image
        num_actions: int
            number of actions
        scope: str
            scope in which all the model related variables
            should be created
        reuse: bool
            whether previously created variables should be reused.
optimizer_spec: OptimizerSpec
    Specifying the constructor and kwargs, as well as learning rate schedule
    for the optimizer
session: tf.Session
    tensorflow session to use.
exploration: rl_algs.deepq.utils.schedules.Schedule
    schedule for probability of chosing random action.
stopping_criterion: (env, t) -> bool
    should return true when it's ok for the RL algorithm to stop.
    takes in env and the number of steps executed so far.
replay_buffer_size: int
    How many memories to store in the replay buffer.
batch_size: int
    How many transitions to sample each time experience is replayed.
gamma: float
    Discount Factor
learning_starts: int
    After how many environment steps to start replaying experiences
learning_freq: int
    How many steps of environment to take between every experience replay
frame_history_len: int
    How many past frames to include as input to the model.
target_update_freq: int
    How many experience replay rounds (not steps!) to perform between
    each update to the target Q network
grad_norm_clipping: float or None
    If not None gradients' norms are clipped to this value.
"""
assert type(env.observation_space) == gym.spaces.Box
assert type(env.action_space)      == gym.spaces.Discrete

In [7]:
###############
# BUILD MODEL #
###############

if len(env.observation_space.shape) == 1:
    # This means we are running on low-dimensional observations (e.g. RAM)
    input_shape = env.observation_space.shape
else:
    img_h, img_w, img_c = env.observation_space.shape
    input_shape = (img_h, img_w, frame_history_len * img_c)
num_actions = env.action_space.n

In [8]:
input_shape

(84, 84, 4)

In [9]:
# set up placeholders
# placeholder for current observation (or state)
obs_t_ph              = tf.placeholder(tf.uint8, [None] + list(input_shape))
# placeholder for current action
act_t_ph              = tf.placeholder(tf.int32,   [None])
# placeholder for current reward
rew_t_ph              = tf.placeholder(tf.float32, [None])
# placeholder for next observation (or state)
obs_tp1_ph            = tf.placeholder(tf.uint8, [None] + list(input_shape))
# placeholder for end of episode mask
# this value is 1 if the next state corresponds to the end of an episode,
# in which case there is no Q-value at the next state; at the end of an
# episode, only the current state reward contributes to the target, not the
# next state Q-value (i.e. target is just rew_t_ph, not rew_t_ph + gamma * q_tp1)
done_mask_ph          = tf.placeholder(tf.float32, [None])

# casting to float on GPU ensures lower data transfer times.
obs_t_float   = tf.cast(obs_t_ph,   tf.float32) / 255.0
obs_tp1_float = tf.cast(obs_tp1_ph, tf.float32) / 255.0

In [10]:
# Here, you should fill in your own code to compute the Bellman error. This requires
# evaluating the current and next Q-values and constructing the corresponding error.
# TensorFlow will differentiate this error for you, you just need to pass it to the
# optimizer. See assignment text for details.
# Your code should produce one scalar-valued tensor: total_error
# This will be passed to the optimizer in the provided code below.
# Your code should also produce two collections of variables:
# q_func_vars
# target_q_func_vars
# These should hold all of the variables of the Q-function network and target network,
# respectively. A convenient way to get these is to make use of TF's "scope" feature.
# For example, you can create your Q-function network with the scope "q_func" like this:
# <something> = q_func(obs_t_float, num_actions, scope="q_func", reuse=False)
# And then you can obtain the variables like this:
# q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_func')
# Older versions of TensorFlow may require using "VARIABLES" instead of "GLOBAL_VARIABLES"
######

# YOUR CODE HERE
#get q value for all actions for the current step and for the next step
q_t_all = q_func(img_in=obs_t_float, num_actions=num_actions, scope="q_func", reuse=False)
greedy_action = tf.argmax(q_t_all, axis=1)

q_tp1_all = q_func(img_in=obs_tp1_float, num_actions=num_actions, scope="target_q_func", reuse=False)

q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_func')
target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_q_func')

q_t = tf.reduce_sum(tf.one_hot(act_t_ph, depth=num_actions, dtype=tf.float32) * q_t_all, axis=1)
v_tp1 = tf.reduce_max(q_tp1_all, axis=1)
q_target = rew_t_ph + (1.0 - done_mask_ph) * gamma * v_tp1

total_error = tf.losses.mean_squared_error(q_target, q_t)

In [11]:
######

# construct optimization op (with gradient clipping)
learning_rate = tf.placeholder(tf.float32, (), name="learning_rate")
optimizer = optimizer_spec.constructor(learning_rate=learning_rate, **optimizer_spec.kwargs)
train_fn = minimize_and_clip(optimizer, total_error,
             var_list=q_func_vars, clip_val=grad_norm_clipping)

# update_target_fn will be called periodically to copy Q network to target Q network
update_target_fn = []
for var, var_target in zip(sorted(q_func_vars,        key=lambda v: v.name),
                           sorted(target_q_func_vars, key=lambda v: v.name)):
    update_target_fn.append(var_target.assign(var))
update_target_fn = tf.group(*update_target_fn)

# construct the replay buffer
replay_buffer = ReplayBuffer(replay_buffer_size, frame_history_len)

###############
# RUN ENV     #
###############
model_initialized = False
num_param_updates = 0
mean_episode_reward      = -float('nan')
best_mean_episode_reward = -float('inf')
last_obs = env.reset()
LOG_EVERY_N_STEPS = 10000

[2018-06-21 01:31:55,616] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video000000.mp4


In [None]:
for t in itertools.count():
    ### 1. Check stopping criterion
    if stopping_criterion is not None and stopping_criterion(env, t):
        print("stopping")
        break

    ### 2. Step the env and store the transition
    # At this point, "last_obs" contains the latest observation that was
    # recorded from the simulator. Here, your code needs to store this
    # observation and its outcome (reward, next observation, etc.) into
    # the replay buffer while stepping the simulator forward one step.
    # At the end of this block of code, the simulator should have been
    # advanced one step, and the replay buffer should contain one more
    # transition.
    # Specifically, last_obs must point to the new latest observation.
    # Useful functions you'll need to call:
    # obs, reward, done, info = env.step(action)
    # this steps the environment forward one step
    # obs = env.reset()
    # this resets the environment if you reached an episode boundary.
    # Don't forget to call env.reset() to get a new observation if done
    # is true!!
    # Note that you cannot use "last_obs" directly as input
    # into your network, since it needs to be processed to include context
    # from previous frames. You should check out the replay buffer
    # implementation in dqn_utils.py to see what functionality the replay
    # buffer exposes. The replay buffer has a function called
    # encode_recent_observation that will take the latest observation
    # that you pushed into the buffer and compute the corresponding
    # input that should be given to a Q network by appending some
    # previous frames.
    # Don't forget to include epsilon greedy exploration!
    # And remember that the first time you enter this loop, the model
    # may not yet have been initialized (but of course, the first step
    # might as well be random, since you haven't trained your net...)

    #####

    # YOUR CODE HERE
    idx = replay_buffer.store_frame(last_obs)
    
    if not model_initialized or random.random() < exploration.value(t):
        action = random.randint(0, num_actions - 1)
    else:
        obs = replay_buffer.encode_recent_observation()
        action = session.run(greedy_action, {obs_t_ph: [obs]})
    
    next_obs, reward, done, _ = env.step(action)
    if done:
        last_obs = env.reset()

    replay_buffer.store_effect(idx, action, reward, done)
    last_obs = next_obs

    replay_buffer.store_effect(working_idx, action, reward, done)

    #####

    # at this point, the environment should have been advanced one step (and
    # reset if done was true), and last_obs should point to the new latest
    # observation

    ### 3. Perform experience replay and train the network.
    # note that this is only done if the replay buffer contains enough samples
    # for us to learn something useful -- until then, the model will not be
    # initialized and random actions should be taken
    if (t > learning_starts and
            t % learning_freq == 0 and
            replay_buffer.can_sample(batch_size)):
        # Here, you should perform training. Training consists of four steps:
        # 3.a: use the replay buffer to sample a batch of transitions (see the
        # replay buffer code for function definition, each batch that you sample
        # should consist of current observations, current actions, rewards,
        # next observations, and done indicator).
        # 3.b: initialize the model if it has not been initialized yet; to do
        # that, call
        #    initialize_interdependent_variables(session, tf.global_variables(), {
        #        obs_t_ph: obs_t_batch,
        #        obs_tp1_ph: obs_tp1_batch,
        #    })
        # where obs_t_batch and obs_tp1_batch are the batches of observations at
        # the current and next time step. The boolean variable model_initialized
        # indicates whether or not the model has been initialized.
        # Remember that you have to update the target network too (see 3.d)!
        # 3.c: train the model. To do this, you'll need to use the train_fn and
        # total_error ops that were created earlier: total_error is what you
        # created to compute the total Bellman error in a batch, and train_fn
        # will actually perform a gradient step and update the network parameters
        # to reduce total_error. When calling session.run on these you'll need to
        # populate the following placeholders:
        # obs_t_ph
        # act_t_ph
        # rew_t_ph
        # obs_tp1_ph
        # done_mask_ph
        # (this is needed for computing total_error)
        # learning_rate -- you can get this from optimizer_spec.lr_schedule.value(t)
        # (this is needed by the optimizer to choose the learning rate)
        # 3.d: periodically update the target network by calling
        # session.run(update_target_fn)
        # you should update every target_update_freq steps, and you may find the
        # variable num_param_updates useful for this (it was initialized to 0)
        #####

        # YOUR CODE HERE
        
        obs_t_batch, act_batch, rew_batch, obs_tp1_batch, done_mask = replay_buffer.sample(batch_size)
        
        
        
        session.run(train_fn, feed_dict={obs_t_ph: obs_t_batch, act_t_ph: act_batch, \
                    obs_tp1_ph: obs_tp1_batch, rew_t_ph: rew_batch, \
                    done_mask_ph: done_mask, learning_rate: optimizer_spec.lr_schedule.value(t)})
        
        if t and not  t%target_update_freq:
            session.run(update_target_fn)
            num_param_updates += 1
        #####
        

    ### 4. Log progress
    episode_rewards = get_wrapper_by_name(env, "Monitor").get_episode_rewards()
    if len(episode_rewards) > 0:
        mean_episode_reward = np.mean(episode_rewards[-100:])
    if len(episode_rewards) > 100:
        best_mean_episode_reward = max(best_mean_episode_reward, mean_episode_reward)
    if t % LOG_EVERY_N_STEPS == 0 and model_initialized:
        print("Timestep %d" % (t,))
        print("mean reward (100 episodes) %f" % mean_episode_reward)
        print("best mean reward %f" % best_mean_episode_reward)
        print("episodes %d" % len(episode_rewards))
        print("exploration %f" % exploration.value(t))
        print("learning_rate %f" % optimizer_spec.lr_schedule.value(t))
        sys.stdout.flush()

Timestep 0
mean reward (100 episodes) -19.760000
best mean reward -19.720000
episodes 380
exploration 1.000000
learning_rate 0.000100
Timestep 10000
mean reward (100 episodes) -19.780000
best mean reward -19.690000
episodes 390
exploration 0.991000
learning_rate 0.000100
Timestep 20000
mean reward (100 episodes) -19.770000
best mean reward -19.690000
episodes 400
exploration 0.982000
learning_rate 0.000100
Timestep 30000
mean reward (100 episodes) -19.840000
best mean reward -19.690000
episodes 411
exploration 0.973000
learning_rate 0.000097
Timestep 40000
mean reward (100 episodes) -19.880000
best mean reward -19.690000
episodes 422
exploration 0.964000
learning_rate 0.000092
Timestep 50000
mean reward (100 episodes) -19.910000
best mean reward -19.690000
episodes 433
exploration 0.955000
learning_rate 0.000087
Timestep 60000
mean reward (100 episodes) -19.960000
best mean reward -19.690000
episodes 443
exploration 0.946000
learning_rate 0.000082
Timestep 70000
mean reward (100 episod

[2018-06-21 02:33:59,576] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video000512.mp4


Timestep 130000
mean reward (100 episodes) -19.880000
best mean reward -19.690000
episodes 512
exploration 0.883000
learning_rate 0.000050
Timestep 140000
mean reward (100 episodes) -19.820000
best mean reward -19.690000
episodes 521
exploration 0.874000
learning_rate 0.000050
Timestep 150000
mean reward (100 episodes) -19.780000
best mean reward -19.690000
episodes 531
exploration 0.865000
learning_rate 0.000050
Timestep 160000
mean reward (100 episodes) -19.760000
best mean reward -19.690000
episodes 541
exploration 0.856000
learning_rate 0.000050
Timestep 170000
mean reward (100 episodes) -19.690000
best mean reward -19.680000
episodes 550
exploration 0.847000
learning_rate 0.000050
Timestep 180000
mean reward (100 episodes) -19.670000
best mean reward -19.620000
episodes 560
exploration 0.838000
learning_rate 0.000050
Timestep 190000
mean reward (100 episodes) -19.610000
best mean reward -19.580000
episodes 568
exploration 0.829000
learning_rate 0.000050
Timestep 200000
mean reward

[2018-06-21 02:46:26,232] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video000729.mp4


Timestep 390000
mean reward (100 episodes) -18.660000
best mean reward -18.640000
episodes 731
exploration 0.649000
learning_rate 0.000050
Timestep 400000
mean reward (100 episodes) -18.540000
best mean reward -18.540000
episodes 738
exploration 0.640000
learning_rate 0.000050
Timestep 410000
mean reward (100 episodes) -18.510000
best mean reward -18.490000
episodes 745
exploration 0.631000
learning_rate 0.000050
Timestep 420000
mean reward (100 episodes) -18.390000
best mean reward -18.390000
episodes 751
exploration 0.622000
learning_rate 0.000050
Timestep 430000
mean reward (100 episodes) -18.360000
best mean reward -18.360000
episodes 758
exploration 0.613000
learning_rate 0.000050
Timestep 440000
mean reward (100 episodes) -18.270000
best mean reward -18.250000
episodes 765
exploration 0.604000
learning_rate 0.000050
Timestep 450000
mean reward (100 episodes) -18.160000
best mean reward -18.160000
episodes 772
exploration 0.595000
learning_rate 0.000050
Timestep 460000
mean reward

[2018-06-21 03:13:40,160] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video001000.mp4


Timestep 910000
mean reward (100 episodes) -12.470000
best mean reward -12.470000
episodes 1001
exploration 0.181000
learning_rate 0.000050
Timestep 920000
mean reward (100 episodes) -12.410000
best mean reward -12.410000
episodes 1004
exploration 0.172000
learning_rate 0.000050
Timestep 930000
mean reward (100 episodes) -12.150000
best mean reward -12.150000
episodes 1007
exploration 0.163000
learning_rate 0.000050
Timestep 940000
mean reward (100 episodes) -11.920000
best mean reward -11.920000
episodes 1010
exploration 0.154000
learning_rate 0.000050
Timestep 950000
mean reward (100 episodes) -11.640000
best mean reward -11.640000
episodes 1013
exploration 0.145000
learning_rate 0.000050
Timestep 960000
mean reward (100 episodes) -11.180000
best mean reward -11.180000
episodes 1016
exploration 0.136000
learning_rate 0.000050
Timestep 970000
mean reward (100 episodes) -10.860000
best mean reward -10.860000
episodes 1018
exploration 0.127000
learning_rate 0.000050
Timestep 980000
mean

Timestep 1500000
mean reward (100 episodes) 1.720000
best mean reward 1.730000
episodes 1161
exploration 0.100000
learning_rate 0.000050
Timestep 1510000
mean reward (100 episodes) 1.860000
best mean reward 1.860000
episodes 1164
exploration 0.100000
learning_rate 0.000050
Timestep 1520000
mean reward (100 episodes) 2.130000
best mean reward 2.130000
episodes 1166
exploration 0.100000
learning_rate 0.000050
Timestep 1530000
mean reward (100 episodes) 2.450000
best mean reward 2.450000
episodes 1169
exploration 0.100000
learning_rate 0.000050
Timestep 1540000
mean reward (100 episodes) 2.870000
best mean reward 2.870000
episodes 1172
exploration 0.100000
learning_rate 0.000050
Timestep 1550000
mean reward (100 episodes) 3.080000
best mean reward 3.080000
episodes 1175
exploration 0.100000
learning_rate 0.000050
Timestep 1560000
mean reward (100 episodes) 3.310000
best mean reward 3.310000
episodes 1177
exploration 0.100000
learning_rate 0.000050
Timestep 1570000
mean reward (100 episode

Timestep 2100000
mean reward (100 episodes) 14.000000
best mean reward 14.070000
episodes 1387
exploration 0.100000
learning_rate 0.000050
Timestep 2110000
mean reward (100 episodes) 14.110000
best mean reward 14.130000
episodes 1392
exploration 0.100000
learning_rate 0.000050
Timestep 2120000
mean reward (100 episodes) 14.030000
best mean reward 14.150000
episodes 1396
exploration 0.100000
learning_rate 0.000050
Timestep 2130000
mean reward (100 episodes) 14.070000
best mean reward 14.150000
episodes 1400
exploration 0.100000
learning_rate 0.000050
Timestep 2140000
mean reward (100 episodes) 14.190000
best mean reward 14.190000
episodes 1404
exploration 0.100000
learning_rate 0.000050
Timestep 2150000
mean reward (100 episodes) 14.310000
best mean reward 14.310000
episodes 1408
exploration 0.100000
learning_rate 0.000050
Timestep 2160000
mean reward (100 episodes) 14.550000
best mean reward 14.550000
episodes 1412
exploration 0.100000
learning_rate 0.000050
Timestep 2170000
mean rewar

Timestep 2690000
mean reward (100 episodes) 15.570000
best mean reward 15.890000
episodes 1632
exploration 0.100000
learning_rate 0.000050
Timestep 2700000
mean reward (100 episodes) 15.540000
best mean reward 15.890000
episodes 1637
exploration 0.100000
learning_rate 0.000050
Timestep 2710000
mean reward (100 episodes) 15.540000
best mean reward 15.890000
episodes 1641
exploration 0.100000
learning_rate 0.000050
Timestep 2720000
mean reward (100 episodes) 15.490000
best mean reward 15.890000
episodes 1646
exploration 0.100000
learning_rate 0.000050
Timestep 2730000
mean reward (100 episodes) 15.560000
best mean reward 15.890000
episodes 1650
exploration 0.100000
learning_rate 0.000050
Timestep 2740000
mean reward (100 episodes) 15.690000
best mean reward 15.890000
episodes 1654
exploration 0.100000
learning_rate 0.000050
Timestep 2750000
mean reward (100 episodes) 15.760000
best mean reward 15.890000
episodes 1658
exploration 0.100000
learning_rate 0.000050
Timestep 2760000
mean rewar

Timestep 3280000
mean reward (100 episodes) 16.000000
best mean reward 16.380000
episodes 1893
exploration 0.100000
learning_rate 0.000050
Timestep 3290000
mean reward (100 episodes) 15.940000
best mean reward 16.380000
episodes 1897
exploration 0.100000
learning_rate 0.000050
Timestep 3300000
mean reward (100 episodes) 15.950000
best mean reward 16.380000
episodes 1902
exploration 0.100000
learning_rate 0.000050
Timestep 3310000
mean reward (100 episodes) 16.010000
best mean reward 16.380000
episodes 1906
exploration 0.100000
learning_rate 0.000050
Timestep 3320000
mean reward (100 episodes) 16.070000
best mean reward 16.380000
episodes 1911
exploration 0.100000
learning_rate 0.000050
Timestep 3330000
mean reward (100 episodes) 16.000000
best mean reward 16.380000
episodes 1916
exploration 0.100000
learning_rate 0.000050
Timestep 3340000
mean reward (100 episodes) 15.940000
best mean reward 16.380000
episodes 1920
exploration 0.100000
learning_rate 0.000050
Timestep 3350000
mean rewar

[2018-06-21 05:40:50,647] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video002000.mp4


Timestep 3510000
mean reward (100 episodes) 17.020000
best mean reward 17.020000
episodes 2000
exploration 0.100000
learning_rate 0.000050
Timestep 3520000
mean reward (100 episodes) 17.090000
best mean reward 17.090000
episodes 2005
exploration 0.100000
learning_rate 0.000050
Timestep 3530000
mean reward (100 episodes) 17.110000
best mean reward 17.150000
episodes 2010
exploration 0.100000
learning_rate 0.000050
Timestep 3540000
mean reward (100 episodes) 17.140000
best mean reward 17.170000
episodes 2015
exploration 0.100000
learning_rate 0.000050
Timestep 3550000
mean reward (100 episodes) 17.340000
best mean reward 17.340000
episodes 2020
exploration 0.100000
learning_rate 0.000050
Timestep 3560000
mean reward (100 episodes) 17.390000
best mean reward 17.410000
episodes 2025
exploration 0.100000
learning_rate 0.000050
Timestep 3570000
mean reward (100 episodes) 17.350000
best mean reward 17.410000
episodes 2030
exploration 0.100000
learning_rate 0.000050
Timestep 3580000
mean rewar

Timestep 4100000
mean reward (100 episodes) 17.230000
best mean reward 17.610000
episodes 2283
exploration 0.100000
learning_rate 0.000050
Timestep 4110000
mean reward (100 episodes) 17.170000
best mean reward 17.610000
episodes 2287
exploration 0.100000
learning_rate 0.000050
Timestep 4120000
mean reward (100 episodes) 17.260000
best mean reward 17.610000
episodes 2292
exploration 0.100000
learning_rate 0.000050
Timestep 4130000
mean reward (100 episodes) 17.110000
best mean reward 17.610000
episodes 2297
exploration 0.100000
learning_rate 0.000050
Timestep 4140000
mean reward (100 episodes) 17.160000
best mean reward 17.610000
episodes 2302
exploration 0.100000
learning_rate 0.000050
Timestep 4150000
mean reward (100 episodes) 17.260000
best mean reward 17.610000
episodes 2307
exploration 0.100000
learning_rate 0.000050
Timestep 4160000
mean reward (100 episodes) 17.360000
best mean reward 17.610000
episodes 2312
exploration 0.100000
learning_rate 0.000050
Timestep 4170000
mean rewar

Timestep 4690000
mean reward (100 episodes) 17.560000
best mean reward 17.740000
episodes 2568
exploration 0.100000
learning_rate 0.000050
Timestep 4700000
mean reward (100 episodes) 17.580000
best mean reward 17.740000
episodes 2573
exploration 0.100000
learning_rate 0.000050
Timestep 4710000
mean reward (100 episodes) 17.530000
best mean reward 17.740000
episodes 2578
exploration 0.100000
learning_rate 0.000050
Timestep 4720000
mean reward (100 episodes) 17.490000
best mean reward 17.740000
episodes 2583
exploration 0.100000
learning_rate 0.000050
Timestep 4730000
mean reward (100 episodes) 17.460000
best mean reward 17.740000
episodes 2587
exploration 0.100000
learning_rate 0.000050
Timestep 4740000
mean reward (100 episodes) 17.420000
best mean reward 17.740000
episodes 2593
exploration 0.100000
learning_rate 0.000050
Timestep 4750000
mean reward (100 episodes) 17.390000
best mean reward 17.740000
episodes 2597
exploration 0.100000
learning_rate 0.000050
Timestep 4760000
mean rewar

Timestep 5280000
mean reward (100 episodes) 17.700000
best mean reward 18.060000
episodes 2860
exploration 0.100000
learning_rate 0.000050
Timestep 5290000
mean reward (100 episodes) 17.700000
best mean reward 18.060000
episodes 2865
exploration 0.100000
learning_rate 0.000050
Timestep 5300000
mean reward (100 episodes) 17.720000
best mean reward 18.060000
episodes 2870
exploration 0.100000
learning_rate 0.000050
Timestep 5310000
mean reward (100 episodes) 17.710000
best mean reward 18.060000
episodes 2874
exploration 0.100000
learning_rate 0.000050
Timestep 5320000
mean reward (100 episodes) 17.600000
best mean reward 18.060000
episodes 2879
exploration 0.100000
learning_rate 0.000050
Timestep 5330000
mean reward (100 episodes) 17.620000
best mean reward 18.060000
episodes 2884
exploration 0.100000
learning_rate 0.000050
Timestep 5340000
mean reward (100 episodes) 17.700000
best mean reward 18.060000
episodes 2890
exploration 0.100000
learning_rate 0.000050
Timestep 5350000
mean rewar

[2018-06-21 07:36:16,374] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video003000.mp4


Timestep 5560000
mean reward (100 episodes) 18.320000
best mean reward 18.450000
episodes 3002
exploration 0.100000
learning_rate 0.000050
Timestep 5570000
mean reward (100 episodes) 18.240000
best mean reward 18.450000
episodes 3007
exploration 0.100000
learning_rate 0.000050
Timestep 5580000
mean reward (100 episodes) 18.230000
best mean reward 18.450000
episodes 3012
exploration 0.100000
learning_rate 0.000050
Timestep 5590000
mean reward (100 episodes) 18.220000
best mean reward 18.450000
episodes 3018
exploration 0.100000
learning_rate 0.000050
Timestep 5600000
mean reward (100 episodes) 18.210000
best mean reward 18.450000
episodes 3023
exploration 0.100000
learning_rate 0.000050
Timestep 5610000
mean reward (100 episodes) 18.160000
best mean reward 18.450000
episodes 3028
exploration 0.100000
learning_rate 0.000050
Timestep 5620000
mean reward (100 episodes) 18.070000
best mean reward 18.450000
episodes 3033
exploration 0.100000
learning_rate 0.000050
Timestep 5630000
mean rewar

Timestep 6150000
mean reward (100 episodes) 17.590000
best mean reward 18.450000
episodes 3296
exploration 0.100000
learning_rate 0.000050
Timestep 6160000
mean reward (100 episodes) 17.490000
best mean reward 18.450000
episodes 3301
exploration 0.100000
learning_rate 0.000050
Timestep 6170000
mean reward (100 episodes) 17.550000
best mean reward 18.450000
episodes 3307
exploration 0.100000
learning_rate 0.000050
Timestep 6180000
mean reward (100 episodes) 17.600000
best mean reward 18.450000
episodes 3312
exploration 0.100000
learning_rate 0.000050
Timestep 6190000
mean reward (100 episodes) 17.680000
best mean reward 18.450000
episodes 3317
exploration 0.100000
learning_rate 0.000050
Timestep 6200000
mean reward (100 episodes) 17.660000
best mean reward 18.450000
episodes 3322
exploration 0.100000
learning_rate 0.000050
Timestep 6210000
mean reward (100 episodes) 17.600000
best mean reward 18.450000
episodes 3327
exploration 0.100000
learning_rate 0.000050
Timestep 6220000
mean rewar

Timestep 6740000
mean reward (100 episodes) 17.770000
best mean reward 18.450000
episodes 3595
exploration 0.100000
learning_rate 0.000050
Timestep 6750000
mean reward (100 episodes) 17.830000
best mean reward 18.450000
episodes 3600
exploration 0.100000
learning_rate 0.000050
Timestep 6760000
mean reward (100 episodes) 17.850000
best mean reward 18.450000
episodes 3605
exploration 0.100000
learning_rate 0.000050
Timestep 6770000
mean reward (100 episodes) 17.840000
best mean reward 18.450000
episodes 3610
exploration 0.100000
learning_rate 0.000050
Timestep 6780000
mean reward (100 episodes) 17.820000
best mean reward 18.450000
episodes 3615
exploration 0.100000
learning_rate 0.000050
Timestep 6790000
mean reward (100 episodes) 17.800000
best mean reward 18.450000
episodes 3620
exploration 0.100000
learning_rate 0.000050
Timestep 6800000
mean reward (100 episodes) 17.720000
best mean reward 18.450000
episodes 3625
exploration 0.100000
learning_rate 0.000050
Timestep 6810000
mean rewar

Timestep 7330000
mean reward (100 episodes) 17.520000
best mean reward 18.450000
episodes 3894
exploration 0.100000
learning_rate 0.000050
Timestep 7340000
mean reward (100 episodes) 17.590000
best mean reward 18.450000
episodes 3899
exploration 0.100000
learning_rate 0.000050
Timestep 7350000
mean reward (100 episodes) 17.640000
best mean reward 18.450000
episodes 3904
exploration 0.100000
learning_rate 0.000050
Timestep 7360000
mean reward (100 episodes) 17.550000
best mean reward 18.450000
episodes 3909
exploration 0.100000
learning_rate 0.000050
Timestep 7370000
mean reward (100 episodes) 17.650000
best mean reward 18.450000
episodes 3914
exploration 0.100000
learning_rate 0.000050
Timestep 7380000
mean reward (100 episodes) 17.580000
best mean reward 18.450000
episodes 3919
exploration 0.100000
learning_rate 0.000050
Timestep 7390000
mean reward (100 episodes) 17.520000
best mean reward 18.450000
episodes 3925
exploration 0.100000
learning_rate 0.000050
Timestep 7400000
mean rewar

[2018-06-21 09:28:01,179] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video004000.mp4


Timestep 7540000
mean reward (100 episodes) 18.140000
best mean reward 18.450000
episodes 4002
exploration 0.100000
learning_rate 0.000050
Timestep 7550000
mean reward (100 episodes) 18.140000
best mean reward 18.450000
episodes 4007
exploration 0.100000
learning_rate 0.000050
Timestep 7560000
mean reward (100 episodes) 18.010000
best mean reward 18.450000
episodes 4012
exploration 0.100000
learning_rate 0.000050
Timestep 7570000
mean reward (100 episodes) 17.940000
best mean reward 18.450000
episodes 4017
exploration 0.100000
learning_rate 0.000050
Timestep 7580000
mean reward (100 episodes) 17.980000
best mean reward 18.450000
episodes 4022
exploration 0.100000
learning_rate 0.000050
Timestep 7590000
mean reward (100 episodes) 17.990000
best mean reward 18.450000
episodes 4027
exploration 0.100000
learning_rate 0.000050
Timestep 7600000
mean reward (100 episodes) 17.980000
best mean reward 18.450000
episodes 4032
exploration 0.100000
learning_rate 0.000050
Timestep 7610000
mean rewar

Timestep 8130000
mean reward (100 episodes) 17.650000
best mean reward 18.450000
episodes 4298
exploration 0.100000
learning_rate 0.000050
Timestep 8140000
mean reward (100 episodes) 17.610000
best mean reward 18.450000
episodes 4302
exploration 0.100000
learning_rate 0.000050
Timestep 8150000
mean reward (100 episodes) 17.700000
best mean reward 18.450000
episodes 4308
exploration 0.100000
learning_rate 0.000050
Timestep 8160000
mean reward (100 episodes) 17.690000
best mean reward 18.450000
episodes 4313
exploration 0.100000
learning_rate 0.000050
Timestep 8170000
mean reward (100 episodes) 17.720000
best mean reward 18.450000
episodes 4318
exploration 0.100000
learning_rate 0.000050
Timestep 8180000
mean reward (100 episodes) 17.700000
best mean reward 18.450000
episodes 4323
exploration 0.100000
learning_rate 0.000050
Timestep 8190000
mean reward (100 episodes) 17.700000
best mean reward 18.450000
episodes 4328
exploration 0.100000
learning_rate 0.000050
Timestep 8200000
mean rewar

Timestep 9150000
mean reward (100 episodes) 17.990000
best mean reward 18.540000
episodes 4819
exploration 0.100000
learning_rate 0.000050
Timestep 9160000
mean reward (100 episodes) 18.070000
best mean reward 18.540000
episodes 4824
exploration 0.100000
learning_rate 0.000050
Timestep 9170000
mean reward (100 episodes) 18.030000
best mean reward 18.540000
episodes 4829
exploration 0.100000
learning_rate 0.000050
Timestep 9180000
mean reward (100 episodes) 17.840000
best mean reward 18.540000
episodes 4834
exploration 0.100000
learning_rate 0.000050
Timestep 9190000
mean reward (100 episodes) 17.770000
best mean reward 18.540000
episodes 4840
exploration 0.100000
learning_rate 0.000050
Timestep 9200000
mean reward (100 episodes) 17.780000
best mean reward 18.540000
episodes 4845
exploration 0.100000
learning_rate 0.000050
Timestep 9210000
mean reward (100 episodes) 17.770000
best mean reward 18.540000
episodes 4850
exploration 0.100000
learning_rate 0.000050
Timestep 9220000
mean rewar

[2018-06-21 11:18:58,334] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video005000.mp4


Timestep 9500000
mean reward (100 episodes) 18.140000
best mean reward 18.540000
episodes 5000
exploration 0.100000
learning_rate 0.000050
Timestep 9510000
mean reward (100 episodes) 18.180000
best mean reward 18.540000
episodes 5005
exploration 0.100000
learning_rate 0.000050
Timestep 9520000
mean reward (100 episodes) 18.240000
best mean reward 18.540000
episodes 5010
exploration 0.100000
learning_rate 0.000050
Timestep 9530000
mean reward (100 episodes) 18.170000
best mean reward 18.540000
episodes 5015
exploration 0.100000
learning_rate 0.000050
Timestep 9540000
mean reward (100 episodes) 18.140000
best mean reward 18.540000
episodes 5020
exploration 0.100000
learning_rate 0.000050
Timestep 9550000
mean reward (100 episodes) 18.100000
best mean reward 18.540000
episodes 5025
exploration 0.100000
learning_rate 0.000050
Timestep 9560000
mean reward (100 episodes) 18.080000
best mean reward 18.540000
episodes 5031
exploration 0.100000
learning_rate 0.000050
Timestep 9570000
mean rewar

Timestep 10090000
mean reward (100 episodes) 18.020000
best mean reward 18.600000
episodes 5304
exploration 0.100000
learning_rate 0.000050
Timestep 10100000
mean reward (100 episodes) 18.100000
best mean reward 18.600000
episodes 5309
exploration 0.100000
learning_rate 0.000050
Timestep 10110000
mean reward (100 episodes) 18.240000
best mean reward 18.600000
episodes 5315
exploration 0.100000
learning_rate 0.000050
Timestep 10120000
mean reward (100 episodes) 18.190000
best mean reward 18.600000
episodes 5320
exploration 0.100000
learning_rate 0.000050
Timestep 10130000
mean reward (100 episodes) 18.150000
best mean reward 18.600000
episodes 5325
exploration 0.100000
learning_rate 0.000050
Timestep 10140000
mean reward (100 episodes) 18.100000
best mean reward 18.600000
episodes 5330
exploration 0.100000
learning_rate 0.000050
Timestep 10150000
mean reward (100 episodes) 18.030000
best mean reward 18.600000
episodes 5335
exploration 0.100000
learning_rate 0.000050
Timestep 10160000
me

Timestep 10680000
mean reward (100 episodes) 18.410000
best mean reward 18.600000
episodes 5609
exploration 0.100000
learning_rate 0.000050
Timestep 10690000
mean reward (100 episodes) 18.460000
best mean reward 18.600000
episodes 5614
exploration 0.100000
learning_rate 0.000050
Timestep 10700000
mean reward (100 episodes) 18.490000
best mean reward 18.600000
episodes 5619
exploration 0.100000
learning_rate 0.000050
Timestep 10710000
mean reward (100 episodes) 18.410000
best mean reward 18.600000
episodes 5624
exploration 0.100000
learning_rate 0.000050
Timestep 10720000
mean reward (100 episodes) 18.460000
best mean reward 18.600000
episodes 5629
exploration 0.100000
learning_rate 0.000050
Timestep 10730000
mean reward (100 episodes) 18.430000
best mean reward 18.600000
episodes 5634
exploration 0.100000
learning_rate 0.000050
Timestep 10740000
mean reward (100 episodes) 18.500000
best mean reward 18.600000
episodes 5640
exploration 0.100000
learning_rate 0.000050
Timestep 10750000
me

Timestep 11270000
mean reward (100 episodes) 18.400000
best mean reward 18.680000
episodes 5917
exploration 0.100000
learning_rate 0.000050
Timestep 11280000
mean reward (100 episodes) 18.470000
best mean reward 18.680000
episodes 5923
exploration 0.100000
learning_rate 0.000050
Timestep 11290000
mean reward (100 episodes) 18.370000
best mean reward 18.680000
episodes 5928
exploration 0.100000
learning_rate 0.000050
Timestep 11300000
mean reward (100 episodes) 18.320000
best mean reward 18.680000
episodes 5933
exploration 0.100000
learning_rate 0.000050
Timestep 11310000
mean reward (100 episodes) 18.250000
best mean reward 18.680000
episodes 5938
exploration 0.100000
learning_rate 0.000050
Timestep 11320000
mean reward (100 episodes) 18.260000
best mean reward 18.680000
episodes 5943
exploration 0.100000
learning_rate 0.000050
Timestep 11330000
mean reward (100 episodes) 18.170000
best mean reward 18.680000
episodes 5948
exploration 0.100000
learning_rate 0.000050
Timestep 11340000
me

[2018-06-21 13:08:05,457] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video006000.mp4


Timestep 11430000
mean reward (100 episodes) 17.870000
best mean reward 18.680000
episodes 6000
exploration 0.100000
learning_rate 0.000050
Timestep 11440000
mean reward (100 episodes) 17.840000
best mean reward 18.680000
episodes 6005
exploration 0.100000
learning_rate 0.000050
Timestep 11450000
mean reward (100 episodes) 17.850000
best mean reward 18.680000
episodes 6010
exploration 0.100000
learning_rate 0.000050
Timestep 11460000
mean reward (100 episodes) 17.790000
best mean reward 18.680000
episodes 6016
exploration 0.100000
learning_rate 0.000050
Timestep 11470000
mean reward (100 episodes) 17.730000
best mean reward 18.680000
episodes 6021
exploration 0.100000
learning_rate 0.000050
Timestep 11480000
mean reward (100 episodes) 17.750000
best mean reward 18.680000
episodes 6026
exploration 0.100000
learning_rate 0.000050
Timestep 11490000
mean reward (100 episodes) 17.840000
best mean reward 18.680000
episodes 6031
exploration 0.100000
learning_rate 0.000050
Timestep 11500000
me

Timestep 12020000
mean reward (100 episodes) 18.100000
best mean reward 18.680000
episodes 6310
exploration 0.100000
learning_rate 0.000050
Timestep 12030000
mean reward (100 episodes) 18.220000
best mean reward 18.680000
episodes 6315
exploration 0.100000
learning_rate 0.000050
Timestep 12040000
mean reward (100 episodes) 18.160000
best mean reward 18.680000
episodes 6321
exploration 0.100000
learning_rate 0.000050
Timestep 12050000
mean reward (100 episodes) 18.140000
best mean reward 18.680000
episodes 6326
exploration 0.100000
learning_rate 0.000050
Timestep 12060000
mean reward (100 episodes) 18.100000
best mean reward 18.680000
episodes 6331
exploration 0.100000
learning_rate 0.000050
Timestep 12070000
mean reward (100 episodes) 18.080000
best mean reward 18.680000
episodes 6336
exploration 0.100000
learning_rate 0.000050
Timestep 12080000
mean reward (100 episodes) 18.010000
best mean reward 18.680000
episodes 6341
exploration 0.100000
learning_rate 0.000050
Timestep 12090000
me

Timestep 12610000
mean reward (100 episodes) 18.080000
best mean reward 18.680000
episodes 6619
exploration 0.100000
learning_rate 0.000050
Timestep 12620000
mean reward (100 episodes) 18.090000
best mean reward 18.680000
episodes 6624
exploration 0.100000
learning_rate 0.000050
Timestep 12630000
mean reward (100 episodes) 18.200000
best mean reward 18.680000
episodes 6630
exploration 0.100000
learning_rate 0.000050
Timestep 12640000
mean reward (100 episodes) 18.170000
best mean reward 18.680000
episodes 6635
exploration 0.100000
learning_rate 0.000050
Timestep 12650000
mean reward (100 episodes) 18.230000
best mean reward 18.680000
episodes 6640
exploration 0.100000
learning_rate 0.000050
Timestep 12660000
mean reward (100 episodes) 18.180000
best mean reward 18.680000
episodes 6645
exploration 0.100000
learning_rate 0.000050
Timestep 12670000
mean reward (100 episodes) 18.100000
best mean reward 18.680000
episodes 6651
exploration 0.100000
learning_rate 0.000050
Timestep 12680000
me

Timestep 13200000
mean reward (100 episodes) 17.780000
best mean reward 18.680000
episodes 6928
exploration 0.100000
learning_rate 0.000050
Timestep 13210000
mean reward (100 episodes) 17.820000
best mean reward 18.680000
episodes 6933
exploration 0.100000
learning_rate 0.000050
Timestep 13220000
mean reward (100 episodes) 17.770000
best mean reward 18.680000
episodes 6939
exploration 0.100000
learning_rate 0.000050
Timestep 13230000
mean reward (100 episodes) 17.760000
best mean reward 18.680000
episodes 6944
exploration 0.100000
learning_rate 0.000050
Timestep 13240000
mean reward (100 episodes) 17.710000
best mean reward 18.680000
episodes 6949
exploration 0.100000
learning_rate 0.000050
Timestep 13250000
mean reward (100 episodes) 17.770000
best mean reward 18.680000
episodes 6955
exploration 0.100000
learning_rate 0.000050
Timestep 13260000
mean reward (100 episodes) 17.780000
best mean reward 18.680000
episodes 6960
exploration 0.100000
learning_rate 0.000050
Timestep 13270000
me

[2018-06-21 14:55:47,008] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video007000.mp4


Timestep 13340000
mean reward (100 episodes) 18.010000
best mean reward 18.680000
episodes 7002
exploration 0.100000
learning_rate 0.000050
Timestep 13350000
mean reward (100 episodes) 18.080000
best mean reward 18.680000
episodes 7007
exploration 0.100000
learning_rate 0.000050
Timestep 13360000
mean reward (100 episodes) 18.100000
best mean reward 18.680000
episodes 7012
exploration 0.100000
learning_rate 0.000050
Timestep 13370000
mean reward (100 episodes) 18.120000
best mean reward 18.680000
episodes 7018
exploration 0.100000
learning_rate 0.000050
Timestep 13380000
mean reward (100 episodes) 18.160000
best mean reward 18.680000
episodes 7023
exploration 0.100000
learning_rate 0.000050
Timestep 13390000
mean reward (100 episodes) 18.250000
best mean reward 18.680000
episodes 7028
exploration 0.100000
learning_rate 0.000050
Timestep 13400000
mean reward (100 episodes) 18.330000
best mean reward 18.680000
episodes 7034
exploration 0.100000
learning_rate 0.000050
Timestep 13410000
me

Timestep 13930000
mean reward (100 episodes) 17.990000
best mean reward 18.680000
episodes 7314
exploration 0.100000
learning_rate 0.000050
Timestep 13940000
mean reward (100 episodes) 17.940000
best mean reward 18.680000
episodes 7319
exploration 0.100000
learning_rate 0.000050
Timestep 13950000
mean reward (100 episodes) 18.030000
best mean reward 18.680000
episodes 7324
exploration 0.100000
learning_rate 0.000050
Timestep 13960000
mean reward (100 episodes) 18.010000
best mean reward 18.680000
episodes 7329
exploration 0.100000
learning_rate 0.000050
Timestep 13970000
mean reward (100 episodes) 17.990000
best mean reward 18.680000
episodes 7334
exploration 0.100000
learning_rate 0.000050
Timestep 13980000
mean reward (100 episodes) 17.990000
best mean reward 18.680000
episodes 7340
exploration 0.100000
learning_rate 0.000050
Timestep 13990000
mean reward (100 episodes) 18.020000
best mean reward 18.680000
episodes 7345
exploration 0.100000
learning_rate 0.000050
Timestep 14000000
me

Timestep 14520000
mean reward (100 episodes) 18.310000
best mean reward 18.680000
episodes 7626
exploration 0.100000
learning_rate 0.000050
Timestep 14530000
mean reward (100 episodes) 18.310000
best mean reward 18.680000
episodes 7632
exploration 0.100000
learning_rate 0.000050
Timestep 14540000
mean reward (100 episodes) 18.320000
best mean reward 18.680000
episodes 7637
exploration 0.100000
learning_rate 0.000050
Timestep 14550000
mean reward (100 episodes) 18.370000
best mean reward 18.680000
episodes 7642
exploration 0.100000
learning_rate 0.000050
Timestep 14560000
mean reward (100 episodes) 18.390000
best mean reward 18.680000
episodes 7647
exploration 0.100000
learning_rate 0.000050
Timestep 14570000
mean reward (100 episodes) 18.380000
best mean reward 18.680000
episodes 7653
exploration 0.100000
learning_rate 0.000050
Timestep 14580000
mean reward (100 episodes) 18.430000
best mean reward 18.680000
episodes 7658
exploration 0.100000
learning_rate 0.000050
Timestep 14590000
me

Timestep 15110000
mean reward (100 episodes) 18.010000
best mean reward 18.680000
episodes 7936
exploration 0.100000
learning_rate 0.000050
Timestep 15120000
mean reward (100 episodes) 18.000000
best mean reward 18.680000
episodes 7941
exploration 0.100000
learning_rate 0.000050
Timestep 15130000
mean reward (100 episodes) 17.950000
best mean reward 18.680000
episodes 7946
exploration 0.100000
learning_rate 0.000050
Timestep 15140000
mean reward (100 episodes) 18.040000
best mean reward 18.680000
episodes 7951
exploration 0.100000
learning_rate 0.000050
Timestep 15150000
mean reward (100 episodes) 17.910000
best mean reward 18.680000
episodes 7957
exploration 0.100000
learning_rate 0.000050
Timestep 15160000
mean reward (100 episodes) 17.890000
best mean reward 18.680000
episodes 7962
exploration 0.100000
learning_rate 0.000050
Timestep 15170000
mean reward (100 episodes) 17.880000
best mean reward 18.680000
episodes 7967
exploration 0.100000
learning_rate 0.000050
Timestep 15180000
me

[2018-06-21 16:43:06,494] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video008000.mp4


Timestep 15240000
mean reward (100 episodes) 18.070000
best mean reward 18.680000
episodes 8004
exploration 0.100000
learning_rate 0.000050
Timestep 15250000
mean reward (100 episodes) 18.030000
best mean reward 18.680000
episodes 8009
exploration 0.100000
learning_rate 0.000050
Timestep 15260000
mean reward (100 episodes) 18.020000
best mean reward 18.680000
episodes 8015
exploration 0.100000
learning_rate 0.000050
Timestep 15270000
mean reward (100 episodes) 17.990000
best mean reward 18.680000
episodes 8020
exploration 0.100000
learning_rate 0.000050
Timestep 15280000
mean reward (100 episodes) 17.960000
best mean reward 18.680000
episodes 8025
exploration 0.100000
learning_rate 0.000050
Timestep 15290000
mean reward (100 episodes) 17.940000
best mean reward 18.680000
episodes 8030
exploration 0.100000
learning_rate 0.000050
Timestep 15300000
mean reward (100 episodes) 18.000000
best mean reward 18.680000
episodes 8035
exploration 0.100000
learning_rate 0.000050
Timestep 15310000
me

Timestep 15830000
mean reward (100 episodes) 17.820000
best mean reward 18.680000
episodes 8313
exploration 0.100000
learning_rate 0.000050
Timestep 15840000
mean reward (100 episodes) 17.810000
best mean reward 18.680000
episodes 8319
exploration 0.100000
learning_rate 0.000050
Timestep 15850000
mean reward (100 episodes) 17.760000
best mean reward 18.680000
episodes 8324
exploration 0.100000
learning_rate 0.000050
Timestep 15860000
mean reward (100 episodes) 17.800000
best mean reward 18.680000
episodes 8329
exploration 0.100000
learning_rate 0.000050
Timestep 15870000
mean reward (100 episodes) 17.780000
best mean reward 18.680000
episodes 8335
exploration 0.100000
learning_rate 0.000050
Timestep 15880000
mean reward (100 episodes) 17.770000
best mean reward 18.680000
episodes 8340
exploration 0.100000
learning_rate 0.000050
Timestep 15890000
mean reward (100 episodes) 17.850000
best mean reward 18.680000
episodes 8345
exploration 0.100000
learning_rate 0.000050
Timestep 15900000
me

Timestep 16420000
mean reward (100 episodes) 18.220000
best mean reward 18.680000
episodes 8625
exploration 0.100000
learning_rate 0.000050
Timestep 16430000
mean reward (100 episodes) 18.180000
best mean reward 18.680000
episodes 8630
exploration 0.100000
learning_rate 0.000050
Timestep 16440000
mean reward (100 episodes) 18.170000
best mean reward 18.680000
episodes 8636
exploration 0.100000
learning_rate 0.000050
Timestep 16450000
mean reward (100 episodes) 18.260000
best mean reward 18.680000
episodes 8641
exploration 0.100000
learning_rate 0.000050
Timestep 16460000
mean reward (100 episodes) 18.330000
best mean reward 18.680000
episodes 8646
exploration 0.100000
learning_rate 0.000050
Timestep 16470000
mean reward (100 episodes) 18.380000
best mean reward 18.680000
episodes 8652
exploration 0.100000
learning_rate 0.000050
Timestep 16480000
mean reward (100 episodes) 18.400000
best mean reward 18.680000
episodes 8657
exploration 0.100000
learning_rate 0.000050
Timestep 16490000
me

Timestep 17010000
mean reward (100 episodes) 18.270000
best mean reward 18.680000
episodes 8938
exploration 0.100000
learning_rate 0.000050
Timestep 17020000
mean reward (100 episodes) 18.320000
best mean reward 18.680000
episodes 8944
exploration 0.100000
learning_rate 0.000050
Timestep 17030000
mean reward (100 episodes) 18.370000
best mean reward 18.680000
episodes 8949
exploration 0.100000
learning_rate 0.000050
Timestep 17040000
mean reward (100 episodes) 18.390000
best mean reward 18.680000
episodes 8954
exploration 0.100000
learning_rate 0.000050
Timestep 17050000
mean reward (100 episodes) 18.400000
best mean reward 18.680000
episodes 8960
exploration 0.100000
learning_rate 0.000050
Timestep 17060000
mean reward (100 episodes) 18.290000
best mean reward 18.680000
episodes 8965
exploration 0.100000
learning_rate 0.000050
Timestep 17070000
mean reward (100 episodes) 18.200000
best mean reward 18.680000
episodes 8970
exploration 0.100000
learning_rate 0.000050
Timestep 17080000
me

[2018-06-21 18:30:02,066] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video009000.mp4


Timestep 17130000
mean reward (100 episodes) 18.060000
best mean reward 18.680000
episodes 9001
exploration 0.100000
learning_rate 0.000050
Timestep 17140000
mean reward (100 episodes) 18.000000
best mean reward 18.680000
episodes 9007
exploration 0.100000
learning_rate 0.000050
Timestep 17150000
mean reward (100 episodes) 18.060000
best mean reward 18.680000
episodes 9012
exploration 0.100000
learning_rate 0.000050
Timestep 17160000
mean reward (100 episodes) 17.950000
best mean reward 18.680000
episodes 9017
exploration 0.100000
learning_rate 0.000050
Timestep 17170000
mean reward (100 episodes) 17.930000
best mean reward 18.680000
episodes 9022
exploration 0.100000
learning_rate 0.000050
Timestep 17180000
mean reward (100 episodes) 17.850000
best mean reward 18.680000
episodes 9027
exploration 0.100000
learning_rate 0.000050
Timestep 17190000
mean reward (100 episodes) 17.790000
best mean reward 18.680000
episodes 9032
exploration 0.100000
learning_rate 0.000050
Timestep 17200000
me

Timestep 21600000
mean reward (100 episodes) 18.360000
best mean reward 18.700000
episodes 11349
exploration 0.100000
learning_rate 0.000050
Timestep 21610000
mean reward (100 episodes) 18.340000
best mean reward 18.700000
episodes 11355
exploration 0.100000
learning_rate 0.000050
Timestep 21620000
mean reward (100 episodes) 18.340000
best mean reward 18.700000
episodes 11360
exploration 0.100000
learning_rate 0.000050
Timestep 21630000
mean reward (100 episodes) 18.240000
best mean reward 18.700000
episodes 11365
exploration 0.100000
learning_rate 0.000050
Timestep 21640000
mean reward (100 episodes) 18.170000
best mean reward 18.700000
episodes 11370
exploration 0.100000
learning_rate 0.000050
Timestep 21650000
mean reward (100 episodes) 18.120000
best mean reward 18.700000
episodes 11375
exploration 0.100000
learning_rate 0.000050
Timestep 21660000
mean reward (100 episodes) 18.280000
best mean reward 18.700000
episodes 11381
exploration 0.100000
learning_rate 0.000050
Timestep 2167

Timestep 22190000
mean reward (100 episodes) 17.950000
best mean reward 18.700000
episodes 11660
exploration 0.100000
learning_rate 0.000050
Timestep 22200000
mean reward (100 episodes) 18.070000
best mean reward 18.700000
episodes 11666
exploration 0.100000
learning_rate 0.000050
Timestep 22210000
mean reward (100 episodes) 18.090000
best mean reward 18.700000
episodes 11671
exploration 0.100000
learning_rate 0.000050
Timestep 22220000
mean reward (100 episodes) 18.070000
best mean reward 18.700000
episodes 11676
exploration 0.100000
learning_rate 0.000050
Timestep 22230000
mean reward (100 episodes) 18.190000
best mean reward 18.700000
episodes 11682
exploration 0.100000
learning_rate 0.000050
Timestep 22240000
mean reward (100 episodes) 18.160000
best mean reward 18.700000
episodes 11687
exploration 0.100000
learning_rate 0.000050
Timestep 22250000
mean reward (100 episodes) 18.070000
best mean reward 18.700000
episodes 11692
exploration 0.100000
learning_rate 0.000050
Timestep 2226

Timestep 22780000
mean reward (100 episodes) 17.970000
best mean reward 18.700000
episodes 11973
exploration 0.100000
learning_rate 0.000050
Timestep 22790000
mean reward (100 episodes) 18.030000
best mean reward 18.700000
episodes 11978
exploration 0.100000
learning_rate 0.000050
Timestep 22800000
mean reward (100 episodes) 18.010000
best mean reward 18.700000
episodes 11983
exploration 0.100000
learning_rate 0.000050
Timestep 22810000
mean reward (100 episodes) 18.070000
best mean reward 18.700000
episodes 11989
exploration 0.100000
learning_rate 0.000050
Timestep 22820000
mean reward (100 episodes) 18.120000
best mean reward 18.700000
episodes 11995
exploration 0.100000
learning_rate 0.000050


[2018-06-21 23:51:31,763] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video012000.mp4


Timestep 22830000
mean reward (100 episodes) 18.180000
best mean reward 18.700000
episodes 12000
exploration 0.100000
learning_rate 0.000050
Timestep 22840000
mean reward (100 episodes) 18.220000
best mean reward 18.700000
episodes 12005
exploration 0.100000
learning_rate 0.000050
Timestep 22850000
mean reward (100 episodes) 18.150000
best mean reward 18.700000
episodes 12011
exploration 0.100000
learning_rate 0.000050
Timestep 22860000
mean reward (100 episodes) 18.180000
best mean reward 18.700000
episodes 12016
exploration 0.100000
learning_rate 0.000050
Timestep 22870000
mean reward (100 episodes) 18.050000
best mean reward 18.700000
episodes 12021
exploration 0.100000
learning_rate 0.000050
Timestep 22880000
mean reward (100 episodes) 18.040000
best mean reward 18.700000
episodes 12026
exploration 0.100000
learning_rate 0.000050
Timestep 22890000
mean reward (100 episodes) 18.140000
best mean reward 18.700000
episodes 12032
exploration 0.100000
learning_rate 0.000050
Timestep 2290

Timestep 23420000
mean reward (100 episodes) 17.850000
best mean reward 18.700000
episodes 12311
exploration 0.100000
learning_rate 0.000050
Timestep 23430000
mean reward (100 episodes) 17.880000
best mean reward 18.700000
episodes 12317
exploration 0.100000
learning_rate 0.000050
Timestep 23440000
mean reward (100 episodes) 17.830000
best mean reward 18.700000
episodes 12322
exploration 0.100000
learning_rate 0.000050
Timestep 23450000
mean reward (100 episodes) 17.860000
best mean reward 18.700000
episodes 12327
exploration 0.100000
learning_rate 0.000050
Timestep 23460000
mean reward (100 episodes) 17.810000
best mean reward 18.700000
episodes 12333
exploration 0.100000
learning_rate 0.000050
Timestep 23470000
mean reward (100 episodes) 17.800000
best mean reward 18.700000
episodes 12338
exploration 0.100000
learning_rate 0.000050
Timestep 23480000
mean reward (100 episodes) 17.820000
best mean reward 18.700000
episodes 12343
exploration 0.100000
learning_rate 0.000050
Timestep 2349

Timestep 24010000
mean reward (100 episodes) 17.950000
best mean reward 18.700000
episodes 12622
exploration 0.100000
learning_rate 0.000050
Timestep 24020000
mean reward (100 episodes) 18.010000
best mean reward 18.700000
episodes 12627
exploration 0.100000
learning_rate 0.000050
Timestep 24030000
mean reward (100 episodes) 17.950000
best mean reward 18.700000
episodes 12632
exploration 0.100000
learning_rate 0.000050
Timestep 24040000
mean reward (100 episodes) 17.970000
best mean reward 18.700000
episodes 12637
exploration 0.100000
learning_rate 0.000050
Timestep 24050000
mean reward (100 episodes) 18.050000
best mean reward 18.700000
episodes 12643
exploration 0.100000
learning_rate 0.000050
Timestep 24060000
mean reward (100 episodes) 18.030000
best mean reward 18.700000
episodes 12648
exploration 0.100000
learning_rate 0.000050
Timestep 24070000
mean reward (100 episodes) 18.010000
best mean reward 18.700000
episodes 12654
exploration 0.100000
learning_rate 0.000050
Timestep 2408

Timestep 24600000
mean reward (100 episodes) 17.660000
best mean reward 18.700000
episodes 12932
exploration 0.100000
learning_rate 0.000050
Timestep 24610000
mean reward (100 episodes) 17.790000
best mean reward 18.700000
episodes 12937
exploration 0.100000
learning_rate 0.000050
Timestep 24620000
mean reward (100 episodes) 17.830000
best mean reward 18.700000
episodes 12943
exploration 0.100000
learning_rate 0.000050
Timestep 24630000
mean reward (100 episodes) 17.970000
best mean reward 18.700000
episodes 12948
exploration 0.100000
learning_rate 0.000050
Timestep 24640000
mean reward (100 episodes) 17.950000
best mean reward 18.700000
episodes 12953
exploration 0.100000
learning_rate 0.000050
Timestep 24650000
mean reward (100 episodes) 17.920000
best mean reward 18.700000
episodes 12958
exploration 0.100000
learning_rate 0.000050
Timestep 24660000
mean reward (100 episodes) 17.870000
best mean reward 18.700000
episodes 12963
exploration 0.100000
learning_rate 0.000050
Timestep 2467

[2018-06-22 01:38:35,856] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video013000.mp4


Timestep 24730000
mean reward (100 episodes) 18.130000
best mean reward 18.700000
episodes 13002
exploration 0.100000
learning_rate 0.000050
Timestep 24740000
mean reward (100 episodes) 18.020000
best mean reward 18.700000
episodes 13007
exploration 0.100000
learning_rate 0.000050
Timestep 24750000
mean reward (100 episodes) 18.040000
best mean reward 18.700000
episodes 13012
exploration 0.100000
learning_rate 0.000050
Timestep 24760000
mean reward (100 episodes) 18.170000
best mean reward 18.700000
episodes 13017
exploration 0.100000
learning_rate 0.000050
Timestep 24770000
mean reward (100 episodes) 18.220000
best mean reward 18.700000
episodes 13022
exploration 0.100000
learning_rate 0.000050
Timestep 24780000
mean reward (100 episodes) 18.210000
best mean reward 18.700000
episodes 13028
exploration 0.100000
learning_rate 0.000050
Timestep 24790000
mean reward (100 episodes) 18.340000
best mean reward 18.700000
episodes 13033
exploration 0.100000
learning_rate 0.000050
Timestep 2480

Timestep 25320000
mean reward (100 episodes) 18.280000
best mean reward 18.700000
episodes 13312
exploration 0.100000
learning_rate 0.000050
Timestep 25330000
mean reward (100 episodes) 18.250000
best mean reward 18.700000
episodes 13317
exploration 0.100000
learning_rate 0.000050
Timestep 25340000
mean reward (100 episodes) 18.210000
best mean reward 18.700000
episodes 13323
exploration 0.100000
learning_rate 0.000050
Timestep 25350000
mean reward (100 episodes) 18.160000
best mean reward 18.700000
episodes 13327
exploration 0.100000
learning_rate 0.000050
Timestep 25360000
mean reward (100 episodes) 18.230000
best mean reward 18.700000
episodes 13333
exploration 0.100000
learning_rate 0.000050
Timestep 25370000
mean reward (100 episodes) 18.240000
best mean reward 18.700000
episodes 13338
exploration 0.100000
learning_rate 0.000050
Timestep 25380000
mean reward (100 episodes) 18.240000
best mean reward 18.700000
episodes 13343
exploration 0.100000
learning_rate 0.000050
Timestep 2539

Timestep 25910000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 13626
exploration 0.100000
learning_rate 0.000050
Timestep 25920000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 13631
exploration 0.100000
learning_rate 0.000050
Timestep 25930000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 13637
exploration 0.100000
learning_rate 0.000050
Timestep 25940000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 13642
exploration 0.100000
learning_rate 0.000050
Timestep 25950000
mean reward (100 episodes) 18.140000
best mean reward 18.790000
episodes 13647
exploration 0.100000
learning_rate 0.000050
Timestep 25960000
mean reward (100 episodes) 18.190000
best mean reward 18.790000
episodes 13653
exploration 0.100000
learning_rate 0.000050
Timestep 25970000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 13658
exploration 0.100000
learning_rate 0.000050
Timestep 2598

Timestep 26500000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 13938
exploration 0.100000
learning_rate 0.000050
Timestep 26510000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 13944
exploration 0.100000
learning_rate 0.000050
Timestep 26520000
mean reward (100 episodes) 18.100000
best mean reward 18.790000
episodes 13949
exploration 0.100000
learning_rate 0.000050
Timestep 26530000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 13954
exploration 0.100000
learning_rate 0.000050
Timestep 26540000
mean reward (100 episodes) 18.240000
best mean reward 18.790000
episodes 13960
exploration 0.100000
learning_rate 0.000050
Timestep 26550000
mean reward (100 episodes) 18.190000
best mean reward 18.790000
episodes 13965
exploration 0.100000
learning_rate 0.000050
Timestep 26560000
mean reward (100 episodes) 18.100000
best mean reward 18.790000
episodes 13970
exploration 0.100000
learning_rate 0.000050
Timestep 2657

[2018-06-22 03:25:10,546] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video014000.mp4


Timestep 26620000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 14003
exploration 0.100000
learning_rate 0.000050
Timestep 26630000
mean reward (100 episodes) 18.130000
best mean reward 18.790000
episodes 14008
exploration 0.100000
learning_rate 0.000050
Timestep 26640000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 14013
exploration 0.100000
learning_rate 0.000050
Timestep 26650000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 14019
exploration 0.100000
learning_rate 0.000050
Timestep 26660000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 14024
exploration 0.100000
learning_rate 0.000050
Timestep 26670000
mean reward (100 episodes) 18.080000
best mean reward 18.790000
episodes 14029
exploration 0.100000
learning_rate 0.000050
Timestep 26680000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 14034
exploration 0.100000
learning_rate 0.000050
Timestep 2669

Timestep 27210000
mean reward (100 episodes) 17.890000
best mean reward 18.790000
episodes 14313
exploration 0.100000
learning_rate 0.000050
Timestep 27220000
mean reward (100 episodes) 18.000000
best mean reward 18.790000
episodes 14319
exploration 0.100000
learning_rate 0.000050
Timestep 27230000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 14324
exploration 0.100000
learning_rate 0.000050
Timestep 27240000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 14330
exploration 0.100000
learning_rate 0.000050
Timestep 27250000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 14335
exploration 0.100000
learning_rate 0.000050
Timestep 27260000
mean reward (100 episodes) 17.840000
best mean reward 18.790000
episodes 14340
exploration 0.100000
learning_rate 0.000050
Timestep 27270000
mean reward (100 episodes) 17.750000
best mean reward 18.790000
episodes 14345
exploration 0.100000
learning_rate 0.000050
Timestep 2728

Timestep 27800000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 14627
exploration 0.100000
learning_rate 0.000050
Timestep 27810000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 14632
exploration 0.100000
learning_rate 0.000050
Timestep 27820000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 14637
exploration 0.100000
learning_rate 0.000050
Timestep 27830000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 14642
exploration 0.100000
learning_rate 0.000050
Timestep 27840000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 14647
exploration 0.100000
learning_rate 0.000050
Timestep 27850000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 14653
exploration 0.100000
learning_rate 0.000050
Timestep 27860000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 14658
exploration 0.100000
learning_rate 0.000050
Timestep 2787

Timestep 28390000
mean reward (100 episodes) 18.280000
best mean reward 18.790000
episodes 14940
exploration 0.100000
learning_rate 0.000050
Timestep 28400000
mean reward (100 episodes) 18.260000
best mean reward 18.790000
episodes 14946
exploration 0.100000
learning_rate 0.000050
Timestep 28410000
mean reward (100 episodes) 18.260000
best mean reward 18.790000
episodes 14951
exploration 0.100000
learning_rate 0.000050
Timestep 28420000
mean reward (100 episodes) 18.290000
best mean reward 18.790000
episodes 14956
exploration 0.100000
learning_rate 0.000050
Timestep 28430000
mean reward (100 episodes) 18.390000
best mean reward 18.790000
episodes 14962
exploration 0.100000
learning_rate 0.000050
Timestep 28440000
mean reward (100 episodes) 18.390000
best mean reward 18.790000
episodes 14967
exploration 0.100000
learning_rate 0.000050
Timestep 28450000
mean reward (100 episodes) 18.480000
best mean reward 18.790000
episodes 14972
exploration 0.100000
learning_rate 0.000050
Timestep 2846

[2018-06-22 05:11:18,343] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video015000.mp4


Timestep 28510000
mean reward (100 episodes) 18.440000
best mean reward 18.790000
episodes 15005
exploration 0.100000
learning_rate 0.000050
Timestep 28520000
mean reward (100 episodes) 18.350000
best mean reward 18.790000
episodes 15010
exploration 0.100000
learning_rate 0.000050
Timestep 28530000
mean reward (100 episodes) 18.310000
best mean reward 18.790000
episodes 15015
exploration 0.100000
learning_rate 0.000050
Timestep 28540000
mean reward (100 episodes) 18.280000
best mean reward 18.790000
episodes 15020
exploration 0.100000
learning_rate 0.000050
Timestep 28550000
mean reward (100 episodes) 18.290000
best mean reward 18.790000
episodes 15025
exploration 0.100000
learning_rate 0.000050
Timestep 28560000
mean reward (100 episodes) 18.220000
best mean reward 18.790000
episodes 15030
exploration 0.100000
learning_rate 0.000050
Timestep 28570000
mean reward (100 episodes) 18.220000
best mean reward 18.790000
episodes 15036
exploration 0.100000
learning_rate 0.000050
Timestep 2858

Timestep 29100000
mean reward (100 episodes) 17.900000
best mean reward 18.790000
episodes 15314
exploration 0.100000
learning_rate 0.000050
Timestep 29110000
mean reward (100 episodes) 17.870000
best mean reward 18.790000
episodes 15319
exploration 0.100000
learning_rate 0.000050
Timestep 29120000
mean reward (100 episodes) 17.820000
best mean reward 18.790000
episodes 15325
exploration 0.100000
learning_rate 0.000050
Timestep 29130000
mean reward (100 episodes) 17.850000
best mean reward 18.790000
episodes 15330
exploration 0.100000
learning_rate 0.000050
Timestep 29140000
mean reward (100 episodes) 17.880000
best mean reward 18.790000
episodes 15336
exploration 0.100000
learning_rate 0.000050
Timestep 29150000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 15341
exploration 0.100000
learning_rate 0.000050
Timestep 29160000
mean reward (100 episodes) 18.000000
best mean reward 18.790000
episodes 15346
exploration 0.100000
learning_rate 0.000050
Timestep 2917

Timestep 29690000
mean reward (100 episodes) 17.900000
best mean reward 18.790000
episodes 15623
exploration 0.100000
learning_rate 0.000050
Timestep 29700000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 15628
exploration 0.100000
learning_rate 0.000050
Timestep 29710000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 15634
exploration 0.100000
learning_rate 0.000050
Timestep 29720000
mean reward (100 episodes) 17.950000
best mean reward 18.790000
episodes 15639
exploration 0.100000
learning_rate 0.000050
Timestep 29730000
mean reward (100 episodes) 17.950000
best mean reward 18.790000
episodes 15644
exploration 0.100000
learning_rate 0.000050
Timestep 29740000
mean reward (100 episodes) 17.780000
best mean reward 18.790000
episodes 15648
exploration 0.100000
learning_rate 0.000050
Timestep 29750000
mean reward (100 episodes) 17.890000
best mean reward 18.790000
episodes 15654
exploration 0.100000
learning_rate 0.000050
Timestep 2976

Timestep 30280000
mean reward (100 episodes) 18.380000
best mean reward 18.790000
episodes 15936
exploration 0.100000
learning_rate 0.000050
Timestep 30290000
mean reward (100 episodes) 18.300000
best mean reward 18.790000
episodes 15941
exploration 0.100000
learning_rate 0.000050
Timestep 30300000
mean reward (100 episodes) 18.290000
best mean reward 18.790000
episodes 15946
exploration 0.100000
learning_rate 0.000050
Timestep 30310000
mean reward (100 episodes) 18.350000
best mean reward 18.790000
episodes 15952
exploration 0.100000
learning_rate 0.000050
Timestep 30320000
mean reward (100 episodes) 18.430000
best mean reward 18.790000
episodes 15957
exploration 0.100000
learning_rate 0.000050
Timestep 30330000
mean reward (100 episodes) 18.400000
best mean reward 18.790000
episodes 15962
exploration 0.100000
learning_rate 0.000050
Timestep 30340000
mean reward (100 episodes) 18.340000
best mean reward 18.790000
episodes 15968
exploration 0.100000
learning_rate 0.000050
Timestep 3035

[2018-06-22 06:58:44,406] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video016000.mp4


Timestep 30410000
mean reward (100 episodes) 18.280000
best mean reward 18.790000
episodes 16004
exploration 0.100000
learning_rate 0.000050
Timestep 30420000
mean reward (100 episodes) 18.420000
best mean reward 18.790000
episodes 16010
exploration 0.100000
learning_rate 0.000050
Timestep 30430000
mean reward (100 episodes) 18.400000
best mean reward 18.790000
episodes 16015
exploration 0.100000
learning_rate 0.000050
Timestep 30440000
mean reward (100 episodes) 18.360000
best mean reward 18.790000
episodes 16020
exploration 0.100000
learning_rate 0.000050
Timestep 30450000
mean reward (100 episodes) 18.380000
best mean reward 18.790000
episodes 16026
exploration 0.100000
learning_rate 0.000050
Timestep 30460000
mean reward (100 episodes) 18.290000
best mean reward 18.790000
episodes 16031
exploration 0.100000
learning_rate 0.000050
Timestep 30470000
mean reward (100 episodes) 18.310000
best mean reward 18.790000
episodes 16037
exploration 0.100000
learning_rate 0.000050
Timestep 3048

Timestep 31000000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 16319
exploration 0.100000
learning_rate 0.000050
Timestep 31010000
mean reward (100 episodes) 18.180000
best mean reward 18.790000
episodes 16324
exploration 0.100000
learning_rate 0.000050
Timestep 31020000
mean reward (100 episodes) 18.170000
best mean reward 18.790000
episodes 16329
exploration 0.100000
learning_rate 0.000050
Timestep 31030000
mean reward (100 episodes) 18.130000
best mean reward 18.790000
episodes 16334
exploration 0.100000
learning_rate 0.000050
Timestep 31040000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 16339
exploration 0.100000
learning_rate 0.000050
Timestep 31050000
mean reward (100 episodes) 18.080000
best mean reward 18.790000
episodes 16344
exploration 0.100000
learning_rate 0.000050
Timestep 31060000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 16350
exploration 0.100000
learning_rate 0.000050
Timestep 3107

Timestep 31590000
mean reward (100 episodes) 18.220000
best mean reward 18.790000
episodes 16631
exploration 0.100000
learning_rate 0.000050
Timestep 31600000
mean reward (100 episodes) 18.230000
best mean reward 18.790000
episodes 16636
exploration 0.100000
learning_rate 0.000050
Timestep 31610000
mean reward (100 episodes) 18.250000
best mean reward 18.790000
episodes 16642
exploration 0.100000
learning_rate 0.000050
Timestep 31620000
mean reward (100 episodes) 18.240000
best mean reward 18.790000
episodes 16647
exploration 0.100000
learning_rate 0.000050
Timestep 31630000
mean reward (100 episodes) 18.320000
best mean reward 18.790000
episodes 16652
exploration 0.100000
learning_rate 0.000050
Timestep 31640000
mean reward (100 episodes) 18.290000
best mean reward 18.790000
episodes 16657
exploration 0.100000
learning_rate 0.000050
Timestep 31650000
mean reward (100 episodes) 18.340000
best mean reward 18.790000
episodes 16663
exploration 0.100000
learning_rate 0.000050
Timestep 3166

Timestep 32180000
mean reward (100 episodes) 18.190000
best mean reward 18.790000
episodes 16943
exploration 0.100000
learning_rate 0.000050
Timestep 32190000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 16949
exploration 0.100000
learning_rate 0.000050
Timestep 32200000
mean reward (100 episodes) 18.200000
best mean reward 18.790000
episodes 16954
exploration 0.100000
learning_rate 0.000050
Timestep 32210000
mean reward (100 episodes) 18.180000
best mean reward 18.790000
episodes 16959
exploration 0.100000
learning_rate 0.000050
Timestep 32220000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 16964
exploration 0.100000
learning_rate 0.000050
Timestep 32230000
mean reward (100 episodes) 18.180000
best mean reward 18.790000
episodes 16970
exploration 0.100000
learning_rate 0.000050
Timestep 32240000
mean reward (100 episodes) 18.130000
best mean reward 18.790000
episodes 16975
exploration 0.100000
learning_rate 0.000050
Timestep 3225

[2018-06-22 08:44:55,541] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video017000.mp4


Timestep 32290000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 17001
exploration 0.100000
learning_rate 0.000050
Timestep 32300000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 17006
exploration 0.100000
learning_rate 0.000050
Timestep 32310000
mean reward (100 episodes) 18.000000
best mean reward 18.790000
episodes 17011
exploration 0.100000
learning_rate 0.000050
Timestep 32320000
mean reward (100 episodes) 17.950000
best mean reward 18.790000
episodes 17017
exploration 0.100000
learning_rate 0.000050
Timestep 32330000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 17022
exploration 0.100000
learning_rate 0.000050
Timestep 32340000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 17027
exploration 0.100000
learning_rate 0.000050
Timestep 32350000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 17033
exploration 0.100000
learning_rate 0.000050
Timestep 3236

Timestep 32880000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 17314
exploration 0.100000
learning_rate 0.000050
Timestep 32890000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 17319
exploration 0.100000
learning_rate 0.000050
Timestep 32900000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 17325
exploration 0.100000
learning_rate 0.000050
Timestep 32910000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 17330
exploration 0.100000
learning_rate 0.000050
Timestep 32920000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 17335
exploration 0.100000
learning_rate 0.000050
Timestep 32930000
mean reward (100 episodes) 18.000000
best mean reward 18.790000
episodes 17341
exploration 0.100000
learning_rate 0.000050
Timestep 32940000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 17346
exploration 0.100000
learning_rate 0.000050
Timestep 3295

Timestep 33470000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 17627
exploration 0.100000
learning_rate 0.000050
Timestep 33480000
mean reward (100 episodes) 17.870000
best mean reward 18.790000
episodes 17632
exploration 0.100000
learning_rate 0.000050
Timestep 33490000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 17637
exploration 0.100000
learning_rate 0.000050
Timestep 33500000
mean reward (100 episodes) 17.850000
best mean reward 18.790000
episodes 17642
exploration 0.100000
learning_rate 0.000050
Timestep 33510000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 17648
exploration 0.100000
learning_rate 0.000050
Timestep 33520000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 17653
exploration 0.100000
learning_rate 0.000050
Timestep 33530000
mean reward (100 episodes) 18.170000
best mean reward 18.790000
episodes 17659
exploration 0.100000
learning_rate 0.000050
Timestep 3354

Timestep 34060000
mean reward (100 episodes) 17.770000
best mean reward 18.790000
episodes 17939
exploration 0.100000
learning_rate 0.000050
Timestep 34070000
mean reward (100 episodes) 17.740000
best mean reward 18.790000
episodes 17944
exploration 0.100000
learning_rate 0.000050
Timestep 34080000
mean reward (100 episodes) 17.840000
best mean reward 18.790000
episodes 17949
exploration 0.100000
learning_rate 0.000050
Timestep 34090000
mean reward (100 episodes) 17.950000
best mean reward 18.790000
episodes 17955
exploration 0.100000
learning_rate 0.000050
Timestep 34100000
mean reward (100 episodes) 17.840000
best mean reward 18.790000
episodes 17960
exploration 0.100000
learning_rate 0.000050
Timestep 34110000
mean reward (100 episodes) 17.860000
best mean reward 18.790000
episodes 17965
exploration 0.100000
learning_rate 0.000050
Timestep 34120000
mean reward (100 episodes) 17.760000
best mean reward 18.790000
episodes 17970
exploration 0.100000
learning_rate 0.000050
Timestep 3413

[2018-06-22 10:31:20,669] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video018000.mp4


Timestep 34180000
mean reward (100 episodes) 17.640000
best mean reward 18.790000
episodes 18001
exploration 0.100000
learning_rate 0.000050
Timestep 34190000
mean reward (100 episodes) 17.640000
best mean reward 18.790000
episodes 18006
exploration 0.100000
learning_rate 0.000050
Timestep 34200000
mean reward (100 episodes) 17.710000
best mean reward 18.790000
episodes 18011
exploration 0.100000
learning_rate 0.000050
Timestep 34210000
mean reward (100 episodes) 17.790000
best mean reward 18.790000
episodes 18017
exploration 0.100000
learning_rate 0.000050
Timestep 34220000
mean reward (100 episodes) 17.850000
best mean reward 18.790000
episodes 18022
exploration 0.100000
learning_rate 0.000050
Timestep 34230000
mean reward (100 episodes) 17.900000
best mean reward 18.790000
episodes 18027
exploration 0.100000
learning_rate 0.000050
Timestep 34240000
mean reward (100 episodes) 17.910000
best mean reward 18.790000
episodes 18033
exploration 0.100000
learning_rate 0.000050
Timestep 3425

Timestep 34770000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 18312
exploration 0.100000
learning_rate 0.000050
Timestep 34780000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 18318
exploration 0.100000
learning_rate 0.000050
Timestep 34790000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 18323
exploration 0.100000
learning_rate 0.000050
Timestep 34800000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 18328
exploration 0.100000
learning_rate 0.000050
Timestep 34810000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 18334
exploration 0.100000
learning_rate 0.000050
Timestep 34820000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 18339
exploration 0.100000
learning_rate 0.000050
Timestep 34830000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 18344
exploration 0.100000
learning_rate 0.000050
Timestep 3484

Timestep 35360000
mean reward (100 episodes) 18.270000
best mean reward 18.790000
episodes 18627
exploration 0.100000
learning_rate 0.000050
Timestep 35370000
mean reward (100 episodes) 18.240000
best mean reward 18.790000
episodes 18633
exploration 0.100000
learning_rate 0.000050
Timestep 35380000
mean reward (100 episodes) 18.240000
best mean reward 18.790000
episodes 18638
exploration 0.100000
learning_rate 0.000050
Timestep 35390000
mean reward (100 episodes) 18.300000
best mean reward 18.790000
episodes 18643
exploration 0.100000
learning_rate 0.000050
Timestep 35400000
mean reward (100 episodes) 18.260000
best mean reward 18.790000
episodes 18649
exploration 0.100000
learning_rate 0.000050
Timestep 35410000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 18654
exploration 0.100000
learning_rate 0.000050
Timestep 35420000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 18659
exploration 0.100000
learning_rate 0.000050
Timestep 3543

Timestep 35950000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 18939
exploration 0.100000
learning_rate 0.000050
Timestep 35960000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 18945
exploration 0.100000
learning_rate 0.000050
Timestep 35970000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 18950
exploration 0.100000
learning_rate 0.000050
Timestep 35980000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 18956
exploration 0.100000
learning_rate 0.000050
Timestep 35990000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 18961
exploration 0.100000
learning_rate 0.000050
Timestep 36000000
mean reward (100 episodes) 18.110000
best mean reward 18.790000
episodes 18966
exploration 0.100000
learning_rate 0.000050
Timestep 36010000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 18971
exploration 0.100000
learning_rate 0.000050
Timestep 3602

[2018-06-22 12:18:03,397] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video019000.mp4


Timestep 36070000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 19002
exploration 0.100000
learning_rate 0.000050
Timestep 36080000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 19008
exploration 0.100000
learning_rate 0.000050
Timestep 36090000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 19013
exploration 0.100000
learning_rate 0.000050
Timestep 36100000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 19019
exploration 0.100000
learning_rate 0.000050
Timestep 36110000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 19024
exploration 0.100000
learning_rate 0.000050
Timestep 36120000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 19029
exploration 0.100000
learning_rate 0.000050
Timestep 36130000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 19034
exploration 0.100000
learning_rate 0.000050
Timestep 3614

Timestep 36660000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 19315
exploration 0.100000
learning_rate 0.000050
Timestep 36670000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 19320
exploration 0.100000
learning_rate 0.000050
Timestep 36680000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 19325
exploration 0.100000
learning_rate 0.000050
Timestep 36690000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 19330
exploration 0.100000
learning_rate 0.000050
Timestep 36700000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 19336
exploration 0.100000
learning_rate 0.000050
Timestep 36710000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 19341
exploration 0.100000
learning_rate 0.000050
Timestep 36720000
mean reward (100 episodes) 18.050000
best mean reward 18.790000
episodes 19347
exploration 0.100000
learning_rate 0.000050
Timestep 3673

Timestep 37250000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 19628
exploration 0.100000
learning_rate 0.000050
Timestep 37260000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 19633
exploration 0.100000
learning_rate 0.000050
Timestep 37270000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 19638
exploration 0.100000
learning_rate 0.000050
Timestep 37280000
mean reward (100 episodes) 18.060000
best mean reward 18.790000
episodes 19644
exploration 0.100000
learning_rate 0.000050
Timestep 37290000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 19649
exploration 0.100000
learning_rate 0.000050
Timestep 37300000
mean reward (100 episodes) 17.890000
best mean reward 18.790000
episodes 19654
exploration 0.100000
learning_rate 0.000050
Timestep 37310000
mean reward (100 episodes) 17.870000
best mean reward 18.790000
episodes 19659
exploration 0.100000
learning_rate 0.000050
Timestep 3732

Timestep 37840000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 19940
exploration 0.100000
learning_rate 0.000050
Timestep 37850000
mean reward (100 episodes) 18.010000
best mean reward 18.790000
episodes 19946
exploration 0.100000
learning_rate 0.000050
Timestep 37860000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 19951
exploration 0.100000
learning_rate 0.000050
Timestep 37870000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 19956
exploration 0.100000
learning_rate 0.000050
Timestep 37880000
mean reward (100 episodes) 17.820000
best mean reward 18.790000
episodes 19961
exploration 0.100000
learning_rate 0.000050
Timestep 37890000
mean reward (100 episodes) 17.770000
best mean reward 18.790000
episodes 19967
exploration 0.100000
learning_rate 0.000050
Timestep 37900000
mean reward (100 episodes) 17.650000
best mean reward 18.790000
episodes 19971
exploration 0.100000
learning_rate 0.000050
Timestep 3791

[2018-06-22 14:05:03,122] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video020000.mp4


Timestep 37960000
mean reward (100 episodes) 17.850000
best mean reward 18.790000
episodes 20004
exploration 0.100000
learning_rate 0.000050
Timestep 37970000
mean reward (100 episodes) 17.790000
best mean reward 18.790000
episodes 20009
exploration 0.100000
learning_rate 0.000050
Timestep 37980000
mean reward (100 episodes) 17.830000
best mean reward 18.790000
episodes 20014
exploration 0.100000
learning_rate 0.000050
Timestep 37990000
mean reward (100 episodes) 17.920000
best mean reward 18.790000
episodes 20020
exploration 0.100000
learning_rate 0.000050
Timestep 38000000
mean reward (100 episodes) 17.950000
best mean reward 18.790000
episodes 20025
exploration 0.100000
learning_rate 0.000050
Timestep 38010000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 20030
exploration 0.100000
learning_rate 0.000050
Timestep 38020000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 20036
exploration 0.100000
learning_rate 0.000050
Timestep 3803

Timestep 38550000
mean reward (100 episodes) 18.380000
best mean reward 18.790000
episodes 20319
exploration 0.100000
learning_rate 0.000050
Timestep 38560000
mean reward (100 episodes) 18.280000
best mean reward 18.790000
episodes 20324
exploration 0.100000
learning_rate 0.000050
Timestep 38570000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 20329
exploration 0.100000
learning_rate 0.000050
Timestep 38580000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 20335
exploration 0.100000
learning_rate 0.000050
Timestep 38590000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 20340
exploration 0.100000
learning_rate 0.000050
Timestep 38600000
mean reward (100 episodes) 18.220000
best mean reward 18.790000
episodes 20345
exploration 0.100000
learning_rate 0.000050
Timestep 38610000
mean reward (100 episodes) 18.210000
best mean reward 18.790000
episodes 20350
exploration 0.100000
learning_rate 0.000050
Timestep 3862

Timestep 39140000
mean reward (100 episodes) 18.190000
best mean reward 18.790000
episodes 20630
exploration 0.100000
learning_rate 0.000050
Timestep 39150000
mean reward (100 episodes) 18.220000
best mean reward 18.790000
episodes 20635
exploration 0.100000
learning_rate 0.000050
Timestep 39160000
mean reward (100 episodes) 18.300000
best mean reward 18.790000
episodes 20641
exploration 0.100000
learning_rate 0.000050
Timestep 39170000
mean reward (100 episodes) 18.280000
best mean reward 18.790000
episodes 20646
exploration 0.100000
learning_rate 0.000050
Timestep 39180000
mean reward (100 episodes) 18.250000
best mean reward 18.790000
episodes 20651
exploration 0.100000
learning_rate 0.000050
Timestep 39190000
mean reward (100 episodes) 18.320000
best mean reward 18.790000
episodes 20657
exploration 0.100000
learning_rate 0.000050
Timestep 39200000
mean reward (100 episodes) 18.320000
best mean reward 18.790000
episodes 20662
exploration 0.100000
learning_rate 0.000050
Timestep 3921

Timestep 39730000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 20941
exploration 0.100000
learning_rate 0.000050
Timestep 39740000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 20946
exploration 0.100000
learning_rate 0.000050
Timestep 39750000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 20952
exploration 0.100000
learning_rate 0.000050
Timestep 39760000
mean reward (100 episodes) 18.100000
best mean reward 18.790000
episodes 20957
exploration 0.100000
learning_rate 0.000050
Timestep 39770000
mean reward (100 episodes) 18.020000
best mean reward 18.790000
episodes 20962
exploration 0.100000
learning_rate 0.000050
Timestep 39780000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 20968
exploration 0.100000
learning_rate 0.000050
Timestep 39790000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 20973
exploration 0.100000
learning_rate 0.000050
Timestep 3980

[2018-06-22 15:51:19,289] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video021000.mp4


Timestep 39850000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 21004
exploration 0.100000
learning_rate 0.000050
Timestep 39860000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 21010
exploration 0.100000
learning_rate 0.000050
Timestep 39870000
mean reward (100 episodes) 17.990000
best mean reward 18.790000
episodes 21015
exploration 0.100000
learning_rate 0.000050
Timestep 39880000
mean reward (100 episodes) 17.980000
best mean reward 18.790000
episodes 21020
exploration 0.100000
learning_rate 0.000050
Timestep 39890000
mean reward (100 episodes) 18.030000
best mean reward 18.790000
episodes 21026
exploration 0.100000
learning_rate 0.000050
Timestep 39900000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 21031
exploration 0.100000
learning_rate 0.000050
Timestep 39910000
mean reward (100 episodes) 17.960000
best mean reward 18.790000
episodes 21036
exploration 0.100000
learning_rate 0.000050
Timestep 3992

Timestep 40440000
mean reward (100 episodes) 18.140000
best mean reward 18.790000
episodes 21319
exploration 0.100000
learning_rate 0.000050
Timestep 40450000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 21324
exploration 0.100000
learning_rate 0.000050
Timestep 40460000
mean reward (100 episodes) 18.150000
best mean reward 18.790000
episodes 21330
exploration 0.100000
learning_rate 0.000050
Timestep 40470000
mean reward (100 episodes) 18.110000
best mean reward 18.790000
episodes 21335
exploration 0.100000
learning_rate 0.000050
Timestep 40480000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 21340
exploration 0.100000
learning_rate 0.000050
Timestep 40490000
mean reward (100 episodes) 18.160000
best mean reward 18.790000
episodes 21345
exploration 0.100000
learning_rate 0.000050
Timestep 40500000
mean reward (100 episodes) 18.120000
best mean reward 18.790000
episodes 21350
exploration 0.100000
learning_rate 0.000050
Timestep 4051

Timestep 41030000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 21629
exploration 0.100000
learning_rate 0.000050
Timestep 41040000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 21634
exploration 0.100000
learning_rate 0.000050
Timestep 41050000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 21639
exploration 0.100000
learning_rate 0.000050
Timestep 41060000
mean reward (100 episodes) 17.970000
best mean reward 18.790000
episodes 21645
exploration 0.100000
learning_rate 0.000050
Timestep 41070000
mean reward (100 episodes) 17.930000
best mean reward 18.790000
episodes 21650
exploration 0.100000
learning_rate 0.000050
Timestep 41080000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 21655
exploration 0.100000
learning_rate 0.000050
Timestep 41090000
mean reward (100 episodes) 17.940000
best mean reward 18.790000
episodes 21661
exploration 0.100000
learning_rate 0.000050
Timestep 4110

Timestep 41620000
mean reward (100 episodes) 18.090000
best mean reward 18.790000
episodes 21941
exploration 0.100000
learning_rate 0.000050
Timestep 41630000
mean reward (100 episodes) 18.100000
best mean reward 18.790000
episodes 21947
exploration 0.100000
learning_rate 0.000050
Timestep 41640000
mean reward (100 episodes) 18.040000
best mean reward 18.790000
episodes 21952
exploration 0.100000
learning_rate 0.000050
Timestep 41650000
mean reward (100 episodes) 17.900000
best mean reward 18.790000
episodes 21957
exploration 0.100000
learning_rate 0.000050
Timestep 41660000
mean reward (100 episodes) 17.850000
best mean reward 18.790000
episodes 21962
exploration 0.100000
learning_rate 0.000050
Timestep 41670000
mean reward (100 episodes) 17.890000
best mean reward 18.790000
episodes 21967
exploration 0.100000
learning_rate 0.000050
Timestep 41680000
mean reward (100 episodes) 18.070000
best mean reward 18.790000
episodes 21973
exploration 0.100000
learning_rate 0.000050
Timestep 4169

[2018-06-22 17:37:48,647] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video022000.mp4


Timestep 41740000
mean reward (100 episodes) 17.860000
best mean reward 18.790000
episodes 22004
exploration 0.100000
learning_rate 0.000050
Timestep 41750000
mean reward (100 episodes) 17.760000
best mean reward 18.790000
episodes 22010
exploration 0.100000
learning_rate 0.000050
Timestep 41760000
mean reward (100 episodes) 17.810000
best mean reward 18.790000
episodes 22015
exploration 0.100000
learning_rate 0.000050
Timestep 41770000
mean reward (100 episodes) 17.860000
best mean reward 18.790000
episodes 22020
exploration 0.100000
learning_rate 0.000050
Timestep 41780000
mean reward (100 episodes) 17.820000
best mean reward 18.790000
episodes 22025
exploration 0.100000
learning_rate 0.000050
Timestep 41790000
mean reward (100 episodes) 17.800000
best mean reward 18.790000
episodes 22031
exploration 0.100000
learning_rate 0.000050
Timestep 41800000
mean reward (100 episodes) 17.820000
best mean reward 18.790000
episodes 22036
exploration 0.100000
learning_rate 0.000050
Timestep 4181

Timestep 42330000
mean reward (100 episodes) 18.400000
best mean reward 18.790000
episodes 22321
exploration 0.100000
learning_rate 0.000050
Timestep 42340000
mean reward (100 episodes) 18.500000
best mean reward 18.790000
episodes 22327
exploration 0.100000
learning_rate 0.000050
Timestep 42350000
mean reward (100 episodes) 18.490000
best mean reward 18.790000
episodes 22332
exploration 0.100000
learning_rate 0.000050
Timestep 42360000
mean reward (100 episodes) 18.540000
best mean reward 18.790000
episodes 22338
exploration 0.100000
learning_rate 0.000050
Timestep 42370000
mean reward (100 episodes) 18.500000
best mean reward 18.790000
episodes 22343
exploration 0.100000
learning_rate 0.000050
Timestep 42380000
mean reward (100 episodes) 18.540000
best mean reward 18.790000
episodes 22348
exploration 0.100000
learning_rate 0.000050
Timestep 42390000
mean reward (100 episodes) 18.540000
best mean reward 18.790000
episodes 22354
exploration 0.100000
learning_rate 0.000050
Timestep 4240

Timestep 42920000
mean reward (100 episodes) 18.580000
best mean reward 18.810000
episodes 22636
exploration 0.100000
learning_rate 0.000050
Timestep 42930000
mean reward (100 episodes) 18.430000
best mean reward 18.810000
episodes 22641
exploration 0.100000
learning_rate 0.000050
Timestep 42940000
mean reward (100 episodes) 18.540000
best mean reward 18.810000
episodes 22646
exploration 0.100000
learning_rate 0.000050
Timestep 42950000
mean reward (100 episodes) 18.590000
best mean reward 18.810000
episodes 22652
exploration 0.100000
learning_rate 0.000050
Timestep 42960000
mean reward (100 episodes) 18.580000
best mean reward 18.810000
episodes 22657
exploration 0.100000
learning_rate 0.000050
Timestep 42970000
mean reward (100 episodes) 18.540000
best mean reward 18.810000
episodes 22663
exploration 0.100000
learning_rate 0.000050
Timestep 42980000
mean reward (100 episodes) 18.460000
best mean reward 18.810000
episodes 22668
exploration 0.100000
learning_rate 0.000050
Timestep 4299

Timestep 43510000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 22944
exploration 0.100000
learning_rate 0.000050
Timestep 43520000
mean reward (100 episodes) 17.530000
best mean reward 18.810000
episodes 22950
exploration 0.100000
learning_rate 0.000050
Timestep 43530000
mean reward (100 episodes) 17.450000
best mean reward 18.810000
episodes 22954
exploration 0.100000
learning_rate 0.000050
Timestep 43540000
mean reward (100 episodes) 17.520000
best mean reward 18.810000
episodes 22960
exploration 0.100000
learning_rate 0.000050
Timestep 43550000
mean reward (100 episodes) 17.660000
best mean reward 18.810000
episodes 22966
exploration 0.100000
learning_rate 0.000050
Timestep 43560000
mean reward (100 episodes) 17.760000
best mean reward 18.810000
episodes 22971
exploration 0.100000
learning_rate 0.000050
Timestep 43570000
mean reward (100 episodes) 17.820000
best mean reward 18.810000
episodes 22976
exploration 0.100000
learning_rate 0.000050
Timestep 4358

[2018-06-22 19:24:01,403] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video023000.mp4


Timestep 43620000
mean reward (100 episodes) 17.640000
best mean reward 18.810000
episodes 23003
exploration 0.100000
learning_rate 0.000050
Timestep 43630000
mean reward (100 episodes) 17.690000
best mean reward 18.810000
episodes 23008
exploration 0.100000
learning_rate 0.000050
Timestep 43640000
mean reward (100 episodes) 17.570000
best mean reward 18.810000
episodes 23013
exploration 0.100000
learning_rate 0.000050
Timestep 43650000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 23018
exploration 0.100000
learning_rate 0.000050
Timestep 43660000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 23024
exploration 0.100000
learning_rate 0.000050
Timestep 43670000
mean reward (100 episodes) 17.520000
best mean reward 18.810000
episodes 23029
exploration 0.100000
learning_rate 0.000050
Timestep 43680000
mean reward (100 episodes) 17.520000
best mean reward 18.810000
episodes 23034
exploration 0.100000
learning_rate 0.000050
Timestep 4369

Timestep 44210000
mean reward (100 episodes) 17.600000
best mean reward 18.810000
episodes 23312
exploration 0.100000
learning_rate 0.000050
Timestep 44220000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 23317
exploration 0.100000
learning_rate 0.000050
Timestep 44230000
mean reward (100 episodes) 17.740000
best mean reward 18.810000
episodes 23322
exploration 0.100000
learning_rate 0.000050
Timestep 44240000
mean reward (100 episodes) 17.770000
best mean reward 18.810000
episodes 23328
exploration 0.100000
learning_rate 0.000050
Timestep 44250000
mean reward (100 episodes) 17.750000
best mean reward 18.810000
episodes 23333
exploration 0.100000
learning_rate 0.000050
Timestep 44260000
mean reward (100 episodes) 17.700000
best mean reward 18.810000
episodes 23338
exploration 0.100000
learning_rate 0.000050
Timestep 44270000
mean reward (100 episodes) 17.650000
best mean reward 18.810000
episodes 23343
exploration 0.100000
learning_rate 0.000050
Timestep 4428

Timestep 44800000
mean reward (100 episodes) 17.330000
best mean reward 18.810000
episodes 23621
exploration 0.100000
learning_rate 0.000050
Timestep 44810000
mean reward (100 episodes) 17.310000
best mean reward 18.810000
episodes 23626
exploration 0.100000
learning_rate 0.000050
Timestep 44820000
mean reward (100 episodes) 17.280000
best mean reward 18.810000
episodes 23631
exploration 0.100000
learning_rate 0.000050
Timestep 44830000
mean reward (100 episodes) 17.290000
best mean reward 18.810000
episodes 23637
exploration 0.100000
learning_rate 0.000050
Timestep 44840000
mean reward (100 episodes) 17.390000
best mean reward 18.810000
episodes 23643
exploration 0.100000
learning_rate 0.000050
Timestep 44850000
mean reward (100 episodes) 17.290000
best mean reward 18.810000
episodes 23647
exploration 0.100000
learning_rate 0.000050
Timestep 44860000
mean reward (100 episodes) 17.230000
best mean reward 18.810000
episodes 23653
exploration 0.100000
learning_rate 0.000050
Timestep 4487

Timestep 45390000
mean reward (100 episodes) 17.660000
best mean reward 18.810000
episodes 23934
exploration 0.100000
learning_rate 0.000050
Timestep 45400000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 23939
exploration 0.100000
learning_rate 0.000050
Timestep 45410000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 23944
exploration 0.100000
learning_rate 0.000050
Timestep 45420000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 23949
exploration 0.100000
learning_rate 0.000050
Timestep 45430000
mean reward (100 episodes) 17.620000
best mean reward 18.810000
episodes 23954
exploration 0.100000
learning_rate 0.000050
Timestep 45440000
mean reward (100 episodes) 17.610000
best mean reward 18.810000
episodes 23960
exploration 0.100000
learning_rate 0.000050
Timestep 45450000
mean reward (100 episodes) 17.640000
best mean reward 18.810000
episodes 23965
exploration 0.100000
learning_rate 0.000050
Timestep 4546

[2018-06-22 21:10:48,934] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video024000.mp4


Timestep 45520000
mean reward (100 episodes) 17.700000
best mean reward 18.810000
episodes 24002
exploration 0.100000
learning_rate 0.000050
Timestep 45530000
mean reward (100 episodes) 17.560000
best mean reward 18.810000
episodes 24007
exploration 0.100000
learning_rate 0.000050
Timestep 45540000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 24013
exploration 0.100000
learning_rate 0.000050
Timestep 45550000
mean reward (100 episodes) 17.520000
best mean reward 18.810000
episodes 24018
exploration 0.100000
learning_rate 0.000050
Timestep 45560000
mean reward (100 episodes) 17.590000
best mean reward 18.810000
episodes 24023
exploration 0.100000
learning_rate 0.000050
Timestep 45570000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 24029
exploration 0.100000
learning_rate 0.000050
Timestep 45580000
mean reward (100 episodes) 17.650000
best mean reward 18.810000
episodes 24034
exploration 0.100000
learning_rate 0.000050
Timestep 4559

Timestep 46110000
mean reward (100 episodes) 17.130000
best mean reward 18.810000
episodes 24312
exploration 0.100000
learning_rate 0.000050
Timestep 46120000
mean reward (100 episodes) 17.090000
best mean reward 18.810000
episodes 24318
exploration 0.100000
learning_rate 0.000050
Timestep 46130000
mean reward (100 episodes) 17.190000
best mean reward 18.810000
episodes 24323
exploration 0.100000
learning_rate 0.000050
Timestep 46140000
mean reward (100 episodes) 17.130000
best mean reward 18.810000
episodes 24328
exploration 0.100000
learning_rate 0.000050
Timestep 46150000
mean reward (100 episodes) 17.200000
best mean reward 18.810000
episodes 24333
exploration 0.100000
learning_rate 0.000050
Timestep 46160000
mean reward (100 episodes) 17.200000
best mean reward 18.810000
episodes 24339
exploration 0.100000
learning_rate 0.000050
Timestep 46170000
mean reward (100 episodes) 17.270000
best mean reward 18.810000
episodes 24344
exploration 0.100000
learning_rate 0.000050
Timestep 4618

Timestep 46700000
mean reward (100 episodes) 17.440000
best mean reward 18.810000
episodes 24626
exploration 0.100000
learning_rate 0.000050
Timestep 46710000
mean reward (100 episodes) 17.460000
best mean reward 18.810000
episodes 24631
exploration 0.100000
learning_rate 0.000050
Timestep 46720000
mean reward (100 episodes) 17.460000
best mean reward 18.810000
episodes 24637
exploration 0.100000
learning_rate 0.000050
Timestep 46730000
mean reward (100 episodes) 17.400000
best mean reward 18.810000
episodes 24642
exploration 0.100000
learning_rate 0.000050
Timestep 46740000
mean reward (100 episodes) 17.450000
best mean reward 18.810000
episodes 24647
exploration 0.100000
learning_rate 0.000050
Timestep 46750000
mean reward (100 episodes) 17.520000
best mean reward 18.810000
episodes 24652
exploration 0.100000
learning_rate 0.000050
Timestep 46760000
mean reward (100 episodes) 17.420000
best mean reward 18.810000
episodes 24657
exploration 0.100000
learning_rate 0.000050
Timestep 4677

Timestep 47290000
mean reward (100 episodes) 17.540000
best mean reward 18.810000
episodes 24933
exploration 0.100000
learning_rate 0.000050
Timestep 47300000
mean reward (100 episodes) 17.530000
best mean reward 18.810000
episodes 24938
exploration 0.100000
learning_rate 0.000050
Timestep 47310000
mean reward (100 episodes) 17.500000
best mean reward 18.810000
episodes 24943
exploration 0.100000
learning_rate 0.000050
Timestep 47320000
mean reward (100 episodes) 17.480000
best mean reward 18.810000
episodes 24948
exploration 0.100000
learning_rate 0.000050
Timestep 47330000
mean reward (100 episodes) 17.550000
best mean reward 18.810000
episodes 24953
exploration 0.100000
learning_rate 0.000050
Timestep 47340000
mean reward (100 episodes) 17.500000
best mean reward 18.810000
episodes 24959
exploration 0.100000
learning_rate 0.000050
Timestep 47350000
mean reward (100 episodes) 17.460000
best mean reward 18.810000
episodes 24964
exploration 0.100000
learning_rate 0.000050
Timestep 4736

[2018-06-22 22:57:59,727] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video025000.mp4


Timestep 47420000
mean reward (100 episodes) 17.360000
best mean reward 18.810000
episodes 25000
exploration 0.100000
learning_rate 0.000050
Timestep 47430000
mean reward (100 episodes) 17.460000
best mean reward 18.810000
episodes 25005
exploration 0.100000
learning_rate 0.000050
Timestep 47440000
mean reward (100 episodes) 17.460000
best mean reward 18.810000
episodes 25011
exploration 0.100000
learning_rate 0.000050
Timestep 47450000
mean reward (100 episodes) 17.490000
best mean reward 18.810000
episodes 25016
exploration 0.100000
learning_rate 0.000050
Timestep 47460000
mean reward (100 episodes) 17.590000
best mean reward 18.810000
episodes 25022
exploration 0.100000
learning_rate 0.000050
Timestep 47470000
mean reward (100 episodes) 17.580000
best mean reward 18.810000
episodes 25027
exploration 0.100000
learning_rate 0.000050
Timestep 47480000
mean reward (100 episodes) 17.600000
best mean reward 18.810000
episodes 25032
exploration 0.100000
learning_rate 0.000050
Timestep 4749

Timestep 48010000
mean reward (100 episodes) 17.530000
best mean reward 18.810000
episodes 25308
exploration 0.100000
learning_rate 0.000050
Timestep 48020000
mean reward (100 episodes) 17.510000
best mean reward 18.810000
episodes 25313
exploration 0.100000
learning_rate 0.000050
Timestep 48030000
mean reward (100 episodes) 17.530000
best mean reward 18.810000
episodes 25318
exploration 0.100000
learning_rate 0.000050
Timestep 48040000
mean reward (100 episodes) 17.550000
best mean reward 18.810000
episodes 25323
exploration 0.100000
learning_rate 0.000050
Timestep 48050000
mean reward (100 episodes) 17.500000
best mean reward 18.810000
episodes 25328
exploration 0.100000
learning_rate 0.000050
Timestep 48060000
mean reward (100 episodes) 17.450000
best mean reward 18.810000
episodes 25334
exploration 0.100000
learning_rate 0.000050
Timestep 48070000
mean reward (100 episodes) 17.480000
best mean reward 18.810000
episodes 25339
exploration 0.100000
learning_rate 0.000050
Timestep 4808

Timestep 48600000
mean reward (100 episodes) 17.190000
best mean reward 18.810000
episodes 25616
exploration 0.100000
learning_rate 0.000050
Timestep 48610000
mean reward (100 episodes) 17.180000
best mean reward 18.810000
episodes 25622
exploration 0.100000
learning_rate 0.000050
Timestep 48620000
mean reward (100 episodes) 17.130000
best mean reward 18.810000
episodes 25627
exploration 0.100000
learning_rate 0.000050
Timestep 48630000
mean reward (100 episodes) 17.190000
best mean reward 18.810000
episodes 25632
exploration 0.100000
learning_rate 0.000050
Timestep 48640000
mean reward (100 episodes) 17.230000
best mean reward 18.810000
episodes 25637
exploration 0.100000
learning_rate 0.000050
Timestep 48650000
mean reward (100 episodes) 17.290000
best mean reward 18.810000
episodes 25643
exploration 0.100000
learning_rate 0.000050
Timestep 48660000
mean reward (100 episodes) 17.260000
best mean reward 18.810000
episodes 25648
exploration 0.100000
learning_rate 0.000050
Timestep 4867

Timestep 49190000
mean reward (100 episodes) 17.650000
best mean reward 18.810000
episodes 25920
exploration 0.100000
learning_rate 0.000050
Timestep 49200000
mean reward (100 episodes) 17.690000
best mean reward 18.810000
episodes 25926
exploration 0.100000
learning_rate 0.000050
Timestep 49210000
mean reward (100 episodes) 17.670000
best mean reward 18.810000
episodes 25931
exploration 0.100000
learning_rate 0.000050
Timestep 49220000
mean reward (100 episodes) 17.680000
best mean reward 18.810000
episodes 25936
exploration 0.100000
learning_rate 0.000050
Timestep 49230000
mean reward (100 episodes) 17.680000
best mean reward 18.810000
episodes 25942
exploration 0.100000
learning_rate 0.000050
Timestep 49240000
mean reward (100 episodes) 17.650000
best mean reward 18.810000
episodes 25947
exploration 0.100000
learning_rate 0.000050
Timestep 49250000
mean reward (100 episodes) 17.640000
best mean reward 18.810000
episodes 25952
exploration 0.100000
learning_rate 0.000050
Timestep 4926

[2018-06-23 00:46:07,845] Starting new video recorder writing to /tmp/hw3_vid_dir2/gym/openaigym.video.0.8907.video026000.mp4


Timestep 49350000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 26004
exploration 0.100000
learning_rate 0.000050
Timestep 49360000
mean reward (100 episodes) 17.650000
best mean reward 18.810000
episodes 26009
exploration 0.100000
learning_rate 0.000050
Timestep 49370000
mean reward (100 episodes) 17.790000
best mean reward 18.810000
episodes 26015
exploration 0.100000
learning_rate 0.000050
Timestep 49380000
mean reward (100 episodes) 17.810000
best mean reward 18.810000
episodes 26020
exploration 0.100000
learning_rate 0.000050
Timestep 49390000
mean reward (100 episodes) 17.730000
best mean reward 18.810000
episodes 26025
exploration 0.100000
learning_rate 0.000050
Timestep 49400000
mean reward (100 episodes) 17.610000
best mean reward 18.810000
episodes 26030
exploration 0.100000
learning_rate 0.000050
Timestep 49410000
mean reward (100 episodes) 17.600000
best mean reward 18.810000
episodes 26035
exploration 0.100000
learning_rate 0.000050
Timestep 4942

Timestep 49940000
mean reward (100 episodes) 17.630000
best mean reward 18.810000
episodes 26315
exploration 0.100000
learning_rate 0.000050
Timestep 49950000
mean reward (100 episodes) 17.610000
best mean reward 18.810000
episodes 26320
exploration 0.100000
learning_rate 0.000050
Timestep 49960000
mean reward (100 episodes) 17.710000
best mean reward 18.810000
episodes 26326
exploration 0.100000
learning_rate 0.000050
Timestep 49970000
mean reward (100 episodes) 17.790000
best mean reward 18.810000
episodes 26331
exploration 0.100000
learning_rate 0.000050
Timestep 49980000
mean reward (100 episodes) 17.670000
best mean reward 18.810000
episodes 26336
exploration 0.100000
learning_rate 0.000050
Timestep 49990000
mean reward (100 episodes) 17.680000
best mean reward 18.810000
episodes 26342
exploration 0.100000
learning_rate 0.000050
Timestep 50000000
mean reward (100 episodes) 17.680000
best mean reward 18.810000
episodes 26347
exploration 0.100000
learning_rate 0.000050
Timestep 5001

In [None]:
replay_buffer = ReplayBuffer(1000, 10)

In [41]:
isinstance(env, gym.Wrapper)

False