In [4]:
import numpy as np
import tensorflow as tf
import gym

# Hyperparameters
H = 200  # number of hidden layer neurons
learning_rate = 1e-4
gamma = 0.99  # discount factor for reward
D = 80 * 80  # input dimensionality: 80x80 grid
render = True

# Keras Model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(H, activation='relu', input_shape=(D,)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Preprocessing function
def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    if isinstance(I, tuple):
        I = I[0]
    I = I[35:195]
    I = I[::2, ::2, 0]
    I[I == 144] = 0
    I[I == 109] = 0
    I[I != 0] = 1
    return I.astype(float).ravel()

# Discounted rewards function
def discount_rewards(r):
    """ take 1D float array of rewards and compute discounted reward """
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(r.size)):
        if r[t] != 0: running_add = 0
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r

# Custom loss function
def custom_loss(y_true, y_pred):
    return -tf.reduce_sum(y_true * tf.math.log(y_pred) + (1 - y_true) * tf.math.log(1 - y_pred))

# Training loop
env = gym.make("Pong-v0", render_mode='human')
observation = env.reset()

prev_x = None
xs, dlogps, drs = [], [], []
reward_sum = 0
episode_number = 0

running_reward = None 

while True:
    env.render() 
    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(D)
    prev_x = cur_x

    aprob = model.predict(x.reshape(1, -1), batch_size=1).flatten()
    action = 2 if np.random.uniform() < aprob else 3

    xs.append(x)
    y = 1 if action == 2 else 0
    dlogps.append(y - aprob)

    observation, reward, done, info = env.step(action)[:4]
    reward_sum += reward
    drs.append(reward)

    if done:
        episode_number += 1

        epx = np.vstack(xs)
        epdlogp = np.vstack(dlogps)
        epr = np.vstack(drs)
        xs, dlogps, drs = [], [], []

        discounted_epr = discount_rewards(epr)
        discounted_epr -= np.mean(discounted_epr)
        discounted_epr /= (np.std(discounted_epr) + 1e-10)

        with tf.GradientTape() as tape:
            p = model(epx, training=True)
            loss = custom_loss(discounted_epr, p)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
        print('Resetting env. Episode reward total was %.f. Running mean: %.f' % (reward_sum, running_reward))
        reward_sum = 0
        observation = env.reset()
        prev_x = None

#         if episode_number % 100 == 0:
#             model.save('pong_model.h5')
        model.save('pong_model.h5')

    if reward != 0:
        print('Ep %d: Game finished, reward: %f' % (episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))


2023-11-18 17:17:50.775367: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-18 17:17:54.999113: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  logger.warn(
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
  logger.warn(


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000
Ep 0: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: 1.000000 !!!!!!!!


Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000


Ep 1: Game finished, reward: -1.000000
Ep 1: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 2: Game finished, reward: -1.000000




Ep 2: Game finished, reward: 1.000000 !!!!!!!!
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000




Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000
Ep 2: Game finished, reward: -1.000000


Ep 2: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000


Ep 3: Game finished, reward: -1.000000
Ep 3: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: 1.000000 !!!!!!!!


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000
Ep 4: Game finished, reward: -1.000000


Ep 4: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000


Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: -1.000000
Ep 5: Game finished, reward: 1.000000 !!!!!!!!


Ep 5: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: 1.000000 !!!!!!!!


Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000
Ep 6: Game finished, reward: -1.000000


Ep 6: Game finished, reward: -1.000000


Resetting env. Episode reward total was -20. Running mean: -21




Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000




Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: 1.000000 !!!!!!!!
Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Ep 7: Game finished, reward: -1.000000


Ep 7: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: 1.000000 !!!!!!!!
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000
Ep 8: Game finished, reward: -1.000000


Resetting env. Episode reward total was -20. Running mean: -21




Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000


Ep 9: Game finished, reward: -1.000000
Ep 9: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000




Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000


Ep 10: Game finished, reward: -1.000000
Ep 10: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: 1.000000 !!!!!!!!


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: 1.000000 !!!!!!!!
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000


Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000
Ep 11: Game finished, reward: -1.000000


Resetting env. Episode reward total was -19. Running mean: -21




Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Ep 12: Game finished, reward: -1.000000
Ep 12: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000
Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000


Ep 13: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: 1.000000 !!!!!!!!
Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: -1.000000


Ep 14: Game finished, reward: -1.000000
Ep 14: Game finished, reward: 1.000000 !!!!!!!!


Resetting env. Episode reward total was -19. Running mean: -21




Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000


Ep 15: Game finished, reward: -1.000000
Ep 15: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Ep 16: Game finished, reward: -1.000000


Ep 16: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000
Ep 17: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000


Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Ep 18: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000




Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000
Ep 19: Game finished, reward: -1.000000


Ep 19: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: 1.000000 !!!!!!!!
Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000


Ep 20: Game finished, reward: -1.000000
Ep 20: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000


Ep 21: Game finished, reward: -1.000000
Ep 21: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000




Ep 22: Game finished, reward: 1.000000 !!!!!!!!
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000
Ep 22: Game finished, reward: -1.000000


Ep 22: Game finished, reward: -1.000000


Resetting env. Episode reward total was -20. Running mean: -21




Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: 1.000000 !!!!!!!!
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000
Ep 23: Game finished, reward: -1.000000


Ep 23: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000
Ep 24: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000
Ep 25: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000




Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000


Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000




Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000
Ep 26: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000


Ep 27: Game finished, reward: -1.000000
Ep 27: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000
Ep 28: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: -1.000000


Ep 29: Game finished, reward: -1.000000
Ep 29: Game finished, reward: 1.000000 !!!!!!!!


Ep 29: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000
Ep 30: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000
Ep 31: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000
Ep 32: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000




Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000
Ep 33: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000


Ep 34: Game finished, reward: -1.000000
Ep 34: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000


Ep 35: Game finished, reward: -1.000000
Ep 35: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: 1.000000 !!!!!!!!


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: 1.000000 !!!!!!!!


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000
Ep 36: Game finished, reward: -1.000000


Resetting env. Episode reward total was -19. Running mean: -21




Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000


Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000
Ep 37: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000




Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Ep 38: Game finished, reward: -1.000000
Ep 38: Game finished, reward: -1.000000


Resetting env. Episode reward total was -21. Running mean: -21




Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000




Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000
Ep 39: Game finished, reward: -1.000000


Ep 39: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000


Ep 40: Game finished, reward: -1.000000
Ep 40: Game finished, reward: -1.000000
Resetting env. Episode reward total was -21. Running mean: -21




Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: 1.000000 !!!!!!!!


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: 1.000000 !!!!!!!!


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Ep 41: Game finished, reward: -1.000000
Ep 41: Game finished, reward: -1.000000


Resetting env. Episode reward total was -19. Running mean: -21




Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: 1.000000 !!!!!!!!


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000
Ep 42: Game finished, reward: -1.000000


Ep 42: Game finished, reward: -1.000000
Resetting env. Episode reward total was -20. Running mean: -21




Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: 1.000000 !!!!!!!!


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000
Ep 43: Game finished, reward: -1.000000


Resetting env. Episode reward total was -20. Running mean: -21




Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: 1.000000 !!!!!!!!


Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000


Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000


Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000


Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000


Ep 44: Game finished, reward: -1.000000
Ep 44: Game finished, reward: -1.000000


KeyboardInterrupt: 

In [None]:
# LOADING AND RUNNING SAVED MODEL

In [None]:
import tensorflow as tf
import gym
import numpy as np

# Load the previously trained model
model = tf.keras.models.load_model('pong_model.h5')

# Preprocessing function
def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    if isinstance(I, tuple):
        I = I[0]
    I = I[35:195]
    I = I[::2, ::2, 0]
    I[I == 144] = 0
    I[I == 109] = 0
    I[I != 0] = 1
    return I.astype(float).ravel()

# Initialize the Pong environment
env = gym.make("Pong-v0", render_mode='human')
observation = env.reset()

prev_x = None

# Run the model on the environment
while True:
    env.render()

    cur_x = prepro(observation)
    x = cur_x - prev_x if prev_x is not None else np.zeros(80 * 80)
    prev_x = cur_x

    aprob = model.predict(x.reshape(1, -1), batch_size=1).flatten()
    action = 2 if np.random.uniform() < aprob else 3

    observation, reward, done, info = env.step(action)[:4]

    if done:
        observation = env.reset()


2023-11-18 21:55:22.447456: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




2023-11-18 21:55:25.996885: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  logger.warn(
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]




  logger.warn(






























