In [1]:
import numpy as np
import tensorflow as tf
import random
import dqn
from collections import deque

import gym
env = gym.make('CartPole-v0')
env._max_episode_steps = 10000

# Constants defining our neural network
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

mainDQN = None

dis = 0.9
REPLAY_MEMORY = 50000

[2017-11-15 09:35:40,912] Making new env: CartPole-v0


In [2]:
class DQN:
    def __init__(self, session, input_size, output_size, name="main"):
        self.session = session
        self.input_size = input_size
        self.output_size = output_size
        self.net_name = name
        
        self._build_network()
        
    def _build_network(self, h_size = 10, l_rate = 1e-1):
        with tf.variable_scope(self.net_name):
            self._X = tf.placeholder(
                tf.float32, [None, self.input_size], name = "input_x")
        
            # First layer of weights
            W1 = tf.get_variable("W1", shape=[self.input_size, h_size],
                             initializer=tf.contrib.layers.xavier_initializer())
            layer1 = tf.nn.tanh(tf.matmul(self._X, W1))
        
            # Second Layer of weights
            W2 = tf.get_variable("W2", shape=[h_size, self.output_size],
                             initializer=tf.contrib.layers.xavier_initializer())
        
            # Q prediction
            self._Qpred = tf.matmul(layer1, W2)
        
        # We need to define the parts of the network needed for learning a policy
        self._Y = tf.placeholder(
            shape = [None, self.output_size], dtype = tf.float32)
    
        # Loss function
        self._loss = tf.reduce_mean(tf.square(self._Y - self._Qpred))
        # Learning
        self._train = tf.train.AdamOptimizer(
            learning_rate=l_rate).minimize(self._loss)
    
    def predict(self, state):
        x = np.reshape(state, [1, self.input_size])
        return self.session.run(self._Qpred, feed_dict = {self._X: x})
    
    def update(self, x_stack, y_stack):
        return self.session.run([self._loss, self._train], feed_dict = 
                               {self._X: x_stack, self._Y: y_stack})

In [3]:
def replay_train(mainDQN, targetDQN, train_batch):
    x_stack = np.empty(0).reshape(0, input_size)
    y_stack = np.empty(0).reshape(0, output_size)
    
    # Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)
        
        # terminal?
        if done:
            Q[0, action] = reward
        else:
            # get target from target DQN (Q')
            Q[0, action] = reward + dis * np.max(targetDQN.predict(next_state))
            
        y_stack = np.vstack([y_stack, Q])
        x_stack = np.vstack([x_stack, state])
        
    # Train our network using target and predicted Q values on each episode
    return mainDQN.update(x_stack, y_stack)

In [4]:
def get_copy_var_ops(*, dest_scope_name = "target", src_scope_name = "main"):
    # Copy variables src_scope to dest_scope
    op_holder = []
    
    src_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = src_scope_name)
    dest_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = dest_scope_name)
    
    for src_var, dest_var in zip(src_vars, dest_vars):
        op_holder.append(dest_var.assign(src_var.value()))
        
    return op_holder

In [5]:
def bot_play(mainDQN):
    # See our trained network in action
    s = env.reset()
    reward_sum = 0
    while True:
        env.render()
        a = np.argmax(mainDQN.predict(s))
        s, reward, done, _ = env.step(a)
        reward_sum += reward
        if done:
            print("Total score: {}".format(reward_sum))
            break

In [6]:
def main():
    max_episodes = 5000
    # store the previous observations in replay memory
    replay_buffer = deque()
    
    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size, name = "main")
        targetDQN = DQN(sess, input_size, output_size, name = "target")
        tf.global_variables_initializer().run()
    
        # initial copy q_net -> target_net
        copy_ops = get_copy_var_ops(dest_scope_name = "target", src_scope_name = "main")
    
        sess.run(copy_ops)
    
        for episode in range(max_episodes):
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0
            state = env.reset()
        
            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    # Choose an action by greedily from the Q-netsork
                    action = np.argmax(mainDQN.predict(state))
                
                # Get new state and reward from environment
                next_state, reward, done, _ = env.step(action)
                if done: # Penalty
                    reward = -90
                
                # Save the experience to our buffer
                replay_buffer.append((state, action, reward, next_state, done))
                if len(replay_buffer) > REPLAY_MEMORY:
                    replay_buffer.popleft()

                state = next_state
                step_count += 1
                if step_count > 10000: # Good enough. Let's move on
                    break

            print("Episode: {}  step:  {}".format(episode, step_count))
            if step_count > 10000:
                pass
                # break

            if episode % 10 == 1:  # train every 10 episode
                # Get a random batch of experiences.
                for _ in range(50):
                    minibatch = random.sample(replay_buffer, 10)
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)

                print("Loss: ", loss)
                #copy q_net -> target_net
                sess.run(copy_ops)

        bot_play(mainDQN)
            
if __name__ == "__main__":
    main()

Episode: 0  step:  14
Episode: 1  step:  8
Loss:  328.695
Episode: 2  step:  24
Episode: 3  step:  20
Episode: 4  step:  29
Episode: 5  step:  79
Episode: 6  step:  112
Episode: 7  step:  58
Episode: 8  step:  53
Episode: 9  step:  81
Episode: 10  step:  31
Episode: 11  step:  38
Loss:  451.431
Episode: 12  step:  64
Episode: 13  step:  58
Episode: 14  step:  17
Episode: 15  step:  64
Episode: 16  step:  60
Episode: 17  step:  32
Episode: 18  step:  56
Episode: 19  step:  55
Episode: 20  step:  75
Episode: 21  step:  51
Loss:  4.12526
Episode: 22  step:  10
Episode: 23  step:  11
Episode: 24  step:  10
Episode: 25  step:  10
Episode: 26  step:  12
Episode: 27  step:  9
Episode: 28  step:  15
Episode: 29  step:  9
Episode: 30  step:  13
Episode: 31  step:  9
Loss:  2.44815
Episode: 32  step:  13
Episode: 33  step:  10
Episode: 34  step:  10
Episode: 35  step:  8
Episode: 36  step:  11
Episode: 37  step:  12
Episode: 38  step:  10
Episode: 39  step:  9
Episode: 40  step:  10
Episode: 41 

Loss:  1.644
Episode: 332  step:  12
Episode: 333  step:  8
Episode: 334  step:  8
Episode: 335  step:  9
Episode: 336  step:  9
Episode: 337  step:  9
Episode: 338  step:  9
Episode: 339  step:  9
Episode: 340  step:  9
Episode: 341  step:  9
Loss:  3.53419
Episode: 342  step:  34
Episode: 343  step:  24
Episode: 344  step:  24
Episode: 345  step:  27
Episode: 346  step:  21
Episode: 347  step:  27
Episode: 348  step:  27
Episode: 349  step:  20
Episode: 350  step:  27
Episode: 351  step:  29
Loss:  4.38337
Episode: 352  step:  45
Episode: 353  step:  19
Episode: 354  step:  19
Episode: 355  step:  17
Episode: 356  step:  20
Episode: 357  step:  28
Episode: 358  step:  30
Episode: 359  step:  21
Episode: 360  step:  30
Episode: 361  step:  26
Loss:  3.61179
Episode: 362  step:  20
Episode: 363  step:  21
Episode: 364  step:  14
Episode: 365  step:  22
Episode: 366  step:  21
Episode: 367  step:  30
Episode: 368  step:  14
Episode: 369  step:  26
Episode: 370  step:  12
Episode: 371  s

Episode: 655  step:  209
Episode: 656  step:  1017
Episode: 657  step:  59
Episode: 658  step:  50
Episode: 659  step:  285
Episode: 660  step:  214
Episode: 661  step:  110
Loss:  4.91964
Episode: 662  step:  9
Episode: 663  step:  10
Episode: 664  step:  9
Episode: 665  step:  11
Episode: 666  step:  8
Episode: 667  step:  8
Episode: 668  step:  10
Episode: 669  step:  9
Episode: 670  step:  8
Episode: 671  step:  8
Loss:  332.805
Episode: 672  step:  25
Episode: 673  step:  59
Episode: 674  step:  29
Episode: 675  step:  42
Episode: 676  step:  43
Episode: 677  step:  24
Episode: 678  step:  25
Episode: 679  step:  37
Episode: 680  step:  25
Episode: 681  step:  68
Loss:  348.732
Episode: 682  step:  12
Episode: 683  step:  26
Episode: 684  step:  22
Episode: 685  step:  25
Episode: 686  step:  15
Episode: 687  step:  26
Episode: 688  step:  35
Episode: 689  step:  21
Episode: 690  step:  12
Episode: 691  step:  20
Loss:  3.07358
Episode: 692  step:  18
Episode: 693  step:  18
Episo

Episode: 958  step:  10000
Episode: 959  step:  10000
Episode: 960  step:  10000
Episode: 961  step:  10000
Loss:  0.755453
Episode: 962  step:  10000
Episode: 963  step:  10000
Episode: 964  step:  10000
Episode: 965  step:  10000
Episode: 966  step:  10000
Episode: 967  step:  10000
Episode: 968  step:  10000
Episode: 969  step:  10000
Episode: 970  step:  10000
Episode: 971  step:  10000
Loss:  1.11733
Episode: 972  step:  10000
Episode: 973  step:  10000
Episode: 974  step:  10000
Episode: 975  step:  10000
Episode: 976  step:  10000
Episode: 977  step:  10000
Episode: 978  step:  10000
Episode: 979  step:  10000
Episode: 980  step:  10000
Episode: 981  step:  10000
Loss:  1.1267
Episode: 982  step:  10000
Episode: 983  step:  10000
Episode: 984  step:  10000
Episode: 985  step:  10000
Episode: 986  step:  10000
Episode: 987  step:  10000
Episode: 988  step:  10000
Episode: 989  step:  10000
Episode: 990  step:  10000
Episode: 991  step:  10000
Loss:  1.15584
Episode: 992  step:  1

Episode: 1238  step:  10000
Episode: 1239  step:  10000
Episode: 1240  step:  10000
Episode: 1241  step:  10000
Loss:  0.950724
Episode: 1242  step:  1423
Episode: 1243  step:  1686
Episode: 1244  step:  1428
Episode: 1245  step:  1407
Episode: 1246  step:  1923
Episode: 1247  step:  1348
Episode: 1248  step:  2080
Episode: 1249  step:  1539
Episode: 1250  step:  1239
Episode: 1251  step:  1649
Loss:  1.0083
Episode: 1252  step:  773
Episode: 1253  step:  2324
Episode: 1254  step:  934
Episode: 1255  step:  1116
Episode: 1256  step:  901
Episode: 1257  step:  787
Episode: 1258  step:  987
Episode: 1259  step:  1066
Episode: 1260  step:  1042
Episode: 1261  step:  914
Loss:  0.872831
Episode: 1262  step:  4885
Episode: 1263  step:  1194
Episode: 1264  step:  3572
Episode: 1265  step:  1357
Episode: 1266  step:  2646
Episode: 1267  step:  1571
Episode: 1268  step:  1621
Episode: 1269  step:  670
Episode: 1270  step:  3061
Episode: 1271  step:  632
Loss:  2.19934
Episode: 1272  step:  105

Episode: 1521  step:  1085
Loss:  0.636942
Episode: 1522  step:  10000
Episode: 1523  step:  10000
Episode: 1524  step:  10000
Episode: 1525  step:  10000
Episode: 1526  step:  10000
Episode: 1527  step:  10000
Episode: 1528  step:  10000
Episode: 1529  step:  10000
Episode: 1530  step:  10000
Episode: 1531  step:  10000
Loss:  0.983239
Episode: 1532  step:  3140
Episode: 1533  step:  4368
Episode: 1534  step:  10000
Episode: 1535  step:  10000
Episode: 1536  step:  974
Episode: 1537  step:  10000
Episode: 1538  step:  10000
Episode: 1539  step:  5610
Episode: 1540  step:  5483
Episode: 1541  step:  10000
Loss:  0.530335
Episode: 1542  step:  3020
Episode: 1543  step:  10000
Episode: 1544  step:  10000
Episode: 1545  step:  1036
Episode: 1546  step:  5150
Episode: 1547  step:  2214
Episode: 1548  step:  4953
Episode: 1549  step:  4720
Episode: 1550  step:  10000
Episode: 1551  step:  10000
Loss:  1.13145
Episode: 1552  step:  2480
Episode: 1553  step:  1861
Episode: 1554  step:  2611
E

Episode: 1808  step:  1197
Episode: 1809  step:  953
Episode: 1810  step:  1069
Episode: 1811  step:  1585
Loss:  0.7947
Episode: 1812  step:  4357
Episode: 1813  step:  2652
Episode: 1814  step:  5465
Episode: 1815  step:  6046
Episode: 1816  step:  6760
Episode: 1817  step:  10000
Episode: 1818  step:  3036
Episode: 1819  step:  5275
Episode: 1820  step:  3057
Episode: 1821  step:  6891
Loss:  0.580977
Episode: 1822  step:  10000
Episode: 1823  step:  10000
Episode: 1824  step:  10000
Episode: 1825  step:  10000
Episode: 1826  step:  10000
Episode: 1827  step:  10000
Episode: 1828  step:  10000
Episode: 1829  step:  10000
Episode: 1830  step:  10000
Episode: 1831  step:  10000
Loss:  0.985141
Episode: 1832  step:  10000
Episode: 1833  step:  10000
Episode: 1834  step:  10000
Episode: 1835  step:  10000
Episode: 1836  step:  10000
Episode: 1837  step:  10000
Episode: 1838  step:  10000
Episode: 1839  step:  10000
Episode: 1840  step:  10000
Episode: 1841  step:  10000
Loss:  1.43338
E

Episode: 2091  step:  10000
Loss:  0.859961
Episode: 2092  step:  10000
Episode: 2093  step:  10000
Episode: 2094  step:  10000
Episode: 2095  step:  10000
Episode: 2096  step:  10000
Episode: 2097  step:  10000
Episode: 2098  step:  10000
Episode: 2099  step:  10000
Episode: 2100  step:  10000
Episode: 2101  step:  10000
Loss:  0.656778
Episode: 2102  step:  10000
Episode: 2103  step:  10000
Episode: 2104  step:  10000
Episode: 2105  step:  10000
Episode: 2106  step:  10000
Episode: 2107  step:  10000
Episode: 2108  step:  10000
Episode: 2109  step:  10000
Episode: 2110  step:  10000
Episode: 2111  step:  10000
Loss:  1.1657
Episode: 2112  step:  2128
Episode: 2113  step:  2164
Episode: 2114  step:  1972
Episode: 2115  step:  2268
Episode: 2116  step:  2006
Episode: 2117  step:  2092
Episode: 2118  step:  2044
Episode: 2119  step:  1835
Episode: 2120  step:  2155
Episode: 2121  step:  1927
Loss:  1.91557
Episode: 2122  step:  10000
Episode: 2123  step:  10000
Episode: 2124  step:  100

Episode: 2381  step:  2446
Loss:  0.246813
Episode: 2382  step:  10000
Episode: 2383  step:  10000
Episode: 2384  step:  10000
Episode: 2385  step:  10000
Episode: 2386  step:  10000
Episode: 2387  step:  10000
Episode: 2388  step:  10000
Episode: 2389  step:  10000
Episode: 2390  step:  10000
Episode: 2391  step:  10000
Loss:  0.964742
Episode: 2392  step:  10000
Episode: 2393  step:  10000
Episode: 2394  step:  10000
Episode: 2395  step:  10000
Episode: 2396  step:  10000
Episode: 2397  step:  10000
Episode: 2398  step:  10000
Episode: 2399  step:  10000
Episode: 2400  step:  10000
Episode: 2401  step:  10000
Loss:  1.0283
Episode: 2402  step:  94
Episode: 2403  step:  154
Episode: 2404  step:  301
Episode: 2405  step:  10000
Episode: 2406  step:  487
Episode: 2407  step:  9466
Episode: 2408  step:  10000
Episode: 2409  step:  10000
Episode: 2410  step:  2765
Episode: 2411  step:  7934
Loss:  3.84264
Episode: 2412  step:  39
Episode: 2413  step:  93
Episode: 2414  step:  66
Episode: 

Episode: 2668  step:  3939
Episode: 2669  step:  2504
Episode: 2670  step:  2170
Episode: 2671  step:  2567
Loss:  3.08752
Episode: 2672  step:  4242
Episode: 2673  step:  10000
Episode: 2674  step:  5861
Episode: 2675  step:  7800
Episode: 2676  step:  9480
Episode: 2677  step:  6282
Episode: 2678  step:  10000
Episode: 2679  step:  6218
Episode: 2680  step:  10000
Episode: 2681  step:  10000
Loss:  1.47624
Episode: 2682  step:  10000
Episode: 2683  step:  6196
Episode: 2684  step:  10000
Episode: 2685  step:  7923
Episode: 2686  step:  5474
Episode: 2687  step:  10000
Episode: 2688  step:  6374
Episode: 2689  step:  6554
Episode: 2690  step:  10000
Episode: 2691  step:  7450
Loss:  2.43917
Episode: 2692  step:  10000
Episode: 2693  step:  10000
Episode: 2694  step:  10000
Episode: 2695  step:  10000
Episode: 2696  step:  10000
Episode: 2697  step:  10000
Episode: 2698  step:  10000
Episode: 2699  step:  10000
Episode: 2700  step:  10000
Episode: 2701  step:  10000
Loss:  2.54282
Epis

Episode: 2955  step:  10000
Episode: 2956  step:  10000
Episode: 2957  step:  10000
Episode: 2958  step:  10000
Episode: 2959  step:  10000
Episode: 2960  step:  10000
Episode: 2961  step:  10000
Loss:  3.49587
Episode: 2962  step:  10000
Episode: 2963  step:  10000
Episode: 2964  step:  10000
Episode: 2965  step:  10000
Episode: 2966  step:  10000
Episode: 2967  step:  10000
Episode: 2968  step:  10000
Episode: 2969  step:  10000
Episode: 2970  step:  10000
Episode: 2971  step:  10000
Loss:  1.96205
Episode: 2972  step:  10000
Episode: 2973  step:  10000
Episode: 2974  step:  10000
Episode: 2975  step:  10000
Episode: 2976  step:  10000
Episode: 2977  step:  10000
Episode: 2978  step:  10000
Episode: 2979  step:  10000
Episode: 2980  step:  10000
Episode: 2981  step:  10000
Loss:  0.961875
Episode: 2982  step:  7718
Episode: 2983  step:  10000
Episode: 2984  step:  10000
Episode: 2985  step:  5634
Episode: 2986  step:  2194
Episode: 2987  step:  10000
Episode: 2988  step:  10000
Episo

Episode: 3240  step:  6076
Episode: 3241  step:  10000
Loss:  1.85927
Episode: 3242  step:  10000
Episode: 3243  step:  10000
Episode: 3244  step:  10000
Episode: 3245  step:  10000
Episode: 3246  step:  10000
Episode: 3247  step:  10000
Episode: 3248  step:  10000
Episode: 3249  step:  10000
Episode: 3250  step:  6913
Episode: 3251  step:  10000
Loss:  1.44427
Episode: 3252  step:  1513
Episode: 3253  step:  6385
Episode: 3254  step:  3425
Episode: 3255  step:  101
Episode: 3256  step:  10000
Episode: 3257  step:  3347
Episode: 3258  step:  6851
Episode: 3259  step:  6523
Episode: 3260  step:  10000
Episode: 3261  step:  5431
Loss:  5.89262
Episode: 3262  step:  342
Episode: 3263  step:  251
Episode: 3264  step:  241
Episode: 3265  step:  176
Episode: 3266  step:  328
Episode: 3267  step:  573
Episode: 3268  step:  212
Episode: 3269  step:  362
Episode: 3270  step:  746
Episode: 3271  step:  96
Loss:  0.476653
Episode: 3272  step:  10000
Episode: 3273  step:  9278
Episode: 3274  step:

Episode: 3527  step:  10000
Episode: 3528  step:  10000
Episode: 3529  step:  10000
Episode: 3530  step:  10000
Episode: 3531  step:  10000
Loss:  1.09099
Episode: 3532  step:  724
Episode: 3533  step:  3471
Episode: 3534  step:  4012
Episode: 3535  step:  3535
Episode: 3536  step:  3990
Episode: 3537  step:  5317
Episode: 3538  step:  9179
Episode: 3539  step:  641
Episode: 3540  step:  7788
Episode: 3541  step:  3560
Loss:  2.23063
Episode: 3542  step:  1041
Episode: 3543  step:  1158
Episode: 3544  step:  2067
Episode: 3545  step:  267
Episode: 3546  step:  556
Episode: 3547  step:  412
Episode: 3548  step:  353
Episode: 3549  step:  520
Episode: 3550  step:  1102
Episode: 3551  step:  1039
Loss:  3.75337
Episode: 3552  step:  10000
Episode: 3553  step:  1239
Episode: 3554  step:  10000
Episode: 3555  step:  10000
Episode: 3556  step:  10000
Episode: 3557  step:  7163
Episode: 3558  step:  8741
Episode: 3559  step:  3567
Episode: 3560  step:  10000
Episode: 3561  step:  10000
Loss: 

Episode: 3814  step:  701
Episode: 3815  step:  233
Episode: 3816  step:  423
Episode: 3817  step:  497
Episode: 3818  step:  261
Episode: 3819  step:  463
Episode: 3820  step:  225
Episode: 3821  step:  297
Loss:  0.410487
Episode: 3822  step:  10000
Episode: 3823  step:  3752
Episode: 3824  step:  10000
Episode: 3825  step:  10000
Episode: 3826  step:  10000
Episode: 3827  step:  10000
Episode: 3828  step:  10000
Episode: 3829  step:  10000
Episode: 3830  step:  10000
Episode: 3831  step:  10000
Loss:  1.40773
Episode: 3832  step:  246
Episode: 3833  step:  222
Episode: 3834  step:  269
Episode: 3835  step:  173
Episode: 3836  step:  190
Episode: 3837  step:  545
Episode: 3838  step:  149
Episode: 3839  step:  250
Episode: 3840  step:  120
Episode: 3841  step:  468
Loss:  3.67758
Episode: 3842  step:  4926
Episode: 3843  step:  8560
Episode: 3844  step:  8603
Episode: 3845  step:  3969
Episode: 3846  step:  10000
Episode: 3847  step:  6280
Episode: 3848  step:  10000
Episode: 3849  s

Loss:  0.356445
Episode: 4102  step:  190
Episode: 4103  step:  620
Episode: 4104  step:  539
Episode: 4105  step:  204
Episode: 4106  step:  262
Episode: 4107  step:  190
Episode: 4108  step:  1318
Episode: 4109  step:  510
Episode: 4110  step:  510
Episode: 4111  step:  346
Loss:  4.49887
Episode: 4112  step:  159
Episode: 4113  step:  2546
Episode: 4114  step:  3437
Episode: 4115  step:  2871
Episode: 4116  step:  466
Episode: 4117  step:  1045
Episode: 4118  step:  4607
Episode: 4119  step:  2147
Episode: 4120  step:  428
Episode: 4121  step:  2855
Loss:  0.316704
Episode: 4122  step:  215
Episode: 4123  step:  186
Episode: 4124  step:  555
Episode: 4125  step:  2222
Episode: 4126  step:  176
Episode: 4127  step:  436
Episode: 4128  step:  1220
Episode: 4129  step:  405
Episode: 4130  step:  1208
Episode: 4131  step:  1274
Loss:  1.95834
Episode: 4132  step:  206
Episode: 4133  step:  197
Episode: 4134  step:  211
Episode: 4135  step:  485
Episode: 4136  step:  299
Episode: 4137  s

Episode: 4392  step:  6502
Episode: 4393  step:  6604
Episode: 4394  step:  1650
Episode: 4395  step:  9915
Episode: 4396  step:  4794
Episode: 4397  step:  2262
Episode: 4398  step:  3029
Episode: 4399  step:  9476
Episode: 4400  step:  2796
Episode: 4401  step:  3351
Loss:  2.4445
Episode: 4402  step:  10000
Episode: 4403  step:  2308
Episode: 4404  step:  7338
Episode: 4405  step:  9357
Episode: 4406  step:  10000
Episode: 4407  step:  6040
Episode: 4408  step:  5271
Episode: 4409  step:  4819
Episode: 4410  step:  9198
Episode: 4411  step:  4999
Loss:  2.4693
Episode: 4412  step:  10000
Episode: 4413  step:  9080
Episode: 4414  step:  4256
Episode: 4415  step:  2245
Episode: 4416  step:  10000
Episode: 4417  step:  8511
Episode: 4418  step:  5529
Episode: 4419  step:  7371
Episode: 4420  step:  3466
Episode: 4421  step:  1029
Loss:  3.4948
Episode: 4422  step:  4524
Episode: 4423  step:  3334
Episode: 4424  step:  7467
Episode: 4425  step:  4202
Episode: 4426  step:  4866
Episode: 

Episode: 4681  step:  3278
Loss:  2.29462
Episode: 4682  step:  1928
Episode: 4683  step:  3513
Episode: 4684  step:  2724
Episode: 4685  step:  2811
Episode: 4686  step:  1425
Episode: 4687  step:  2559
Episode: 4688  step:  2561
Episode: 4689  step:  3778
Episode: 4690  step:  2632
Episode: 4691  step:  1832
Loss:  3.20303
Episode: 4692  step:  2045
Episode: 4693  step:  1284
Episode: 4694  step:  1751
Episode: 4695  step:  2858
Episode: 4696  step:  3425
Episode: 4697  step:  1662
Episode: 4698  step:  2825
Episode: 4699  step:  4703
Episode: 4700  step:  1636
Episode: 4701  step:  2991
Loss:  2.35411
Episode: 4702  step:  2192
Episode: 4703  step:  2840
Episode: 4704  step:  2406
Episode: 4705  step:  2245
Episode: 4706  step:  1833
Episode: 4707  step:  5521
Episode: 4708  step:  3678
Episode: 4709  step:  3239
Episode: 4710  step:  2377
Episode: 4711  step:  4082
Loss:  1.98432
Episode: 4712  step:  1540
Episode: 4713  step:  2145
Episode: 4714  step:  1014
Episode: 4715  step:  

Episode: 4970  step:  4250
Episode: 4971  step:  1390
Loss:  2.47388
Episode: 4972  step:  3953
Episode: 4973  step:  2526
Episode: 4974  step:  1815
Episode: 4975  step:  2525
Episode: 4976  step:  1761
Episode: 4977  step:  2535
Episode: 4978  step:  2916
Episode: 4979  step:  3366
Episode: 4980  step:  2904
Episode: 4981  step:  1601
Loss:  1.74439
Episode: 4982  step:  1634
Episode: 4983  step:  2715
Episode: 4984  step:  2650
Episode: 4985  step:  1430
Episode: 4986  step:  1630
Episode: 4987  step:  2139
Episode: 4988  step:  1737
Episode: 4989  step:  1349
Episode: 4990  step:  2239
Episode: 4991  step:  2739
Loss:  0.8837
Episode: 4992  step:  1558
Episode: 4993  step:  1364
Episode: 4994  step:  2295
Episode: 4995  step:  2060
Episode: 4996  step:  2079
Episode: 4997  step:  2372
Episode: 4998  step:  1506
Episode: 4999  step:  2159


AttributeError: 'NoneType' object has no attribute 'flip'