In [1]:
import numpy as np
import tensorflow as tf
import random
import dqn
from collections import deque

import gym
# Create the CartPole-v0 environment shared by training (main) and playback (bot_play).
env = gym.make('CartPole-v0')
# Disabled override; presumably raises the per-episode step limit -- TODO confirm before enabling.
#env._max_episode_steps = 400

# Constants defining our neural network
# Width of the network input: length of one observation vector.
input_size = env.observation_space.shape[0]
# Width of the network output: one Q-value per discrete action.
output_size = env.action_space.n

# Module-level placeholder only; main() binds its own local mainDQN.
mainDQN = None

# Discount factor applied to the bootstrapped next-state value in replay_train.
dis = 0.9
# Maximum number of transitions retained in the replay buffer.
REPLAY_MEMORY = 50000

[2017-11-15 01:13:16,371] Making new env: CartPole-v0


In [2]:
class DQN:
    """Small fully connected Q-network built with the TensorFlow 1.x graph API.

    The network maps an observation of width ``input_size`` through one tanh
    hidden layer to ``output_size`` Q-values. All trainable weights are
    created under the variable scope ``name`` so that two instances (for
    example "main" and "target") keep separate variables.
    """

    def __init__(self, session, input_size, output_size, name="main"):
        """Store the configuration and build the graph.

        Args:
            session: session object used to run prediction and training ops.
            input_size: number of features in one observation.
            output_size: number of Q-values produced (one per action).
            name: variable scope under which the weights are created.
        """
        self.session = session
        self.input_size = input_size
        self.output_size = output_size
        self.net_name = name

        self._build_network()

    def _build_network(self, h_size = 10, l_rate = 1e-1):
        """Create placeholders, weights, the loss and the training op.

        Args:
            h_size: number of units in the hidden layer.
            l_rate: learning rate handed to the Adam optimizer.
        """
        with tf.variable_scope(self.net_name):
            self._X = tf.placeholder(
                tf.float32, [None, self.input_size], name = "input_x")

            # First layer of weights
            W1 = tf.get_variable("W1", shape=[self.input_size, h_size],
                             initializer=tf.contrib.layers.xavier_initializer())
            layer1 = tf.nn.tanh(tf.matmul(self._X, W1))

            # Second Layer of weights
            W2 = tf.get_variable("W2", shape=[h_size, self.output_size],
                             initializer=tf.contrib.layers.xavier_initializer())

            # Q prediction
            self._Qpred = tf.matmul(layer1, W2)

        # We need to define the parts of the network needed for learning a policy
        self._Y = tf.placeholder(
            shape = [None, self.output_size], dtype = tf.float32)

        # Loss function: mean squared error between target and predicted Q-values
        self._loss = tf.reduce_mean(tf.square(self._Y - self._Qpred))
        # Learning
        self._train = tf.train.AdamOptimizer(
            learning_rate=l_rate).minimize(self._loss)

    def predict(self, state):
        """Return predicted Q-values for one state or a batch of states.

        Args:
            state: a single observation with ``input_size`` values, or any
                array-like whose total size is a multiple of ``input_size``
                (for example a ``(batch, input_size)`` array).

        Returns:
            The result of running the Q prediction op: one row of Q-values
            per input row, so a single observation yields a one-row result.
        """
        # -1 lets numpy infer the row count, so batches are accepted while a
        # single observation still becomes a one-row matrix as before.
        x = np.reshape(state, [-1, self.input_size])
        return self.session.run(self._Qpred, feed_dict = {self._X: x})

    def update(self, x_stack, y_stack):
        """Run one optimizer step on a batch.

        Args:
            x_stack: batch of observations fed to the input placeholder.
            y_stack: batch of target Q-values fed to the target placeholder.

        Returns:
            The two-element result of running ``[loss, train_op]``.
        """
        return self.session.run([self._loss, self._train], feed_dict =
                               {self._X: x_stack, self._Y: y_stack})

In [3]:
def replay_train(mainDQN, targetDQN, train_batch):
    """Fit the main network on a batch of stored transitions.

    For every transition the main network's current Q-values are taken as the
    starting target; only the entry of the action actually taken is replaced,
    either by the raw reward (terminal transition) or by the reward plus the
    discounted maximum Q-value the target network predicts for the next state.

    Args:
        mainDQN: network being trained; must provide ``predict`` and ``update``.
        targetDQN: network used to evaluate next states; must provide ``predict``.
        train_batch: iterable of ``(state, action, reward, next_state, done)``.

    Returns:
        Whatever ``mainDQN.update`` returns for the assembled batch.
    """
    # Collect rows in lists and stack once at the end: calling np.vstack inside
    # the loop re-copies every previously stacked row on each iteration.
    state_rows = []
    target_rows = []

    # Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = mainDQN.predict(state)

        # terminal?
        if done:
            Q[0, action] = reward
        else:
            # get target from target DQN (Q')
            Q[0, action] = reward + dis * np.max(targetDQN.predict(next_state))

        target_rows.append(Q)
        state_rows.append(state)

    if state_rows:
        x_stack = np.vstack(state_rows)
        y_stack = np.vstack(target_rows)
    else:
        # Keep the empty-batch shapes the network placeholders expect.
        x_stack = np.empty((0, input_size))
        y_stack = np.empty((0, output_size))

    # Train our network using target and predicted Q values
    return mainDQN.update(x_stack, y_stack)

In [4]:
def get_copy_var_ops(*, dest_scope_name = "target", src_scope_name = "main"):
    """Build the ops that copy trainable variables from one scope to another.

    Variables are paired positionally in the order the two collections return
    them; pairing stops at the shorter of the two collections.

    Args:
        dest_scope_name: scope whose trainable variables are overwritten.
        src_scope_name: scope whose trainable variables are read.

    Returns:
        A list with one assign op per paired variable.
    """
    collection_key = tf.GraphKeys.TRAINABLE_VARIABLES
    source_variables = tf.get_collection(collection_key, scope = src_scope_name)
    target_variables = tf.get_collection(collection_key, scope = dest_scope_name)

    return [
        target.assign(source.value())
        for source, target in zip(source_variables, target_variables)
    ]

In [5]:
def bot_play(mainDQN):
    """Play one rendered episode, always taking the highest-valued action.

    Args:
        mainDQN: network providing ``predict``; its argmax picks each action.

    Prints the summed reward once the environment reports the episode done.
    """
    observation = env.reset()
    total_reward = 0
    finished = False

    while not finished:
        env.render()
        best_action = np.argmax(mainDQN.predict(observation))
        observation, step_reward, finished, _ = env.step(best_action)
        total_reward += step_reward

    print("Total score: {}".format(total_reward))

In [6]:
def main():
    """Train the main DQN on the module-level environment, then play once.

    Each episode is played with an epsilon-greedy policy whose exploration
    rate decays with the episode index, and every transition is stored in a
    bounded replay buffer. On episodes where ``episode % 10 == 1`` the main
    network is fitted on 50 random minibatches and its weights are then
    copied into the target network. After the last episode one rendered
    episode is played with the trained network.
    """
    max_episodes = 5000
    batch_size = 10
    # store the previous observations in replay memory; maxlen makes the deque
    # discard the oldest transition by itself once REPLAY_MEMORY is reached
    replay_buffer = deque(maxlen=REPLAY_MEMORY)
    # Step limit of the environment wrapper, if it exposes one. Used to tell
    # "the pole fell" apart from "the episode simply ran out of steps".
    max_episode_steps = getattr(env, "_max_episode_steps", None)

    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size, name = "main")
        targetDQN = DQN(sess, input_size, output_size, name = "target")
        tf.global_variables_initializer().run()

        # initial copy q_net -> target_net
        copy_ops = get_copy_var_ops(dest_scope_name = "target", src_scope_name = "main")

        sess.run(copy_ops)

        for episode in range(max_episodes):
            # Exploration rate: 1.0 on the first episode, decaying afterwards
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0
            state = env.reset()

            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    # Choose an action greedily from the Q-network
                    action = np.argmax(mainDQN.predict(state))

                # Get new state and reward from environment
                next_state, reward, done, _ = env.step(action)

                # Reaching the step limit is a success, not a failure: such a
                # transition must neither receive the penalty nor be stored as
                # terminal, otherwise surviving the whole episode is punished.
                timed_out = (max_episode_steps is not None
                             and step_count + 1 >= max_episode_steps)
                failed = done and not timed_out
                if failed: # Penalty
                    reward = -80

                # Save the experience to our buffer
                replay_buffer.append((state, action, reward, next_state, failed))

                state = next_state
                step_count += 1
                if step_count > 10000: # Good enough. Let's move on
                    break

            print("Episode: {}  step:  {}".format(episode, step_count))

            # train every 10 episodes, once enough transitions are stored for
            # random.sample to draw a full minibatch
            if episode % 10 == 1 and len(replay_buffer) >= batch_size:
                loss = None
                # Get a random batch of experiences.
                for _ in range(50):
                    minibatch = random.sample(replay_buffer, batch_size)
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)

                print("Loss: ", loss)
                # copy q_net -> target_net
                sess.run(copy_ops)

        bot_play(mainDQN)

if __name__ == "__main__":
    main()

Episode: 0  step:  13
Episode: 1  step:  12
Loss:  250.348
Episode: 2  step:  18
Episode: 3  step:  17
Episode: 4  step:  13
Episode: 5  step:  11
Episode: 6  step:  11
Episode: 7  step:  13
Episode: 8  step:  12
Episode: 9  step:  10
Episode: 10  step:  9
Episode: 11  step:  12
Loss:  400.349
Episode: 12  step:  13
Episode: 13  step:  12
Episode: 14  step:  16
Episode: 15  step:  15
Episode: 16  step:  16
Episode: 17  step:  14
Episode: 18  step:  13
Episode: 19  step:  11
Episode: 20  step:  12
Episode: 21  step:  16
Loss:  2.31308
Episode: 22  step:  34
Episode: 23  step:  19
Episode: 24  step:  19
Episode: 25  step:  12
Episode: 26  step:  15
Episode: 27  step:  17
Episode: 28  step:  17
Episode: 29  step:  17
Episode: 30  step:  21
Episode: 31  step:  16
Loss:  297.457
Episode: 32  step:  28
Episode: 33  step:  19
Episode: 34  step:  29
Episode: 35  step:  22
Episode: 36  step:  21
Episode: 37  step:  15
Episode: 38  step:  17
Episode: 39  step:  15
Episode: 40  step:  17
Episode:

Episode: 328  step:  78
Episode: 329  step:  138
Episode: 330  step:  82
Episode: 331  step:  41
Loss:  1.25491
Episode: 332  step:  54
Episode: 333  step:  42
Episode: 334  step:  57
Episode: 335  step:  59
Episode: 336  step:  48
Episode: 337  step:  40
Episode: 338  step:  72
Episode: 339  step:  42
Episode: 340  step:  50
Episode: 341  step:  43
Loss:  2.17977
Episode: 342  step:  52
Episode: 343  step:  36
Episode: 344  step:  74
Episode: 345  step:  32
Episode: 346  step:  54
Episode: 347  step:  71
Episode: 348  step:  62
Episode: 349  step:  37
Episode: 350  step:  46
Episode: 351  step:  40
Loss:  2.50165
Episode: 352  step:  200
Episode: 353  step:  200
Episode: 354  step:  200
Episode: 355  step:  200
Episode: 356  step:  200
Episode: 357  step:  200
Episode: 358  step:  200
Episode: 359  step:  200
Episode: 360  step:  200
Episode: 361  step:  200
Loss:  314.507
Episode: 362  step:  80
Episode: 363  step:  200
Episode: 364  step:  200
Episode: 365  step:  150
Episode: 366  

Loss:  6.35938
Episode: 652  step:  10
Episode: 653  step:  10
Episode: 654  step:  9
Episode: 655  step:  9
Episode: 656  step:  8
Episode: 657  step:  9
Episode: 658  step:  9
Episode: 659  step:  10
Episode: 660  step:  9
Episode: 661  step:  9
Loss:  4.87512
Episode: 662  step:  60
Episode: 663  step:  54
Episode: 664  step:  53
Episode: 665  step:  54
Episode: 666  step:  37
Episode: 667  step:  42
Episode: 668  step:  62
Episode: 669  step:  36
Episode: 670  step:  85
Episode: 671  step:  47
Loss:  5.58608
Episode: 672  step:  9
Episode: 673  step:  10
Episode: 674  step:  9
Episode: 675  step:  9
Episode: 676  step:  9
Episode: 677  step:  9
Episode: 678  step:  9
Episode: 679  step:  15
Episode: 680  step:  9
Episode: 681  step:  9
Loss:  3.62074
Episode: 682  step:  29
Episode: 683  step:  26
Episode: 684  step:  26
Episode: 685  step:  31
Episode: 686  step:  23
Episode: 687  step:  20
Episode: 688  step:  19
Episode: 689  step:  39
Episode: 690  step:  23
Episode: 691  step:

Episode: 977  step:  71
Episode: 978  step:  24
Episode: 979  step:  22
Episode: 980  step:  45
Episode: 981  step:  26
Loss:  4.24615
Episode: 982  step:  10
Episode: 983  step:  10
Episode: 984  step:  9
Episode: 985  step:  9
Episode: 986  step:  10
Episode: 987  step:  9
Episode: 988  step:  9
Episode: 989  step:  8
Episode: 990  step:  10
Episode: 991  step:  9
Loss:  4.62641
Episode: 992  step:  22
Episode: 993  step:  30
Episode: 994  step:  32
Episode: 995  step:  34
Episode: 996  step:  28
Episode: 997  step:  28
Episode: 998  step:  24
Episode: 999  step:  25
Episode: 1000  step:  38
Episode: 1001  step:  32
Loss:  2.86035
Episode: 1002  step:  10
Episode: 1003  step:  8
Episode: 1004  step:  10
Episode: 1005  step:  9
Episode: 1006  step:  9
Episode: 1007  step:  9
Episode: 1008  step:  8
Episode: 1009  step:  9
Episode: 1010  step:  9
Episode: 1011  step:  9
Loss:  4.6406
Episode: 1012  step:  25
Episode: 1013  step:  39
Episode: 1014  step:  46
Episode: 1015  step:  25
Epi

Episode: 1290  step:  47
Episode: 1291  step:  21
Loss:  5.62036
Episode: 1292  step:  79
Episode: 1293  step:  90
Episode: 1294  step:  72
Episode: 1295  step:  70
Episode: 1296  step:  70
Episode: 1297  step:  76
Episode: 1298  step:  85
Episode: 1299  step:  77
Episode: 1300  step:  86
Episode: 1301  step:  79
Loss:  4.86036
Episode: 1302  step:  8
Episode: 1303  step:  9
Episode: 1304  step:  9
Episode: 1305  step:  10
Episode: 1306  step:  10
Episode: 1307  step:  8
Episode: 1308  step:  9
Episode: 1309  step:  9
Episode: 1310  step:  8
Episode: 1311  step:  9
Loss:  4.8205
Episode: 1312  step:  19
Episode: 1313  step:  23
Episode: 1314  step:  31
Episode: 1315  step:  20
Episode: 1316  step:  19
Episode: 1317  step:  28
Episode: 1318  step:  22
Episode: 1319  step:  26
Episode: 1320  step:  37
Episode: 1321  step:  21
Loss:  326.25
Episode: 1322  step:  31
Episode: 1323  step:  31
Episode: 1324  step:  30
Episode: 1325  step:  95
Episode: 1326  step:  34
Episode: 1327  step:  34


Loss:  5.35908
Episode: 1602  step:  9
Episode: 1603  step:  9
Episode: 1604  step:  10
Episode: 1605  step:  10
Episode: 1606  step:  112
Episode: 1607  step:  9
Episode: 1608  step:  8
Episode: 1609  step:  8
Episode: 1610  step:  10
Episode: 1611  step:  10
Loss:  5.90787
Episode: 1612  step:  24
Episode: 1613  step:  18
Episode: 1614  step:  10
Episode: 1615  step:  56
Episode: 1616  step:  24
Episode: 1617  step:  12
Episode: 1618  step:  18
Episode: 1619  step:  12
Episode: 1620  step:  12
Episode: 1621  step:  42
Loss:  3.24904
Episode: 1622  step:  76
Episode: 1623  step:  70
Episode: 1624  step:  93
Episode: 1625  step:  30
Episode: 1626  step:  45
Episode: 1627  step:  27
Episode: 1628  step:  25
Episode: 1629  step:  28
Episode: 1630  step:  52
Episode: 1631  step:  16
Loss:  1.34923
Episode: 1632  step:  12
Episode: 1633  step:  11
Episode: 1634  step:  25
Episode: 1635  step:  14
Episode: 1636  step:  10
Episode: 1637  step:  11
Episode: 1638  step:  12
Episode: 1639  step

Episode: 1919  step:  29
Episode: 1920  step:  25
Episode: 1921  step:  34
Loss:  2.57625
Episode: 1922  step:  16
Episode: 1923  step:  8
Episode: 1924  step:  10
Episode: 1925  step:  10
Episode: 1926  step:  10
Episode: 1927  step:  17
Episode: 1928  step:  9
Episode: 1929  step:  14
Episode: 1930  step:  12
Episode: 1931  step:  9
Loss:  312.944
Episode: 1932  step:  16
Episode: 1933  step:  9
Episode: 1934  step:  15
Episode: 1935  step:  13
Episode: 1936  step:  10
Episode: 1937  step:  34
Episode: 1938  step:  9
Episode: 1939  step:  8
Episode: 1940  step:  9
Episode: 1941  step:  10
Loss:  340.459
Episode: 1942  step:  28
Episode: 1943  step:  32
Episode: 1944  step:  47
Episode: 1945  step:  33
Episode: 1946  step:  29
Episode: 1947  step:  44
Episode: 1948  step:  23
Episode: 1949  step:  28
Episode: 1950  step:  58
Episode: 1951  step:  43
Loss:  371.204
Episode: 1952  step:  35
Episode: 1953  step:  29
Episode: 1954  step:  20
Episode: 1955  step:  26
Episode: 1956  step:  

Episode: 2231  step:  13
Loss:  5.94381
Episode: 2232  step:  22
Episode: 2233  step:  28
Episode: 2234  step:  39
Episode: 2235  step:  49
Episode: 2236  step:  37
Episode: 2237  step:  26
Episode: 2238  step:  21
Episode: 2239  step:  21
Episode: 2240  step:  26
Episode: 2241  step:  26
Loss:  2.90074
Episode: 2242  step:  25
Episode: 2243  step:  25
Episode: 2244  step:  23
Episode: 2245  step:  36
Episode: 2246  step:  20
Episode: 2247  step:  21
Episode: 2248  step:  20
Episode: 2249  step:  27
Episode: 2250  step:  22
Episode: 2251  step:  25
Loss:  3.21932
Episode: 2252  step:  41
Episode: 2253  step:  26
Episode: 2254  step:  22
Episode: 2255  step:  48
Episode: 2256  step:  20
Episode: 2257  step:  23
Episode: 2258  step:  31
Episode: 2259  step:  29
Episode: 2260  step:  29
Episode: 2261  step:  28
Loss:  3.84753
Episode: 2262  step:  22
Episode: 2263  step:  27
Episode: 2264  step:  30
Episode: 2265  step:  22
Episode: 2266  step:  24
Episode: 2267  step:  36
Episode: 2268  

Loss:  3.17254
Episode: 2542  step:  19
Episode: 2543  step:  20
Episode: 2544  step:  26
Episode: 2545  step:  18
Episode: 2546  step:  20
Episode: 2547  step:  28
Episode: 2548  step:  21
Episode: 2549  step:  48
Episode: 2550  step:  15
Episode: 2551  step:  26
Loss:  2.15837
Episode: 2552  step:  14
Episode: 2553  step:  23
Episode: 2554  step:  17
Episode: 2555  step:  16
Episode: 2556  step:  19
Episode: 2557  step:  14
Episode: 2558  step:  14
Episode: 2559  step:  28
Episode: 2560  step:  19
Episode: 2561  step:  15
Loss:  388.162
Episode: 2562  step:  60
Episode: 2563  step:  62
Episode: 2564  step:  44
Episode: 2565  step:  64
Episode: 2566  step:  52
Episode: 2567  step:  82
Episode: 2568  step:  46
Episode: 2569  step:  61
Episode: 2570  step:  79
Episode: 2571  step:  52
Loss:  5.76437
Episode: 2572  step:  42
Episode: 2573  step:  30
Episode: 2574  step:  43
Episode: 2575  step:  56
Episode: 2576  step:  31
Episode: 2577  step:  27
Episode: 2578  step:  27
Episode: 2579  

Episode: 2858  step:  42
Episode: 2859  step:  42
Episode: 2860  step:  40
Episode: 2861  step:  49
Loss:  667.27
Episode: 2862  step:  37
Episode: 2863  step:  36
Episode: 2864  step:  50
Episode: 2865  step:  41
Episode: 2866  step:  60
Episode: 2867  step:  42
Episode: 2868  step:  46
Episode: 2869  step:  41
Episode: 2870  step:  46
Episode: 2871  step:  37
Loss:  295.252
Episode: 2872  step:  42
Episode: 2873  step:  74
Episode: 2874  step:  42
Episode: 2875  step:  38
Episode: 2876  step:  37
Episode: 2877  step:  44
Episode: 2878  step:  55
Episode: 2879  step:  37
Episode: 2880  step:  85
Episode: 2881  step:  51
Loss:  2.60764
Episode: 2882  step:  66
Episode: 2883  step:  79
Episode: 2884  step:  69
Episode: 2885  step:  81
Episode: 2886  step:  52
Episode: 2887  step:  58
Episode: 2888  step:  127
Episode: 2889  step:  43
Episode: 2890  step:  79
Episode: 2891  step:  127
Loss:  5.53812
Episode: 2892  step:  23
Episode: 2893  step:  32
Episode: 2894  step:  27
Episode: 2895 

Episode: 3168  step:  32
Episode: 3169  step:  32
Episode: 3170  step:  37
Episode: 3171  step:  30
Loss:  6.45402
Episode: 3172  step:  8
Episode: 3173  step:  8
Episode: 3174  step:  8
Episode: 3175  step:  9
Episode: 3176  step:  14
Episode: 3177  step:  9
Episode: 3178  step:  9
Episode: 3179  step:  9
Episode: 3180  step:  8
Episode: 3181  step:  8
Loss:  310.087
Episode: 3182  step:  28
Episode: 3183  step:  30
Episode: 3184  step:  22
Episode: 3185  step:  22
Episode: 3186  step:  22
Episode: 3187  step:  32
Episode: 3188  step:  18
Episode: 3189  step:  26
Episode: 3190  step:  28
Episode: 3191  step:  25
Loss:  3.12326
Episode: 3192  step:  50
Episode: 3193  step:  43
Episode: 3194  step:  35
Episode: 3195  step:  30
Episode: 3196  step:  22
Episode: 3197  step:  21
Episode: 3198  step:  22
Episode: 3199  step:  81
Episode: 3200  step:  21
Episode: 3201  step:  34
Loss:  1.51429
Episode: 3202  step:  19
Episode: 3203  step:  27
Episode: 3204  step:  31
Episode: 3205  step:  28

Loss:  310.283
Episode: 3482  step:  29
Episode: 3483  step:  29
Episode: 3484  step:  22
Episode: 3485  step:  22
Episode: 3486  step:  19
Episode: 3487  step:  20
Episode: 3488  step:  30
Episode: 3489  step:  24
Episode: 3490  step:  45
Episode: 3491  step:  35
Loss:  7.94403
Episode: 3492  step:  12
Episode: 3493  step:  14
Episode: 3494  step:  20
Episode: 3495  step:  13
Episode: 3496  step:  14
Episode: 3497  step:  12
Episode: 3498  step:  12
Episode: 3499  step:  14
Episode: 3500  step:  17
Episode: 3501  step:  12
Loss:  326.604
Episode: 3502  step:  72
Episode: 3503  step:  45
Episode: 3504  step:  76
Episode: 3505  step:  44
Episode: 3506  step:  69
Episode: 3507  step:  61
Episode: 3508  step:  41
Episode: 3509  step:  95
Episode: 3510  step:  57
Episode: 3511  step:  39
Loss:  4.62646
Episode: 3512  step:  12
Episode: 3513  step:  11
Episode: 3514  step:  14
Episode: 3515  step:  10
Episode: 3516  step:  19
Episode: 3517  step:  8
Episode: 3518  step:  12
Episode: 3519  s

Loss:  5.96745
Episode: 3792  step:  31
Episode: 3793  step:  26
Episode: 3794  step:  24
Episode: 3795  step:  27
Episode: 3796  step:  22
Episode: 3797  step:  23
Episode: 3798  step:  22
Episode: 3799  step:  23
Episode: 3800  step:  54
Episode: 3801  step:  25
Loss:  1.33826
Episode: 3802  step:  23
Episode: 3803  step:  21
Episode: 3804  step:  22
Episode: 3805  step:  26
Episode: 3806  step:  25
Episode: 3807  step:  25
Episode: 3808  step:  26
Episode: 3809  step:  24
Episode: 3810  step:  21
Episode: 3811  step:  23
Loss:  295.316
Episode: 3812  step:  19
Episode: 3813  step:  66
Episode: 3814  step:  20
Episode: 3815  step:  28
Episode: 3816  step:  20
Episode: 3817  step:  26
Episode: 3818  step:  27
Episode: 3819  step:  21
Episode: 3820  step:  26
Episode: 3821  step:  22
Loss:  5.77096
Episode: 3822  step:  20
Episode: 3823  step:  23
Episode: 3824  step:  19
Episode: 3825  step:  19
Episode: 3826  step:  21
Episode: 3827  step:  25
Episode: 3828  step:  25
Episode: 3829  

Loss:  3.92381
Episode: 4102  step:  46
Episode: 4103  step:  23
Episode: 4104  step:  30
Episode: 4105  step:  22
Episode: 4106  step:  20
Episode: 4107  step:  31
Episode: 4108  step:  21
Episode: 4109  step:  71
Episode: 4110  step:  37
Episode: 4111  step:  19
Loss:  6.06956
Episode: 4112  step:  46
Episode: 4113  step:  54
Episode: 4114  step:  49
Episode: 4115  step:  34
Episode: 4116  step:  39
Episode: 4117  step:  35
Episode: 4118  step:  45
Episode: 4119  step:  54
Episode: 4120  step:  46
Episode: 4121  step:  55
Loss:  4.2866
Episode: 4122  step:  9
Episode: 4123  step:  13
Episode: 4124  step:  10
Episode: 4125  step:  9
Episode: 4126  step:  8
Episode: 4127  step:  9
Episode: 4128  step:  9
Episode: 4129  step:  9
Episode: 4130  step:  8
Episode: 4131  step:  9
Loss:  2.95386
Episode: 4132  step:  28
Episode: 4133  step:  20
Episode: 4134  step:  24
Episode: 4135  step:  22
Episode: 4136  step:  22
Episode: 4137  step:  20
Episode: 4138  step:  28
Episode: 4139  step:  21

Loss:  6.98097
Episode: 4412  step:  23
Episode: 4413  step:  43
Episode: 4414  step:  29
Episode: 4415  step:  60
Episode: 4416  step:  26
Episode: 4417  step:  30
Episode: 4418  step:  24
Episode: 4419  step:  25
Episode: 4420  step:  50
Episode: 4421  step:  20
Loss:  4.00519
Episode: 4422  step:  21
Episode: 4423  step:  24
Episode: 4424  step:  29
Episode: 4425  step:  32
Episode: 4426  step:  23
Episode: 4427  step:  31
Episode: 4428  step:  21
Episode: 4429  step:  23
Episode: 4430  step:  23
Episode: 4431  step:  24
Loss:  1.99508
Episode: 4432  step:  36
Episode: 4433  step:  37
Episode: 4434  step:  39
Episode: 4435  step:  40
Episode: 4436  step:  33
Episode: 4437  step:  39
Episode: 4438  step:  35
Episode: 4439  step:  38
Episode: 4440  step:  61
Episode: 4441  step:  32
Loss:  3.77514
Episode: 4442  step:  18
Episode: 4443  step:  18
Episode: 4444  step:  11
Episode: 4445  step:  9
Episode: 4446  step:  47
Episode: 4447  step:  29
Episode: 4448  step:  21
Episode: 4449  s

Loss:  2.48414
Episode: 4722  step:  79
Episode: 4723  step:  72
Episode: 4724  step:  78
Episode: 4725  step:  78
Episode: 4726  step:  93
Episode: 4727  step:  104
Episode: 4728  step:  106
Episode: 4729  step:  81
Episode: 4730  step:  95
Episode: 4731  step:  72
Loss:  4.95631
Episode: 4732  step:  30
Episode: 4733  step:  46
Episode: 4734  step:  34
Episode: 4735  step:  25
Episode: 4736  step:  33
Episode: 4737  step:  43
Episode: 4738  step:  29
Episode: 4739  step:  33
Episode: 4740  step:  30
Episode: 4741  step:  25
Loss:  352.066
Episode: 4742  step:  25
Episode: 4743  step:  24
Episode: 4744  step:  18
Episode: 4745  step:  20
Episode: 4746  step:  18
Episode: 4747  step:  20
Episode: 4748  step:  16
Episode: 4749  step:  19
Episode: 4750  step:  18
Episode: 4751  step:  26
Loss:  4.07409
Episode: 4752  step:  47
Episode: 4753  step:  43
Episode: 4754  step:  25
Episode: 4755  step:  33
Episode: 4756  step:  27
Episode: 4757  step:  31
Episode: 4758  step:  27
Episode: 4759

AttributeError: 'NoneType' object has no attribute 'flip'