In [1]:
import gym
import numpy as np
from keras import backend as K
from keras.models import Model, load_model
from keras.layers import Input, Dense
from keras.callbacks import TensorBoard
import random
import math
from time import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class ReplayBuffer:
    """
    Fixed-capacity ring buffer of (state, action, reward, next_state) tuples.

    Slots are written in order starting at 0; once `max_size` items have been
    stored, every new item overwrites the oldest one.
    """

    def __init__(self, max_size):
        """
        Create an empty buffer that holds at most `max_size` items.
        Raises ValueError if `max_size` is not positive.
        """
        if max_size <= 0:
            raise ValueError("max_size must be a positive integer")
        self.max_size = max_size
        # slot that the next call to add() writes to
        self.index = 0
        self.buffer = [None] * max_size
        # number of slots currently holding an item (never exceeds max_size)
        self.count = 0

    def add(self, state, action, reward, next_state):
        """
        Add state, action, reward and next_state to replay buffer
        Use None for done state
        """
        self.buffer[self.index] = (state, action, reward, next_state)
        # wrap around so the oldest slot is reused once the buffer is full
        self.index = (self.index + 1) % self.max_size
        self.count = min(self.count + 1, self.max_size)

    def __len__(self):
        """
        Return current item count in replay buffer
        """
        return self.count

    def sample(self, count):
        """
        Sample up to `count` distinct items from replay buffer
        Result contains fewer items than `count` when the buffer holds fewer
        """
        count = min(count, self.count)
        # Filled slots are exactly indices [0, self.count), so draw indices
        # from that range instead of copying/filtering the whole buffer.
        chosen = random.sample(range(self.count), count)
        return [self.buffer[i] for i in chosen]

In [3]:
# Default capacity of the Learner's replay buffer (its `buffer_size` default).
BUFFER_SIZE = 10000
# Number of transitions sampled per replay step; the Learner also waits until
# the buffer holds at least this many before training or acting greedily.
BATCH_SIZE = 32

# The target network is re-synced whenever the Learner's act counter is a
# multiple of this value.
COPY_WEIGHTS_INTERVAL = 1000
    
# Exploration schedule used by Learner.act:
#   epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-LAMBDA * act_counter)
MIN_EPSILON = 0.01
MAX_EPSILON = 1.0
LAMBDA = 0.001
    
# Discount applied to the max next-state Q value when building training targets.
GAMMA = 0.99

In [4]:
class Learner:
    """
    Q-learning core: an online Keras network trained from a replay buffer,
    plus a target network (same architecture) that supplies the bootstrap
    values and is periodically overwritten with the online weights.
    """

    def __init__(self, learner_type = 'dqn-v0', ob_dimen = 0, act_dimen = 0, buffer_size = BUFFER_SIZE):
        """
        learner_type: label stored on the instance (not otherwise used here)
        ob_dimen:     length of the observation vector (network input size)
        act_dimen:    number of discrete actions (network output size)
        buffer_size:  capacity of the replay buffer
        """
        self.learner_type = learner_type
        self.ob_dimen = ob_dimen
        self.act_dimen = act_dimen
        self.replay_buffer = ReplayBuffer(buffer_size)
        # incremented on every act() call; drives epsilon decay and target syncs
        self.act_counter = 0
        self.tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
        self.model = self._build_model()
        self.target_model = self._build_model()
        # set same weights initially
        self._update_target_network()

        # only compile model because we don't train on target network, we just use it to predict q'
        self.model.compile(optimizer='adam', loss='mse')

    def _build_model(self):
        """
        Build an (uncompiled) network mapping an observation vector to one
        output per action: Dense(512, relu) -> Dense(64, relu) -> Dense(act_dimen).
        """
        input_ob = Input(shape=(self.ob_dimen,), dtype='float32')
        fc1 = Dense(512, activation='relu')(input_ob)
        fc2 = Dense(64, activation='relu')(fc1)
        out = Dense(self.act_dimen)(fc2)
        return Model(inputs=input_ob, outputs=out)

    def _predict(self, ob):
        """
        Run the online network on a single observation (wrapped into a batch of one).
        """
        return self.model.predict(np.asarray([ob]))

    def act(self, ob):
        """
        Choose an action for observation `ob` with an epsilon-greedy policy.

        Epsilon decays exponentially from MAX_EPSILON towards MIN_EPSILON with
        the number of act() calls. A random action is always returned while the
        replay buffer holds fewer than BATCH_SIZE transitions.
        """
        self.act_counter += 1
        epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * math.exp(-LAMBDA * self.act_counter)
        rand = random.random()
        if len(self.replay_buffer) < BATCH_SIZE or rand < epsilon:
            return random.randrange(self.act_dimen)
        return np.argmax(self._predict(ob))

    def learn(self, ob, act, reward, next_ob):
        """
        Store one transition, run one replay/training step, and sync the target
        network when act_counter is a multiple of COPY_WEIGHTS_INTERVAL.

        Pass next_ob=None to mark the transition as terminal.
        """
        self.replay_buffer.add(ob, act, reward, next_ob)

        self._replay()

        if self.act_counter % COPY_WEIGHTS_INTERVAL == 0:
            self._update_target_network()

    def _replay(self):
        """
        Fit the online network on one sampled batch; does nothing until the
        buffer holds at least BATCH_SIZE transitions.
        """
        if len(self.replay_buffer) < BATCH_SIZE:
            return

        samples = self.replay_buffer.sample(BATCH_SIZE)
        # placeholder input for terminal transitions; its prediction is never used
        empty_state = np.zeros(self.ob_dimen)
        obs = np.asarray([s[0] for s in samples])
        next_obs = np.asarray([empty_state if s[3] is None else s[3] for s in samples])

        qs = self.model.predict(obs)
        next_qs = self.target_model.predict(next_obs)

        # Only the taken action's value is replaced, so the other outputs
        # contribute zero error to the mse loss.
        # if next state is terminal (stored as None), target is: r
        # else, target is: r + GAMMA * max(q(s'))
        for idx, (_, act, reward, next_ob) in enumerate(samples):
            if next_ob is None:
                qs[idx][act] = reward
            else:
                qs[idx][act] = reward + GAMMA * np.max(next_qs[idx])

        self.model.fit(obs, qs, verbose=0, callbacks=[self.tensorboard])

    def _update_target_network(self):
        """
        Copy the online network's weights into the target network.
        """
        print("Updating Network weights.")
        self.target_model.set_weights(self.model.get_weights())

    def save(self, path):
        """
        Save the online network to `path`.
        """
        self.model.save(path)

    def load(self, path):
        """
        Replace the online network with the model stored at `path` and bring
        the target network in line with it.
        """
        self.model = load_model(path)
        # Without this the target network would keep its previous (e.g. freshly
        # initialised) weights until the next scheduled sync, so bootstrap
        # targets would not match the loaded model.
        self._update_target_network()

In [5]:
class Agent:
    """
    Ties a gym environment to a Learner and runs the training loop.
    """

    def __init__(self, env_name = 'CartPole-v0', learner = 'dqn-v0'):
        """
        env_name: id passed to gym.make; the environment's observation space
                  must expose `shape` and its action space must expose `n`
        learner:  learner type label forwarded to Learner
        """
        self.env = gym.make(env_name)
        self.ob_dimen = self.env.observation_space.shape[0]
        self.act_dimen = self.env.action_space.n
        self.learner = Learner(learner, self.ob_dimen, self.act_dimen)

    def start(self, epoch_count = 1000):
        """
        Run `epoch_count` episodes, feeding every transition to the learner and
        printing the total reward of each episode.
        """
        for epc in range(1, epoch_count + 1):
            total_reward = 0
            done = False
            ob = self.env.reset()
            while not done:
                act = self.learner.act(ob)
                next_ob, reward, done, info = self.env.step(act)

                # An episode cut off by the time limit has not reached a
                # terminal state, so its last transition should still bootstrap
                # from next_ob. gym's TimeLimit wrapper is expected to flag this
                # via info['TimeLimit.truncated'] (version dependent); when the
                # key is absent every `done` is treated as terminal, as before.
                truncated = isinstance(info, dict) and bool(info.get('TimeLimit.truncated', False))
                terminal = done and not truncated

                self.learner.learn(ob, act, reward, None if terminal else next_ob)

                total_reward += reward
                ob = next_ob

            print("Epoch %s, Total Reward: %s" % (epc, total_reward))

    def save(self, path):
        """
        Save the learner's model to `path`.
        """
        self.learner.save(path)

    def load(self, path):
        """
        Load the learner's model from `path`.
        """
        self.learner.load(path)


In [6]:
# Environment to train on. The alternative used with this notebook is
# 'CartPole-v0' (the Agent default); change it here only.
ENV_NAME = 'MountainCar-v0'
# Number of training episodes.
EPOCH_COUNT = 5000

agent = Agent(env_name=ENV_NAME)
agent.start(EPOCH_COUNT)
# The checkpoint name is derived from ENV_NAME so the saved file always matches
# the environment that was trained (e.g. './mountaincar-v0-dqn-v0.h5').
agent.save('./{}-dqn-v0.h5'.format(ENV_NAME.lower()))

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Updating Network weights.
Epoch 1, Total Reward: -200.0
Epoch 2, Total Reward: -200.0
Epoch 3, Total Reward: -200.0
Epoch 4, Total Reward: -200.0
Updating Network weights.
Epoch 5, Total Reward: -200.0
Epoch 6, Total Reward: -200.0
Epoch 7, Total Reward: -200.0
Epoch 8, Total Reward: -200.0
Epoch 9, Total Reward: -200.0
Updating Network weights.
Epoch 10, Total Reward: -200.0
Epoch 11, Total Reward: -200.0
Epoch 12, Total Reward: -200.0
Epoch 13, Total Reward: -200.0
Epoch 14, Total Reward: -200.0
Updating Network weights.
Epoch 15, Total Reward: -200.0
Epoch 16, Total Reward: -200.0
Epoch 17, Total Reward: -200.0
Epoch 18, Total Reward: -200.0
Epoch 19, Total Reward: -200.0
Updating Network weights.
Epoch 20, Total Reward: -200.0
Epoch 21, Total Reward: -200.0
Epoch 22, Total Reward: -200.0
Epoch 23, Total Reward: -200.0
Epoch 24, Total Reward: -200.0
Updating Network weights.
E

Epoch 223, Total Reward: -200.0
Epoch 224, Total Reward: -193.0
Epoch 225, Total Reward: -92.0
Updating Network weights.
Epoch 226, Total Reward: -200.0
Epoch 227, Total Reward: -177.0
Epoch 228, Total Reward: -170.0
Epoch 229, Total Reward: -170.0
Epoch 230, Total Reward: -200.0
Epoch 231, Total Reward: -97.0
Updating Network weights.
Epoch 232, Total Reward: -177.0
Epoch 233, Total Reward: -200.0
Epoch 234, Total Reward: -153.0
Epoch 235, Total Reward: -180.0
Epoch 236, Total Reward: -200.0
Updating Network weights.
Epoch 237, Total Reward: -163.0
Epoch 238, Total Reward: -200.0
Epoch 239, Total Reward: -161.0
Epoch 240, Total Reward: -101.0
Epoch 241, Total Reward: -200.0
Epoch 242, Total Reward: -153.0
Epoch 243, Total Reward: -166.0
Updating Network weights.
Epoch 244, Total Reward: -200.0
Epoch 245, Total Reward: -87.0
Epoch 246, Total Reward: -158.0
Epoch 247, Total Reward: -200.0
Epoch 248, Total Reward: -200.0
Updating Network weights.
Epoch 249, Total Reward: -200.0
Epoch 250

Epoch 447, Total Reward: -155.0
Epoch 448, Total Reward: -200.0
Epoch 449, Total Reward: -200.0
Epoch 450, Total Reward: -181.0
Epoch 451, Total Reward: -163.0
Epoch 452, Total Reward: -160.0
Updating Network weights.
Epoch 453, Total Reward: -200.0
Epoch 454, Total Reward: -146.0
Epoch 455, Total Reward: -104.0
Epoch 456, Total Reward: -200.0
Epoch 457, Total Reward: -87.0
Epoch 458, Total Reward: -200.0
Updating Network weights.
Epoch 459, Total Reward: -200.0
Epoch 460, Total Reward: -162.0
Epoch 461, Total Reward: -157.0
Epoch 462, Total Reward: -200.0
Epoch 463, Total Reward: -168.0
Epoch 464, Total Reward: -200.0
Updating Network weights.
Epoch 465, Total Reward: -200.0
Epoch 466, Total Reward: -200.0
Epoch 467, Total Reward: -200.0
Epoch 468, Total Reward: -170.0
Epoch 469, Total Reward: -197.0
Updating Network weights.
Epoch 470, Total Reward: -200.0
Epoch 471, Total Reward: -170.0
Epoch 472, Total Reward: -88.0
Epoch 473, Total Reward: -87.0
Epoch 474, Total Reward: -200.0
Epo

Epoch 679, Total Reward: -146.0
Epoch 680, Total Reward: -147.0
Epoch 681, Total Reward: -123.0
Updating Network weights.
Epoch 682, Total Reward: -128.0
Epoch 683, Total Reward: -149.0
Epoch 684, Total Reward: -166.0
Epoch 685, Total Reward: -93.0
Epoch 686, Total Reward: -157.0
Epoch 687, Total Reward: -109.0
Epoch 688, Total Reward: -161.0
Epoch 689, Total Reward: -84.0
Updating Network weights.
Epoch 690, Total Reward: -103.0
Epoch 691, Total Reward: -134.0
Epoch 692, Total Reward: -149.0
Epoch 693, Total Reward: -106.0
Epoch 694, Total Reward: -146.0
Epoch 695, Total Reward: -151.0
Epoch 696, Total Reward: -104.0
Epoch 697, Total Reward: -104.0
Updating Network weights.
Epoch 698, Total Reward: -104.0
Epoch 699, Total Reward: -107.0
Epoch 700, Total Reward: -104.0
Epoch 701, Total Reward: -108.0
Epoch 702, Total Reward: -106.0
Epoch 703, Total Reward: -84.0
Epoch 704, Total Reward: -104.0
Epoch 705, Total Reward: -151.0
Epoch 706, Total Reward: -87.0
Updating Network weights.
Epoc

Epoch 916, Total Reward: -129.0
Epoch 917, Total Reward: -108.0
Epoch 918, Total Reward: -86.0
Epoch 919, Total Reward: -87.0
Epoch 920, Total Reward: -87.0
Epoch 921, Total Reward: -86.0
Epoch 922, Total Reward: -145.0
Epoch 923, Total Reward: -163.0
Updating Network weights.
Epoch 924, Total Reward: -108.0
Epoch 925, Total Reward: -130.0
Epoch 926, Total Reward: -107.0
Epoch 927, Total Reward: -100.0
Epoch 928, Total Reward: -110.0
Epoch 929, Total Reward: -106.0
Epoch 930, Total Reward: -143.0
Epoch 931, Total Reward: -142.0
Updating Network weights.
Epoch 932, Total Reward: -150.0
Epoch 933, Total Reward: -107.0
Epoch 934, Total Reward: -105.0
Epoch 935, Total Reward: -101.0
Epoch 936, Total Reward: -99.0
Epoch 937, Total Reward: -124.0
Epoch 938, Total Reward: -139.0
Epoch 939, Total Reward: -128.0
Epoch 940, Total Reward: -144.0
Updating Network weights.
Epoch 941, Total Reward: -136.0
Epoch 942, Total Reward: -134.0
Epoch 943, Total Reward: -108.0
Epoch 944, Total Reward: -105.0

Epoch 1150, Total Reward: -108.0
Epoch 1151, Total Reward: -103.0
Updating Network weights.
Epoch 1152, Total Reward: -88.0
Epoch 1153, Total Reward: -102.0
Epoch 1154, Total Reward: -93.0
Epoch 1155, Total Reward: -102.0
Epoch 1156, Total Reward: -102.0
Epoch 1157, Total Reward: -89.0
Epoch 1158, Total Reward: -105.0
Epoch 1159, Total Reward: -103.0
Epoch 1160, Total Reward: -103.0
Epoch 1161, Total Reward: -109.0
Updating Network weights.
Epoch 1162, Total Reward: -106.0
Epoch 1163, Total Reward: -106.0
Epoch 1164, Total Reward: -104.0
Epoch 1165, Total Reward: -106.0
Epoch 1166, Total Reward: -106.0
Epoch 1167, Total Reward: -108.0
Epoch 1168, Total Reward: -92.0
Epoch 1169, Total Reward: -111.0
Epoch 1170, Total Reward: -99.0
Epoch 1171, Total Reward: -84.0
Updating Network weights.
Epoch 1172, Total Reward: -105.0
Epoch 1173, Total Reward: -104.0
Epoch 1174, Total Reward: -110.0
Epoch 1175, Total Reward: -111.0
Epoch 1176, Total Reward: -112.0
Epoch 1177, Total Reward: -92.0
Epoch

Epoch 1382, Total Reward: -104.0
Epoch 1383, Total Reward: -104.0
Epoch 1384, Total Reward: -104.0
Epoch 1385, Total Reward: -95.0
Epoch 1386, Total Reward: -113.0
Epoch 1387, Total Reward: -110.0
Updating Network weights.
Epoch 1388, Total Reward: -139.0
Epoch 1389, Total Reward: -85.0
Epoch 1390, Total Reward: -106.0
Epoch 1391, Total Reward: -111.0
Epoch 1392, Total Reward: -111.0
Epoch 1393, Total Reward: -115.0
Epoch 1394, Total Reward: -105.0
Epoch 1395, Total Reward: -92.0
Epoch 1396, Total Reward: -105.0
Epoch 1397, Total Reward: -94.0
Updating Network weights.
Epoch 1398, Total Reward: -180.0
Epoch 1399, Total Reward: -140.0
Epoch 1400, Total Reward: -106.0
Epoch 1401, Total Reward: -149.0
Epoch 1402, Total Reward: -117.0
Epoch 1403, Total Reward: -157.0
Epoch 1404, Total Reward: -118.0
Updating Network weights.
Epoch 1405, Total Reward: -108.0
Epoch 1406, Total Reward: -155.0
Epoch 1407, Total Reward: -110.0
Epoch 1408, Total Reward: -91.0
Epoch 1409, Total Reward: -87.0
Epoc

Epoch 1613, Total Reward: -92.0
Epoch 1614, Total Reward: -106.0
Epoch 1615, Total Reward: -116.0
Epoch 1616, Total Reward: -83.0
Epoch 1617, Total Reward: -96.0
Epoch 1618, Total Reward: -86.0
Epoch 1619, Total Reward: -105.0
Epoch 1620, Total Reward: -90.0
Epoch 1621, Total Reward: -108.0
Epoch 1622, Total Reward: -104.0
Updating Network weights.
Epoch 1623, Total Reward: -107.0
Epoch 1624, Total Reward: -116.0
Epoch 1625, Total Reward: -124.0
Epoch 1626, Total Reward: -168.0
Epoch 1627, Total Reward: -106.0
Epoch 1628, Total Reward: -105.0
Epoch 1629, Total Reward: -103.0
Epoch 1630, Total Reward: -105.0
Epoch 1631, Total Reward: -105.0
Updating Network weights.
Epoch 1632, Total Reward: -89.0
Epoch 1633, Total Reward: -84.0
Epoch 1634, Total Reward: -86.0
Epoch 1635, Total Reward: -91.0
Epoch 1636, Total Reward: -84.0
Epoch 1637, Total Reward: -110.0
Epoch 1638, Total Reward: -84.0
Epoch 1639, Total Reward: -84.0
Epoch 1640, Total Reward: -88.0
Epoch 1641, Total Reward: -85.0
Epoch

Epoch 1846, Total Reward: -104.0
Updating Network weights.
Epoch 1847, Total Reward: -104.0
Epoch 1848, Total Reward: -106.0
Epoch 1849, Total Reward: -101.0
Epoch 1850, Total Reward: -87.0
Epoch 1851, Total Reward: -105.0
Epoch 1852, Total Reward: -104.0
Epoch 1853, Total Reward: -107.0
Epoch 1854, Total Reward: -104.0
Epoch 1855, Total Reward: -104.0
Epoch 1856, Total Reward: -105.0
Updating Network weights.
Epoch 1857, Total Reward: -105.0
Epoch 1858, Total Reward: -106.0
Epoch 1859, Total Reward: -108.0
Epoch 1860, Total Reward: -108.0
Epoch 1861, Total Reward: -111.0
Epoch 1862, Total Reward: -108.0
Epoch 1863, Total Reward: -103.0
Epoch 1864, Total Reward: -100.0
Epoch 1865, Total Reward: -87.0
Updating Network weights.
Epoch 1866, Total Reward: -108.0
Epoch 1867, Total Reward: -94.0
Epoch 1868, Total Reward: -109.0
Epoch 1869, Total Reward: -110.0
Epoch 1870, Total Reward: -108.0
Epoch 1871, Total Reward: -107.0
Epoch 1872, Total Reward: -108.0
Epoch 1873, Total Reward: -107.0
E

Epoch 2078, Total Reward: -105.0
Epoch 2079, Total Reward: -86.0
Epoch 2080, Total Reward: -90.0
Updating Network weights.
Epoch 2081, Total Reward: -92.0
Epoch 2082, Total Reward: -90.0
Epoch 2083, Total Reward: -97.0
Epoch 2084, Total Reward: -178.0
Epoch 2085, Total Reward: -96.0
Epoch 2086, Total Reward: -105.0
Epoch 2087, Total Reward: -90.0
Epoch 2088, Total Reward: -106.0
Epoch 2089, Total Reward: -90.0
Updating Network weights.
Epoch 2090, Total Reward: -105.0
Epoch 2091, Total Reward: -91.0
Epoch 2092, Total Reward: -107.0
Epoch 2093, Total Reward: -105.0
Epoch 2094, Total Reward: -106.0
Epoch 2095, Total Reward: -108.0
Epoch 2096, Total Reward: -106.0
Epoch 2097, Total Reward: -108.0
Epoch 2098, Total Reward: -107.0
Epoch 2099, Total Reward: -103.0
Updating Network weights.
Epoch 2100, Total Reward: -189.0
Epoch 2101, Total Reward: -89.0
Epoch 2102, Total Reward: -103.0
Epoch 2103, Total Reward: -151.0
Epoch 2104, Total Reward: -108.0
Epoch 2105, Total Reward: -89.0
Epoch 210

Updating Network weights.
Epoch 2310, Total Reward: -152.0
Epoch 2311, Total Reward: -88.0
Epoch 2312, Total Reward: -88.0
Epoch 2313, Total Reward: -105.0
Epoch 2314, Total Reward: -161.0
Epoch 2315, Total Reward: -106.0
Epoch 2316, Total Reward: -150.0
Epoch 2317, Total Reward: -88.0
Updating Network weights.
Epoch 2318, Total Reward: -87.0
Epoch 2319, Total Reward: -105.0
Epoch 2320, Total Reward: -116.0
Epoch 2321, Total Reward: -96.0
Epoch 2322, Total Reward: -93.0
Epoch 2323, Total Reward: -150.0
Epoch 2324, Total Reward: -164.0
Epoch 2325, Total Reward: -104.0
Epoch 2326, Total Reward: -93.0
Updating Network weights.
Epoch 2327, Total Reward: -158.0
Epoch 2328, Total Reward: -94.0
Epoch 2329, Total Reward: -105.0
Epoch 2330, Total Reward: -86.0
Epoch 2331, Total Reward: -113.0
Epoch 2332, Total Reward: -109.0
Epoch 2333, Total Reward: -107.0
Epoch 2334, Total Reward: -105.0
Epoch 2335, Total Reward: -98.0
Epoch 2336, Total Reward: -88.0
Updating Network weights.
Epoch 2337, Tota

Epoch 2540, Total Reward: -87.0
Epoch 2541, Total Reward: -107.0
Epoch 2542, Total Reward: -85.0
Epoch 2543, Total Reward: -84.0
Epoch 2544, Total Reward: -107.0
Epoch 2545, Total Reward: -92.0
Epoch 2546, Total Reward: -84.0
Epoch 2547, Total Reward: -97.0
Epoch 2548, Total Reward: -107.0
Epoch 2549, Total Reward: -86.0
Updating Network weights.
Epoch 2550, Total Reward: -105.0
Epoch 2551, Total Reward: -88.0
Epoch 2552, Total Reward: -104.0
Epoch 2553, Total Reward: -102.0
Epoch 2554, Total Reward: -106.0
Epoch 2555, Total Reward: -103.0
Epoch 2556, Total Reward: -107.0
Epoch 2557, Total Reward: -107.0
Epoch 2558, Total Reward: -102.0
Epoch 2559, Total Reward: -101.0
Updating Network weights.
Epoch 2560, Total Reward: -106.0
Epoch 2561, Total Reward: -106.0
Epoch 2562, Total Reward: -104.0
Epoch 2563, Total Reward: -99.0
Epoch 2564, Total Reward: -200.0
Epoch 2565, Total Reward: -90.0
Epoch 2566, Total Reward: -87.0
Epoch 2567, Total Reward: -122.0
Epoch 2568, Total Reward: -105.0
Up

Epoch 2773, Total Reward: -107.0
Epoch 2774, Total Reward: -104.0
Updating Network weights.
Epoch 2775, Total Reward: -105.0
Epoch 2776, Total Reward: -107.0
Epoch 2777, Total Reward: -114.0
Epoch 2778, Total Reward: -110.0
Epoch 2779, Total Reward: -102.0
Epoch 2780, Total Reward: -107.0
Epoch 2781, Total Reward: -123.0
Epoch 2782, Total Reward: -106.0
Epoch 2783, Total Reward: -106.0
Epoch 2784, Total Reward: -105.0
Updating Network weights.
Epoch 2785, Total Reward: -102.0
Epoch 2786, Total Reward: -106.0
Epoch 2787, Total Reward: -96.0
Epoch 2788, Total Reward: -103.0
Epoch 2789, Total Reward: -108.0
Epoch 2790, Total Reward: -109.0
Epoch 2791, Total Reward: -125.0
Epoch 2792, Total Reward: -100.0
Epoch 2793, Total Reward: -107.0
Updating Network weights.
Epoch 2794, Total Reward: -94.0
Epoch 2795, Total Reward: -110.0
Epoch 2796, Total Reward: -106.0
Epoch 2797, Total Reward: -105.0
Epoch 2798, Total Reward: -86.0
Epoch 2799, Total Reward: -103.0
Epoch 2800, Total Reward: -104.0
E

Epoch 3005, Total Reward: -108.0
Epoch 3006, Total Reward: -104.0
Epoch 3007, Total Reward: -108.0
Updating Network weights.
Epoch 3008, Total Reward: -129.0
Epoch 3009, Total Reward: -86.0
Epoch 3010, Total Reward: -97.0
Epoch 3011, Total Reward: -133.0
Epoch 3012, Total Reward: -106.0
Epoch 3013, Total Reward: -104.0
Epoch 3014, Total Reward: -113.0
Epoch 3015, Total Reward: -126.0
Epoch 3016, Total Reward: -89.0
Epoch 3017, Total Reward: -86.0
Updating Network weights.
Epoch 3018, Total Reward: -104.0
Epoch 3019, Total Reward: -125.0
Epoch 3020, Total Reward: -108.0
Epoch 3021, Total Reward: -106.0
Epoch 3022, Total Reward: -83.0
Epoch 3023, Total Reward: -199.0
Epoch 3024, Total Reward: -92.0
Epoch 3025, Total Reward: -105.0
Epoch 3026, Total Reward: -106.0
Updating Network weights.
Epoch 3027, Total Reward: -83.0
Epoch 3028, Total Reward: -182.0
Epoch 3029, Total Reward: -108.0
Epoch 3030, Total Reward: -102.0
Epoch 3031, Total Reward: -89.0
Epoch 3032, Total Reward: -105.0
Epoch 

Updating Network weights.
Epoch 3237, Total Reward: -105.0
Epoch 3238, Total Reward: -84.0
Epoch 3239, Total Reward: -90.0
Epoch 3240, Total Reward: -104.0
Epoch 3241, Total Reward: -108.0
Epoch 3242, Total Reward: -85.0
Epoch 3243, Total Reward: -106.0
Epoch 3244, Total Reward: -103.0
Epoch 3245, Total Reward: -88.0
Epoch 3246, Total Reward: -103.0
Updating Network weights.
Epoch 3247, Total Reward: -104.0
Epoch 3248, Total Reward: -107.0
Epoch 3249, Total Reward: -104.0
Epoch 3250, Total Reward: -104.0
Epoch 3251, Total Reward: -107.0
Epoch 3252, Total Reward: -108.0
Epoch 3253, Total Reward: -106.0
Epoch 3254, Total Reward: -85.0
Epoch 3255, Total Reward: -105.0
Epoch 3256, Total Reward: -108.0
Updating Network weights.
Epoch 3257, Total Reward: -105.0
Epoch 3258, Total Reward: -104.0
Epoch 3259, Total Reward: -107.0
Epoch 3260, Total Reward: -108.0
Epoch 3261, Total Reward: -103.0
Epoch 3262, Total Reward: -103.0
Epoch 3263, Total Reward: -105.0
Epoch 3264, Total Reward: -99.0
Epoc

Epoch 3468, Total Reward: -98.0
Epoch 3469, Total Reward: -106.0
Updating Network weights.
Epoch 3470, Total Reward: -95.0
Epoch 3471, Total Reward: -105.0
Epoch 3472, Total Reward: -103.0
Epoch 3473, Total Reward: -105.0
Epoch 3474, Total Reward: -105.0
Epoch 3475, Total Reward: -85.0
Epoch 3476, Total Reward: -87.0
Epoch 3477, Total Reward: -104.0
Epoch 3478, Total Reward: -167.0
Updating Network weights.
Epoch 3479, Total Reward: -110.0
Epoch 3480, Total Reward: -105.0
Epoch 3481, Total Reward: -106.0
Epoch 3482, Total Reward: -107.0
Epoch 3483, Total Reward: -98.0
Epoch 3484, Total Reward: -109.0
Epoch 3485, Total Reward: -173.0
Epoch 3486, Total Reward: -96.0
Epoch 3487, Total Reward: -105.0
Updating Network weights.
Epoch 3488, Total Reward: -92.0
Epoch 3489, Total Reward: -104.0
Epoch 3490, Total Reward: -106.0
Epoch 3491, Total Reward: -104.0
Epoch 3492, Total Reward: -97.0
Epoch 3493, Total Reward: -90.0
Epoch 3494, Total Reward: -106.0
Epoch 3495, Total Reward: -174.0
Epoch 3

Epoch 3699, Total Reward: -106.0
Updating Network weights.
Epoch 3700, Total Reward: -106.0
Epoch 3701, Total Reward: -84.0
Epoch 3702, Total Reward: -90.0
Epoch 3703, Total Reward: -103.0
Epoch 3704, Total Reward: -112.0
Epoch 3705, Total Reward: -84.0
Epoch 3706, Total Reward: -115.0
Epoch 3707, Total Reward: -91.0
Epoch 3708, Total Reward: -118.0
Epoch 3709, Total Reward: -106.0
Updating Network weights.
Epoch 3710, Total Reward: -109.0
Epoch 3711, Total Reward: -85.0
Epoch 3712, Total Reward: -104.0
Epoch 3713, Total Reward: -89.0
Epoch 3714, Total Reward: -105.0
Epoch 3715, Total Reward: -84.0
Epoch 3716, Total Reward: -109.0
Epoch 3717, Total Reward: -105.0
Epoch 3718, Total Reward: -105.0
Epoch 3719, Total Reward: -105.0
Updating Network weights.
Epoch 3720, Total Reward: -103.0
Epoch 3721, Total Reward: -106.0
Epoch 3722, Total Reward: -92.0
Epoch 3723, Total Reward: -102.0
Epoch 3724, Total Reward: -106.0
Epoch 3725, Total Reward: -114.0
Epoch 3726, Total Reward: -100.0
Epoch 

Epoch 3931, Total Reward: -105.0
Epoch 3932, Total Reward: -88.0
Epoch 3933, Total Reward: -87.0
Updating Network weights.
Epoch 3934, Total Reward: -91.0
Epoch 3935, Total Reward: -105.0
Epoch 3936, Total Reward: -107.0
Epoch 3937, Total Reward: -104.0
Epoch 3938, Total Reward: -105.0
Epoch 3939, Total Reward: -86.0
Epoch 3940, Total Reward: -87.0
Epoch 3941, Total Reward: -106.0
Epoch 3942, Total Reward: -91.0
Epoch 3943, Total Reward: -102.0
Updating Network weights.
Epoch 3944, Total Reward: -91.0
Epoch 3945, Total Reward: -108.0
Epoch 3946, Total Reward: -105.0
Epoch 3947, Total Reward: -103.0
Epoch 3948, Total Reward: -199.0
Epoch 3949, Total Reward: -104.0
Epoch 3950, Total Reward: -105.0
Epoch 3951, Total Reward: -103.0
Updating Network weights.
Epoch 3952, Total Reward: -200.0
Epoch 3953, Total Reward: -108.0
Epoch 3954, Total Reward: -103.0
Epoch 3955, Total Reward: -102.0
Epoch 3956, Total Reward: -106.0
Epoch 3957, Total Reward: -113.0
Epoch 3958, Total Reward: -105.0
Epoch

Epoch 4162, Total Reward: -85.0
Epoch 4163, Total Reward: -91.0
Epoch 4164, Total Reward: -106.0
Epoch 4165, Total Reward: -108.0
Updating Network weights.
Epoch 4166, Total Reward: -96.0
Epoch 4167, Total Reward: -108.0
Epoch 4168, Total Reward: -87.0
Epoch 4169, Total Reward: -113.0
Epoch 4170, Total Reward: -95.0
Epoch 4171, Total Reward: -104.0
Epoch 4172, Total Reward: -105.0
Epoch 4173, Total Reward: -104.0
Epoch 4174, Total Reward: -100.0
Epoch 4175, Total Reward: -107.0
Updating Network weights.
Epoch 4176, Total Reward: -104.0
Epoch 4177, Total Reward: -109.0
Epoch 4178, Total Reward: -94.0
Epoch 4179, Total Reward: -108.0
Epoch 4180, Total Reward: -110.0
Epoch 4181, Total Reward: -104.0
Epoch 4182, Total Reward: -86.0
Epoch 4183, Total Reward: -102.0
Epoch 4184, Total Reward: -105.0
Epoch 4185, Total Reward: -88.0
Updating Network weights.
Epoch 4186, Total Reward: -86.0
Epoch 4187, Total Reward: -88.0
Epoch 4188, Total Reward: -109.0
Epoch 4189, Total Reward: -104.0
Epoch 41

Epoch 4412, Total Reward: -99.0
Epoch 4413, Total Reward: -111.0
Updating Network weights.
Epoch 4414, Total Reward: -161.0
Epoch 4415, Total Reward: -91.0
Epoch 4416, Total Reward: -90.0
Epoch 4417, Total Reward: -107.0
Epoch 4418, Total Reward: -109.0
Epoch 4419, Total Reward: -106.0
Epoch 4420, Total Reward: -105.0
Epoch 4421, Total Reward: -164.0
Epoch 4422, Total Reward: -106.0
Updating Network weights.
Epoch 4423, Total Reward: -104.0
Epoch 4424, Total Reward: -108.0
Epoch 4425, Total Reward: -110.0
Epoch 4426, Total Reward: -106.0
Epoch 4427, Total Reward: -105.0
Epoch 4428, Total Reward: -105.0
Epoch 4429, Total Reward: -91.0
Epoch 4430, Total Reward: -97.0
Epoch 4431, Total Reward: -106.0
Epoch 4432, Total Reward: -87.0
Updating Network weights.
Epoch 4433, Total Reward: -112.0
Epoch 4434, Total Reward: -88.0
Epoch 4435, Total Reward: -109.0
Epoch 4436, Total Reward: -119.0
Epoch 4437, Total Reward: -95.0
Epoch 4438, Total Reward: -107.0
Epoch 4439, Total Reward: -109.0
Epoch 

Epoch 4643, Total Reward: -111.0
Epoch 4644, Total Reward: -90.0
Epoch 4645, Total Reward: -111.0
Epoch 4646, Total Reward: -106.0
Epoch 4647, Total Reward: -89.0
Epoch 4648, Total Reward: -84.0
Epoch 4649, Total Reward: -85.0
Epoch 4650, Total Reward: -100.0
Updating Network weights.
Epoch 4651, Total Reward: -94.0
Epoch 4652, Total Reward: -166.0
Epoch 4653, Total Reward: -108.0
Epoch 4654, Total Reward: -111.0
Epoch 4655, Total Reward: -87.0
Epoch 4656, Total Reward: -108.0
Epoch 4657, Total Reward: -109.0
Epoch 4658, Total Reward: -120.0
Epoch 4659, Total Reward: -91.0
Updating Network weights.
Epoch 4660, Total Reward: -153.0
Epoch 4661, Total Reward: -151.0
Epoch 4662, Total Reward: -89.0
Epoch 4663, Total Reward: -86.0
Epoch 4664, Total Reward: -104.0
Epoch 4665, Total Reward: -115.0
Epoch 4666, Total Reward: -109.0
Epoch 4667, Total Reward: -95.0
Epoch 4668, Total Reward: -85.0
Updating Network weights.
Epoch 4669, Total Reward: -93.0
Epoch 4670, Total Reward: -90.0
Epoch 4671,

Epoch 4875, Total Reward: -120.0
Epoch 4876, Total Reward: -109.0
Epoch 4877, Total Reward: -95.0
Epoch 4878, Total Reward: -104.0
Epoch 4879, Total Reward: -93.0
Epoch 4880, Total Reward: -86.0
Epoch 4881, Total Reward: -102.0
Epoch 4882, Total Reward: -108.0
Epoch 4883, Total Reward: -111.0
Epoch 4884, Total Reward: -102.0
Updating Network weights.
Epoch 4885, Total Reward: -105.0
Epoch 4886, Total Reward: -86.0
Epoch 4887, Total Reward: -104.0
Epoch 4888, Total Reward: -97.0
Epoch 4889, Total Reward: -103.0
Epoch 4890, Total Reward: -107.0
Epoch 4891, Total Reward: -102.0
Epoch 4892, Total Reward: -83.0
Epoch 4893, Total Reward: -98.0
Epoch 4894, Total Reward: -102.0
Updating Network weights.
Epoch 4895, Total Reward: -87.0
Epoch 4896, Total Reward: -88.0
Epoch 4897, Total Reward: -106.0
Epoch 4898, Total Reward: -86.0
Epoch 4899, Total Reward: -103.0
Epoch 4900, Total Reward: -194.0
Epoch 4901, Total Reward: -105.0
Epoch 4902, Total Reward: -91.0
Epoch 4903, Total Reward: -97.0
Upd