# Experiment 1 Setup

- Environemt is fixed with seed 4
- Map is undiscovered

General env_setup:
- Num rooms: 3
- Items: Souls (2), Health Potion (0-1), Enemies (Level 1, 1-2)

Aims:
The aim of this experiment is to check if the results from experiment 1 holds if we mainpulate the seed value.

In [2]:
!pip install stable-baselines3[extra] --quiet

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip install --upgrade pip' command.[0m


# Initialization Environment
Initialization and setup of the custom environment

In [2]:
from env import RoguesSoulsEnv

env = RoguesSoulsEnv(max_steps=500)
env.set_seed(4)
env.set_mode(1)
# Needs resetting to apply above changes
state = env.reset()
env.render(mode='human')

------------------------


                        
                        
                        
                        
     |------|           
     |....*.|           
     |......+####       
     |.>...b|   #       
     |..$...| |-+--|    
     |------| |.*..|    
              |..$b|    
              |....|    
              |....|    
    |----|    |....|    
    |....|    |-+--|    
    |....|      #       
    |.@..+#######       
    |....|              
    |....|              
    |----|              
                        
                        
                        
                        
		LEVEL: 1/1	  HP: 16 	SOULS: 0
		DEFENSE: 2	MAX HP: 16 	POWER: 4
------------------------


# Flattening the observation space

Stable-baseline3 requires the obseration space to be a 1D vector for better policy optimisation.


In [3]:
from gym.wrappers import FlattenObservation
env = FlattenObservation(env)

# Initialization agent
Agent will be based on stable-baselines3 DQN algorithm

In [4]:
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv

# Stop training when the model reaches the reward threshold
callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=140, verbose=1)
eval_callback = EvalCallback(env, callback_on_new_best=callback_on_best, verbose=1)

env = DummyVecEnv([lambda: env])
model = DQN('MlpPolicy', env, verbose=1, exploration_fraction=0.3, tensorboard_log='./logs/ex2_0_v1')

Using cpu device


# Adding the necessary check function
This function checks the average score from n episodes

In [5]:
def get_average(env, model, n:int = 100) -> int:
    total = 0
    for e in range(1, n+1):
        obs = env.reset()
        done = False
        score = 0

        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, _ = env.step(action)
            score += reward
        total += score

    return total/n

# Training the agent

The agent will be trained until its average performance is above 180.

In [6]:
# for i in range(5):
#     tot_steps = 25000
#     model.learn(total_timesteps = tot_steps, reset_num_timesteps=False)
#     avg = get_average(model, 25)
#     print(f'Total iterations: {(i+1)*tot_steps}; Average {avg}')
#     if avg > 100:
#         break
    
model.learn(total_timesteps = 800000, reset_num_timesteps=False, tb_log_name='first_run', callback=eval_callback)
avg = get_average(env, model, 25)
print(f'Average: {avg}')

Logging to ./logs/ex2_0_v1/first_run_0
----------------------------------
| rollout/            |          |
|    exploration rate | 0.992    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 189      |
|    time_elapsed     | 10       |
|    total timesteps  | 2000     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.984    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 241      |
|    time_elapsed     | 16       |
|    total timesteps  | 4000     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.976    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 261      |
|    time_elapsed     | 22       |
|    total timesteps  | 6000     |
----------------------------------
----------------



Eval num_timesteps=10000, episode_reward=-150.00 +/- 0.00
Episode length: 500.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 500      |
|    mean_reward      | -150     |
| rollout/            |          |
|    exploration rate | 0.96     |
| time/               |          |
|    total timesteps  | 10000    |
----------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    exploration rate | 0.96     |
| time/               |          |
|    episodes         | 20       |
|    fps              | 227      |
|    time_elapsed     | 43       |
|    total timesteps  | 10000    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.953    |
| time/               |          |
|    episodes         | 24       |
|    fps              | 238      |
|    time_elapsed     | 49       |
|    total ti

----------------------------------
| rollout/            |          |
|    exploration rate | 0.802    |
| time/               |          |
|    episodes         | 100      |
|    fps              | 247      |
|    time_elapsed     | 202      |
|    total timesteps  | 50000    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.794    |
| time/               |          |
|    episodes         | 104      |
|    fps              | 248      |
|    time_elapsed     | 209      |
|    total timesteps  | 52000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00129  |
|    n_updates        | 499      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.788    |
| time/               |          |
|    episodes         | 108      |
|    fps              | 249      |
|    time_elapsed   

----------------------------------
| rollout/            |          |
|    exploration rate | 0.714    |
| time/               |          |
|    episodes         | 164      |
|    fps              | 244      |
|    time_elapsed     | 295      |
|    total timesteps  | 72200    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0257   |
|    n_updates        | 5549     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.707    |
| time/               |          |
|    episodes         | 168      |
|    fps              | 245      |
|    time_elapsed     | 301      |
|    total timesteps  | 73977    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00488  |
|    n_updates        | 5994     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.606    |
| time/               |          |
|    episodes         | 228      |
|    fps              | 246      |
|    time_elapsed     | 404      |
|    total timesteps  | 99547    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00853  |
|    n_updates        | 12386    |
----------------------------------
Eval num_timesteps=100000, episode_reward=-141.00 +/- 0.00
Episode length: 500.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 500      |
|    mean_reward      | -141     |
| rollout/            |          |
|    exploration rate | 0.604    |
| time/               |          |
|    total timesteps  | 100000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates        | 12499    |
----------------------------------

----------------------------------
| rollout/            |          |
|    exploration rate | 0.496    |
| time/               |          |
|    episodes         | 288      |
|    fps              | 245      |
|    time_elapsed     | 517      |
|    total timesteps  | 127403   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0281   |
|    n_updates        | 19350    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.488    |
| time/               |          |
|    episodes         | 292      |
|    fps              | 246      |
|    time_elapsed     | 524      |
|    total timesteps  | 129403   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates        | 19850    |
----------------------------------
Eval num_timesteps=130000, episode_reward=-86.00 +/- 0.00
Episode length: 500.00 +/- 0.00


----------------------------------
| rollout/            |          |
|    exploration rate | 0.379    |
| time/               |          |
|    episodes         | 348      |
|    fps              | 247      |
|    time_elapsed     | 633      |
|    total timesteps  | 156773   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.203    |
|    n_updates        | 26693    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.373    |
| time/               |          |
|    episodes         | 352      |
|    fps              | 247      |
|    time_elapsed     | 639      |
|    total timesteps  | 158458   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   |
|    n_updates        | 27114    |
----------------------------------
Eval num_timesteps=160000, episode_reward=-144.00 +/- 0.00
Episode length: 500.00 +/- 0.00

----------------------------------
| rollout/            |          |
|    exploration rate | 0.272    |
| time/               |          |
|    episodes         | 408      |
|    fps              | 247      |
|    time_elapsed     | 743      |
|    total timesteps  | 183894   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.828    |
|    n_updates        | 33473    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.266    |
| time/               |          |
|    episodes         | 412      |
|    fps              | 247      |
|    time_elapsed     | 748      |
|    total timesteps  | 185515   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.526    |
|    n_updates        | 33878    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.159    |
| time/               |          |
|    episodes         | 468      |
|    fps              | 247      |
|    time_elapsed     | 859      |
|    total timesteps  | 212500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.159    |
|    n_updates        | 40624    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.151    |
| time/               |          |
|    episodes         | 472      |
|    fps              | 247      |
|    time_elapsed     | 866      |
|    total timesteps  | 214500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.54     |
|    n_updates        | 41124    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 528      |
|    fps              | 246      |
|    time_elapsed     | 983      |
|    total timesteps  | 242000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.104    |
|    n_updates        | 47999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 532      |
|    fps              | 246      |
|    time_elapsed     | 990      |
|    total timesteps  | 244000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0311   |
|    n_updates        | 48499    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 588      |
|    fps              | 244      |
|    time_elapsed     | 1112     |
|    total timesteps  | 272000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.439    |
|    n_updates        | 55499    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 592      |
|    fps              | 244      |
|    time_elapsed     | 1120     |
|    total timesteps  | 274000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.079    |
|    n_updates        | 55999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 660      |
|    fps              | 244      |
|    time_elapsed     | 1177     |
|    total timesteps  | 287925   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.189    |
|    n_updates        | 59481    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 664      |
|    fps              | 244      |
|    time_elapsed     | 1178     |
|    total timesteps  | 288051   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.155    |
|    n_updates        | 59512    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 728      |
|    fps              | 244      |
|    time_elapsed     | 1188     |
|    total timesteps  | 290634   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.198    |
|    n_updates        | 60158    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 732      |
|    fps              | 244      |
|    time_elapsed     | 1189     |
|    total timesteps  | 290793   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.14     |
|    n_updates        | 60198    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 804      |
|    fps              | 244      |
|    time_elapsed     | 1206     |
|    total timesteps  | 295273   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.123    |
|    n_updates        | 61318    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 808      |
|    fps              | 244      |
|    time_elapsed     | 1207     |
|    total timesteps  | 295401   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0558   |
|    n_updates        | 61350    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 880      |
|    fps              | 244      |
|    time_elapsed     | 1224     |
|    total timesteps  | 299857   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.169    |
|    n_updates        | 62464    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 884      |
|    fps              | 244      |
|    time_elapsed     | 1224     |
|    total timesteps  | 299988   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0483   |
|    n_updates        | 62496    |
----------------------------------
Eval num_timesteps=300000, episode_reward=-94.00 +/- 0.00
Episode length: 31.00 +/- 0.00
-

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 948      |
|    fps              | 244      |
|    time_elapsed     | 1243     |
|    total timesteps  | 304554   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0298   |
|    n_updates        | 63638    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 952      |
|    fps              | 244      |
|    time_elapsed     | 1244     |
|    total timesteps  | 304708   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0719   |
|    n_updates        | 63676    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1024     |
|    fps              | 245      |
|    time_elapsed     | 1263     |
|    total timesteps  | 309839   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.108    |
|    n_updates        | 64959    |
----------------------------------
Eval num_timesteps=310000, episode_reward=-143.00 +/- 0.00
Episode length: 500.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 500      |
|    mean_reward      | -143     |
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    total timesteps  | 310000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.14     |
|    n_updates        | 64999    |
----------------------------------

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 243      |
|    time_elapsed     | 1295     |
|    total timesteps  | 316011   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.206    |
|    n_updates        | 66502    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 243      |
|    time_elapsed     | 1296     |
|    total timesteps  | 316299   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0867   |
|    n_updates        | 66574    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 242      |
|    time_elapsed     | 1324     |
|    total timesteps  | 321445   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.16     |
|    n_updates        | 67861    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1164     |
|    fps              | 242      |
|    time_elapsed     | 1325     |
|    total timesteps  | 321726   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.17     |
|    n_updates        | 67931    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1228     |
|    fps              | 242      |
|    time_elapsed     | 1394     |
|    total timesteps  | 337963   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.608    |
|    n_updates        | 71990    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1232     |
|    fps              | 242      |
|    time_elapsed     | 1402     |
|    total timesteps  | 339963   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.94     |
|    n_updates        | 72490    |
----------------------------------
Eval num_timesteps=340000, episode_reward=-145.00 +/- 0.00
Episode length: 500.00 +/- 0.00

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1288     |
|    fps              | 240      |
|    time_elapsed     | 1522     |
|    total timesteps  | 365589   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.02     |
|    n_updates        | 78897    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1292     |
|    fps              | 240      |
|    time_elapsed     | 1528     |
|    total timesteps  | 367148   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0694   |
|    n_updates        | 79286    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1348     |
|    fps              | 238      |
|    time_elapsed     | 1644     |
|    total timesteps  | 391685   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.03     |
|    n_updates        | 85421    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1352     |
|    fps              | 238      |
|    time_elapsed     | 1651     |
|    total timesteps  | 393648   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0856   |
|    n_updates        | 85911    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 236      |
|    time_elapsed     | 1790     |
|    total timesteps  | 424000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0728   |
|    n_updates        | 93499    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 236      |
|    time_elapsed     | 1797     |
|    total timesteps  | 426000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.404    |
|    n_updates        | 93999    |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1476     |
|    fps              | 235      |
|    time_elapsed     | 1934     |
|    total timesteps  | 456000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.14     |
|    n_updates        | 101499   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1480     |
|    fps              | 235      |
|    time_elapsed     | 1941     |
|    total timesteps  | 458000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.02     |
|    n_updates        | 101999   |
----------------------------------
Eval num_timesteps=460000, episode_reward=-150.00 +/- 0.00
Episode length: 500.00 +/- 0.00

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1540     |
|    fps              | 234      |
|    time_elapsed     | 2079     |
|    total timesteps  | 488000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.416    |
|    n_updates        | 109499   |
----------------------------------
Eval num_timesteps=490000, episode_reward=-149.00 +/- 0.00
Episode length: 500.00 +/- 0.00
----------------------------------
| eval/               |          |
|    mean_ep_length   | 500      |
|    mean_reward      | -149     |
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    total timesteps  | 490000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.154    |
|    n_updates        | 109999   |
----------------------------------

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1600     |
|    fps              | 113      |
|    time_elapsed     | 4568     |
|    total timesteps  | 516544   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.149    |
|    n_updates        | 116635   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1604     |
|    fps              | 113      |
|    time_elapsed     | 4571     |
|    total timesteps  | 517648   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0247   |
|    n_updates        | 116911   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1660     |
|    fps              | 101      |
|    time_elapsed     | 5380     |
|    total timesteps  | 546000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0255   |
|    n_updates        | 123999   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1664     |
|    fps              | 101      |
|    time_elapsed     | 5386     |
|    total timesteps  | 548000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.127    |
|    n_updates        | 124499   |
----------------------------------
Eval num_timesteps=550000, episode_reward=-150.00 +/- 0.00
Episode length: 500.00 +/- 0.00

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1724     |
|    fps              | 104      |
|    time_elapsed     | 5507     |
|    total timesteps  | 577082   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.61     |
|    n_updates        | 131770   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1728     |
|    fps              | 104      |
|    time_elapsed     | 5512     |
|    total timesteps  | 578651   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.147    |
|    n_updates        | 132162   |
----------------------------------
Eval num_timesteps=580000, episode_reward=-35.00 +/- 0.00
Episode length: 500.00 +/- 0.00


----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1784     |
|    fps              | 107      |
|    time_elapsed     | 5621     |
|    total timesteps  | 606539   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0833   |
|    n_updates        | 139134   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1788     |
|    fps              | 108      |
|    time_elapsed     | 5628     |
|    total timesteps  | 608106   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.111    |
|    n_updates        | 139526   |
----------------------------------
Eval num_timesteps=610000, episode_reward=-35.00 +/- 0.00
Episode length: 500.00 +/- 0.00


----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1844     |
|    fps              | 110      |
|    time_elapsed     | 5736     |
|    total timesteps  | 636500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0979   |
|    n_updates        | 146624   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1848     |
|    fps              | 111      |
|    time_elapsed     | 5741     |
|    total timesteps  | 638500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.117    |
|    n_updates        | 147124   |
----------------------------------
Eval num_timesteps=640000, episode_reward=-85.00 +/- 0.00
Episode length: 500.00 +/- 0.00


----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1904     |
|    fps              | 114      |
|    time_elapsed     | 5840     |
|    total timesteps  | 666041   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0436   |
|    n_updates        | 154010   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1908     |
|    fps              | 114      |
|    time_elapsed     | 5845     |
|    total timesteps  | 667750   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.319    |
|    n_updates        | 154437   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1964     |
|    fps              | 116      |
|    time_elapsed     | 5941     |
|    total timesteps  | 695000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0904   |
|    n_updates        | 161249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 1968     |
|    fps              | 117      |
|    time_elapsed     | 5948     |
|    total timesteps  | 697000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.117    |
|    n_updates        | 161749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2024     |
|    fps              | 119      |
|    time_elapsed     | 6048     |
|    total timesteps  | 725000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.114    |
|    n_updates        | 168749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2028     |
|    fps              | 120      |
|    time_elapsed     | 6054     |
|    total timesteps  | 727000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0841   |
|    n_updates        | 169249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2084     |
|    fps              | 122      |
|    time_elapsed     | 6156     |
|    total timesteps  | 755000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.103    |
|    n_updates        | 176249   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2088     |
|    fps              | 122      |
|    time_elapsed     | 6162     |
|    total timesteps  | 757000   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0871   |
|    n_updates        | 176749   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2144     |
|    fps              | 125      |
|    time_elapsed     | 6261     |
|    total timesteps  | 783580   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.113    |
|    n_updates        | 183394   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0.05     |
| time/               |          |
|    episodes         | 2148     |
|    fps              | 125      |
|    time_elapsed     | 6266     |
|    total timesteps  | 785127   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.93     |
|    n_updates        | 183781   |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rat

# Results

In [7]:
# import time
episodes = 20
print(f'TESTING MODEL ON {episodes} EPISODES\n')
for e in range(1, episodes+1):
    obs = env.reset()
    done = False
    score = 0

    while not done:
#         env.render()
#         input('[ENTER]')
#         time.sleep(1)
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        score += reward
    completed = info[0].get('Completed')
    print(f'Episode: {e}, Score: {score}, Completed: {completed}')
    

TESTING MODEL ON 20 EPISODES

Episode: 1, Score: [-34.99926], Completed: False
Episode: 2, Score: [-34.999268], Completed: False
Episode: 3, Score: [-34.99926], Completed: False
Episode: 4, Score: [-34.999268], Completed: False
Episode: 5, Score: [-34.99926], Completed: False
Episode: 6, Score: [-34.99926], Completed: False
Episode: 7, Score: [-84.99959], Completed: False
Episode: 8, Score: [-34.99926], Completed: False
Episode: 9, Score: [169.50067], Completed: True
Episode: 10, Score: [-34.99926], Completed: False
Episode: 11, Score: [-34.99926], Completed: False
Episode: 12, Score: [-34.99926], Completed: False
Episode: 13, Score: [-34.99926], Completed: False
Episode: 14, Score: [-34.99926], Completed: False
Episode: 15, Score: [-34.999268], Completed: False
Episode: 16, Score: [-34.99926], Completed: False
Episode: 17, Score: [-34.99926], Completed: False
Episode: 18, Score: [-34.99926], Completed: False
Episode: 19, Score: [-34.99926], Completed: False
Episode: 20, Score: [-34.99

In [8]:
## Continue training
# del model
# model = DQN.load('./models/experiment_1/ex7_1')
# model.learn(total_timesteps = 100000, reset_num_timesteps=False, tb_log_name='second_run')

In [9]:
# # Save model
# model.save('./models/experiment_1/ex12_v2')