# 1. Import Dependencies

In [3]:
import gym 
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os

# 2. Test Environment

In [4]:
# Gym registry ID of the Atari game used throughout this notebook.
environment_name = 'Breakout-v0'

In [5]:
# Create a single (non-vectorised) gym environment from the ID chosen above;
# it is only used for the random-action smoke test in this section.
env = gym.make(environment_name)

In [6]:
# Smoke test: play a handful of episodes with uniformly random actions and
# print the total reward collected in each one.
episodes = 5
try:
    for episode in range(1, episodes + 1):
        # The initial observation is not needed by a random agent.
        env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            # step() is unpacked as (observation, reward, done, info); only the
            # reward and the termination flag matter here.
            _obs, reward, done, _info = env.step(action)
            score += reward
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always release the environment (and its render window), even when the
    # rollout is interrupted or raises part-way through.
    env.close()

Episode:1 Score:3.0
Episode:2 Score:0.0
Episode:3 Score:3.0
Episode:4 Score:1.0
Episode:5 Score:1.0


In [7]:
# Draw one random action from the action space; the value is shown as the
# cell output.
env.action_space.sample()

0

In [8]:
# Draw one random observation from the observation space; the sampled array
# is shown as the cell output.
# NOTE(review): this displays a whole image-sized array; inspecting
# `env.observation_space.shape` may be a more compact check - confirm intent.
env.observation_space.sample()

array([[[110, 137, 214],
        [193,  77, 136],
        [103,  67,   9],
        ...,
        [168, 110, 169],
        [192, 201,  54],
        [247, 103,  68]],

       [[ 66,  37, 102],
        [119,  67,  83],
        [  2, 178,  61],
        ...,
        [ 77,  93, 157],
        [ 67,  68, 156],
        [ 40, 132,  99]],

       [[171, 216,  25],
        [245,  11,  46],
        [111, 121,  85],
        ...,
        [139,  65,  94],
        [102,   1, 110],
        [ 61, 180,  51]],

       ...,

       [[ 44,  66, 183],
        [ 32, 195,  50],
        [221,  96,  74],
        ...,
        [170, 109, 186],
        [186, 250, 197],
        [  6, 239, 218]],

       [[123,  33, 236],
        [112, 244,  92],
        [154, 151, 161],
        ...,
        [152, 130, 110],
        [ 63, 195,  88],
        [  7, 177, 154]],

       [[ 70, 240, 204],
        [ 76, 122, 111],
        [101, 144,  71],
        ...,
        [157,  87, 229],
        [170,  12, 160],
        [ 29,  43, 147]]

# 3. Vectorise Environment and Train Model

In [9]:
# Build the training environment: 4 copies of the game created through
# stable-baselines3's Atari helper, seeded for repeatability. The ID comes
# from `environment_name` so every section of the notebook uses the same game.
# NOTE(review): the Stable-Baselines3 Atari examples pair this helper with the
# `NoFrameskip-v4` environment variants - confirm that `Breakout-v0` is intended.
env = make_atari_env(environment_name, n_envs=4, seed=0)

In [10]:
# Wrap the vectorised env in VecFrameStack with n_stack=4.
# NOTE: `env` is rebound to a wrapper around its previous value, so re-running
# this cell on its own wraps the environment a second time.
env = VecFrameStack(env, n_stack=4)

In [12]:
# Directory (relative to the working directory) handed to the model as its
# TensorBoard log location.
log_path = os.path.join(
    'Training',
    'Logs',
)

In [13]:
# Create an A2C agent with the CNN policy on the stacked Atari environment;
# training metrics are written under `log_path`.
model = A2C(
    policy="CnnPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env in a VecTransposeImage.


[W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.


In [16]:
# Train for two million environment steps. This is the long-running cell of
# the notebook; progress tables are printed because the model uses verbose=1.
model.learn(total_timesteps=2_000_000)

2021-09-18 14:59:03.428500: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-18 14:59:03.428537: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Logging to Training/Logs/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 282      |
|    ep_rew_mean        | 1.47     |
| time/                 |          |
|    fps                | 65       |
|    iterations         | 100      |
|    time_elapsed       | 30       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.35    |
|    explained_variance | 0.0365   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0497   |
|    value_loss         | 0.0496   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 292      |
|    ep_rew_mean        | 1.7      |
| time/                 |          |
|    fps                | 83       |
|    iterations         | 200      |
|    time_elapsed       | 47       |
|    total_timesteps    | 4000     |
| train

<stable_baselines3.a2c.a2c.A2C at 0x7fe5e4610520>

# 4. Save and Reload Model

In [17]:
# Location (relative to the working directory) where the trained model is
# saved and later reloaded from.
a2c_path = os.path.join(
    'Training',
    'Saved Models',
    'A2C_model',
)

In [18]:
# Persist the trained model to `a2c_path`.
model.save(a2c_path)

In [19]:
# Drop the in-memory model; presumably so that the reload below demonstrably
# restores it from disk rather than reusing the trained object.
del model

In [20]:
# Release the multi-env training environment before `env` is rebound; once the
# name points at the new environment nothing else could close the old one.
env.close()
# Build a single-env, frame-stacked (n_stack=4) environment for evaluation,
# using the same environment ID as the rest of the notebook.
env = make_atari_env(environment_name, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [21]:
# Restore the saved agent from disk and attach it to the evaluation environment.
model = A2C.load(path=a2c_path, env=env)

Wrapping the env in a VecTransposeImage.


# 5. Evaluate and Test

In [23]:
# Evaluate the reloaded agent over 10 rendered episodes and report the result.
# evaluate_policy returns (mean_reward, std_reward); capture and print it so
# the score is visible instead of being discarded.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, render=True)
print('Mean reward: {:.2f} +/- {:.2f}'.format(mean_reward, std_reward))
# The environment is intentionally NOT closed here: the next cell keeps
# stepping it, and the final cell of the notebook calls env.close().

In [24]:
# Watch the trained agent play. The loop is bounded by a number of finished
# episodes so the cell terminates on its own and the closing cell below can
# run; an unbounded `while True` only ends by killing the render window/kernel.
n_watch_episodes = 5
episodes_finished = 0

obs = env.reset()
while episodes_finished < n_watch_episodes:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
    # `dones` holds one flag per sub-environment. No manual reset is done here:
    # stable-baselines3 vectorised envs are documented to reset a finished
    # sub-environment themselves.
    episodes_finished += int(dones.sum())

X connection to :0 broken (explicit kill or server shutdown).


In [None]:
# Release the evaluation environment (and its render window, if one is open).
env.close()