# Asynchronous Advantage Actor Critic (A3C)
---
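In this notebook, we train an Asynchronous Advantage Actor-Critic (A3C) agent: several worker processes run in parallel and update a single model held in shared memory.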

### 1. Import the Necessary Packages

In [1]:
import os

import torch
import torch.multiprocessing as mp

import my_optim
from envs import create_atari_env
from model import ActorCritic
from test import test
from train import train

class args:
    """Hyperparameter namespace for this A3C run.

    The class object itself (never an instance) is passed to the ``train``
    and ``test`` workers, so every setting is read as a class attribute.
    """

    # --- run setup ---
    seed: int = 1              # passed to torch.manual_seed in the launch cell
    num_processes: int = 4     # number of `train` worker processes to spawn
    env_name = None            # environment identifier; None in this notebook
    no_shared: bool = False    # True -> launch cell passes optimizer=None to workers

    # --- optimisation ---
    lr: float = 1e-4           # learning rate given to my_optim.SharedAdam
    max_grad_norm: int = 50    # presumably the gradient-clipping norm in train -- TODO confirm

    # --- loss / return settings (consumed by train/test, not visible here) ---
    gamma: float = 0.99            # presumably the reward discount factor
    gae_lambda: float = 1.0        # presumably the GAE lambda
    entropy_coef: float = 0.01     # presumably the entropy-bonus weight
    value_loss_coef: float = 0.5   # presumably the value-loss weight

    # --- rollout limits (consumed by train/test, not visible here) ---
    num_steps: int = 255               # presumably steps per rollout before an update
    max_episode_length: int = 10000    # presumably a hard cap on episode length
    

### 2. Instantiate the Environment and Agent

In [None]:
# Limit OpenMP to one thread per process and hide all CUDA devices, so every
# worker runs single-threaded on the CPU.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['CUDA_VISIBLE_DEVICES'] = ""
torch.manual_seed(args.seed)

# Use the configured environment name rather than a hard-coded None, so that
# changing `args.env_name` in the config cell actually takes effect.
env = create_atari_env(args.env_name)

# The model lives in shared memory; every worker process receives this object.
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
shared_model.share_memory()

# With `no_shared` set, workers receive optimizer=None; otherwise a single
# SharedAdam (its state also moved to shared memory) is handed to all workers.
if args.no_shared:
    optimizer = None
else:
    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

processes = []

# Shared signed-int counter starting at 0, plus a lock passed to the trainers.
counter = mp.Value('i', 0)
lock = mp.Lock()

try:
    # One `test` process; it is given rank == num_processes so its rank does
    # not collide with the trainer ranks 0..num_processes-1.
    p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter))
    p.start()
    processes.append(p)

    for rank in range(0, args.num_processes):
        p = mp.Process(target=train, args=(rank, args, shared_model, counter, lock, optimizer))
        p.start()
        processes.append(p)

    # Blocks until every worker exits (or the cell is interrupted).
    for p in processes:
        p.join()
finally:
    # If the cell is interrupted or a start/join fails, do not leave orphaned
    # worker processes running behind the kernel.
    for p in processes:
        if p.is_alive():
            p.terminate()
            p.join()

Time 00h 00m 00s, num steps 224, FPS 630, episode reward -2359.628501009234, episode length 100
Time 00h 01m 00s, num steps 25850, FPS 424, episode reward -2359.628501009234, episode length 100
Time 00h 02m 01s, num steps 51037, FPS 420, episode reward -2359.628501009234, episode length 100
Time 00h 03m 01s, num steps 76405, FPS 420, episode reward -2359.628501009234, episode length 100
Time 00h 04m 02s, num steps 101777, FPS 420, episode reward -2382.0316954362584, episode length 101
Time 00h 05m 02s, num steps 127235, FPS 420, episode reward -2359.628501009234, episode length 100
Time 00h 06m 03s, num steps 152856, FPS 421, episode reward -2496.3258534164647, episode length 106
Time 00h 07m 03s, num steps 178122, FPS 420, episode reward -2359.628501009234, episode length 100
Time 00h 08m 04s, num steps 203843, FPS 421, episode reward -2359.628501009234, episode length 100
Time 00h 09m 05s, num steps 229269, FPS 421, episode reward -2359.628501009234, episode length 100
Time 00h 10m 0

  action = 0.01 * action/np.linalg.norm(action, ord = 2)


Time 00h 43m 23s, num steps 1075207, FPS 413, episode reward nan, episode length 125
Time 00h 44m 24s, num steps 1099516, FPS 413, episode reward -2686.5007690862344, episode length 114
Time 00h 45m 24s, num steps 1123683, FPS 412, episode reward nan, episode length 138
Time 00h 46m 25s, num steps 1148387, FPS 412, episode reward nan, episode length 143
Time 00h 47m 26s, num steps 1172846, FPS 412, episode reward -3091.4665125086394, episode length 129
Time 00h 48m 26s, num steps 1197230, FPS 412, episode reward -3091.4665125086394, episode length 129
Time 00h 49m 27s, num steps 1221576, FPS 412, episode reward -2952.1027149632137, episode length 124
Time 00h 50m 28s, num steps 1246289, FPS 412, episode reward -3476.8914957715156, episode length 144
Time 00h 51m 28s, num steps 1271088, FPS 411, episode reward -2711.4744990542754, episode length 115
Time 00h 52m 29s, num steps 1295368, FPS 411, episode reward nan, episode length 100
Time 00h 53m 30s, num steps 1319838, FPS 411, episode 

Time 02h 09m 14s, num steps 3156653, FPS 407, episode reward -2359.628501009234, episode length 100
Time 02h 10m 15s, num steps 3181142, FPS 407, episode reward nan, episode length 103
Time 02h 11m 16s, num steps 3205676, FPS 407, episode reward -3277.171001143663, episode length 136
Time 02h 12m 16s, num steps 3230384, FPS 407, episode reward -2841.1340885832574, episode length 120
Time 02h 13m 17s, num steps 3254987, FPS 407, episode reward -3815.950733001962, episode length 159
Time 02h 14m 18s, num steps 3279442, FPS 407, episode reward -2895.9573846533494, episode length 122
Time 02h 15m 19s, num steps 3304002, FPS 407, episode reward -2359.628501009234, episode length 100
Time 02h 16m 19s, num steps 3328675, FPS 407, episode reward -2359.628501009234, episode length 100
Time 02h 17m 20s, num steps 3352890, FPS 407, episode reward -2359.628501009234, episode length 100
Time 02h 18m 20s, num steps 3377771, FPS 407, episode reward -2841.1340885832574, episode length 120
Time 02h 19m

Time 03h 34m 06s, num steps 5200218, FPS 405, episode reward nan, episode length 100
Time 03h 35m 06s, num steps 5225776, FPS 405, episode reward -2359.628501009234, episode length 100
Time 03h 36m 08s, num steps 5251908, FPS 405, episode reward -5426.763224821954, episode length 346
Time 03h 37m 08s, num steps 5277350, FPS 405, episode reward nan, episode length 100
Time 03h 38m 09s, num steps 5302850, FPS 405, episode reward -2496.3258534164647, episode length 106
Time 03h 39m 09s, num steps 5328409, FPS 405, episode reward nan, episode length 104
Time 03h 40m 10s, num steps 5353982, FPS 405, episode reward -2868.348401136659, episode length 121
Time 03h 41m 10s, num steps 5379583, FPS 405, episode reward -2762.40393871813, episode length 117
Time 03h 42m 11s, num steps 5404794, FPS 405, episode reward -2736.7841642573508, episode length 116
Time 03h 43m 12s, num steps 5430318, FPS 405, episode reward nan, episode length 100
Time 03h 44m 12s, num steps 5455941, FPS 406, episode rewar

Time 05h 01m 59s, num steps 7440730, FPS 411, episode reward -2359.628501009234, episode length 100
Time 05h 02m 59s, num steps 7466456, FPS 411, episode reward -2359.628501009234, episode length 100
Time 05h 04m 00s, num steps 7492435, FPS 411, episode reward -4005.997723275353, episode length 167
Time 05h 05m 01s, num steps 7518469, FPS 411, episode reward nan, episode length 100
Time 05h 06m 01s, num steps 7544057, FPS 411, episode reward nan, episode length 102
Time 05h 07m 02s, num steps 7569906, FPS 411, episode reward nan, episode length 103
Time 05h 08m 02s, num steps 7595928, FPS 411, episode reward -2473.2160182689036, episode length 105
Time 05h 09m 03s, num steps 7621593, FPS 411, episode reward nan, episode length 100
Time 05h 10m 03s, num steps 7646953, FPS 411, episode reward nan, episode length 100
Time 05h 11m 04s, num steps 7672882, FPS 411, episode reward nan, episode length 102
Time 05h 12m 04s, num steps 7699031, FPS 411, episode reward nan, episode length 100
Time

Time 06h 27m 50s, num steps 9639883, FPS 414, episode reward -2359.628501009234, episode length 100
Time 06h 28m 51s, num steps 9665131, FPS 414, episode reward -3658.7180934576277, episode length 152
Time 06h 29m 52s, num steps 9691405, FPS 414, episode reward nan, episode length 114
Time 06h 30m 53s, num steps 9717515, FPS 414, episode reward -3091.4665125086394, episode length 129
Time 06h 31m 53s, num steps 9743625, FPS 414, episode reward nan, episode length 106
Time 06h 32m 54s, num steps 9769593, FPS 414, episode reward nan, episode length 119
Time 06h 33m 54s, num steps 9795525, FPS 414, episode reward -2496.3258534164647, episode length 106
Time 06h 34m 55s, num steps 9821342, FPS 414, episode reward -2382.0316954362584, episode length 101
Time 06h 35m 56s, num steps 9847418, FPS 415, episode reward -4261.671250156531, episode length 177
Time 06h 36m 56s, num steps 9873078, FPS 415, episode reward -2359.628501009234, episode length 100
Time 06h 37m 57s, num steps 9899245, FPS 

Time 07h 52m 50s, num steps 11818518, FPS 417, episode reward -2359.628501009234, episode length 100
Time 07h 53m 51s, num steps 11844448, FPS 417, episode reward nan, episode length 131
Time 07h 54m 51s, num steps 11870105, FPS 417, episode reward -2359.628501009234, episode length 100
Time 07h 55m 52s, num steps 11896283, FPS 417, episode reward -2711.4744990542754, episode length 115
Time 07h 56m 52s, num steps 11922157, FPS 417, episode reward -2359.628501009234, episode length 100
Time 07h 57m 53s, num steps 11948480, FPS 417, episode reward -4604.927430982805, episode length 192
Time 07h 58m 54s, num steps 11974643, FPS 417, episode reward -4627.5380130946305, episode length 193
Time 07h 59m 55s, num steps 12000981, FPS 417, episode reward -2404.58579547247, episode length 102
Time 08h 00m 55s, num steps 12027161, FPS 417, episode reward -2736.7841642573508, episode length 116
Time 08h 01m 56s, num steps 12052909, FPS 417, episode reward -2762.40393871813, episode length 117
Time

Time 09h 18m 47s, num steps 14017397, FPS 418, episode reward -2895.9573846533494, episode length 122
Time 09h 19m 48s, num steps 14043074, FPS 418, episode reward -2359.628501009234, episode length 100
Time 09h 20m 48s, num steps 14068967, FPS 418, episode reward -2952.1027149632137, episode length 124
Time 09h 21m 49s, num steps 14094670, FPS 418, episode reward -2404.58579547247, episode length 102
Time 09h 22m 51s, num steps 14121217, FPS 418, episode reward -5433.230187636285, episode length 385
Time 09h 23m 51s, num steps 14147353, FPS 418, episode reward -3592.7193457545322, episode length 149
Time 09h 24m 52s, num steps 14172486, FPS 418, episode reward -2382.0316954362584, episode length 101
Time 09h 25m 52s, num steps 14198403, FPS 418, episode reward -2359.628501009234, episode length 100
Time 09h 26m 53s, num steps 14224471, FPS 418, episode reward -3570.3574395842074, episode length 148
Time 09h 27m 53s, num steps 14250289, FPS 418, episode reward nan, episode length 116
T

Time 10h 43m 39s, num steps 16191061, FPS 419, episode reward nan, episode length 100
Time 10h 44m 40s, num steps 16217033, FPS 419, episode reward -3277.171001143663, episode length 136
Time 10h 45m 40s, num steps 16242829, FPS 419, episode reward nan, episode length 100
Time 10h 46m 41s, num steps 16268333, FPS 419, episode reward nan, episode length 100
Time 10h 47m 41s, num steps 16294283, FPS 419, episode reward nan, episode length 100
Time 10h 48m 42s, num steps 16320133, FPS 419, episode reward nan, episode length 100
Time 10h 49m 42s, num steps 16346298, FPS 419, episode reward nan, episode length 101
Time 10h 50m 43s, num steps 16372110, FPS 419, episode reward -3036.374576158774, episode length 127
Time 10h 51m 44s, num steps 16398093, FPS 419, episode reward -3064.0276716425524, episode length 128
Time 10h 52m 44s, num steps 16424184, FPS 419, episode reward nan, episode length 100
Time 10h 53m 44s, num steps 16450335, FPS 419, episode reward -2359.628501009234, episode leng

Time 12h 09m 36s, num steps 18393219, FPS 420, episode reward -2895.9573846533494, episode length 122
Time 12h 10m 37s, num steps 18419215, FPS 420, episode reward -4400.235622540142, episode length 183
Time 12h 11m 37s, num steps 18445078, FPS 420, episode reward -2895.9573846533494, episode length 122
Time 12h 12m 40s, num steps 18471248, FPS 420, episode reward -5450.9959728535705, episode length 492
Time 12h 13m 42s, num steps 18497611, FPS 420, episode reward -5439.539197188287, episode length 423
Time 12h 14m 42s, num steps 18523879, FPS 420, episode reward -2736.7841642573508, episode length 116
Time 12h 15m 43s, num steps 18549753, FPS 420, episode reward -2359.628501009234, episode length 100
Time 12h 16m 43s, num steps 18575875, FPS 420, episode reward -2359.628501009234, episode length 100
Time 12h 17m 44s, num steps 18601873, FPS 420, episode reward -2359.628501009234, episode length 100
Time 12h 18m 44s, num steps 18628214, FPS 420, episode reward -2566.3532063447396, epis

Time 13h 34m 38s, num steps 20617305, FPS 422, episode reward -2566.3532063447396, episode length 109
Time 13h 35m 38s, num steps 20644494, FPS 422, episode reward -2359.628501009234, episode length 100
Time 13h 36m 39s, num steps 20671197, FPS 422, episode reward -2359.628501009234, episode length 100
Time 13h 37m 39s, num steps 20697586, FPS 422, episode reward nan, episode length 103
Time 13h 38m 40s, num steps 20724658, FPS 422, episode reward nan, episode length 102
Time 13h 39m 40s, num steps 20751172, FPS 422, episode reward -2359.628501009234, episode length 100
Time 13h 40m 41s, num steps 20777247, FPS 422, episode reward nan, episode length 104
Time 13h 41m 41s, num steps 20803202, FPS 422, episode reward nan, episode length 106
Time 13h 42m 42s, num steps 20829495, FPS 422, episode reward -4334.75905684863, episode length 180
Time 13h 43m 43s, num steps 20855615, FPS 422, episode reward -3118.7238517477576, episode length 130
Time 13h 44m 43s, num steps 20881483, FPS 422, ep

Time 15h 00m 44s, num steps 22820533, FPS 422, episode reward -5440.3694145497375, episode length 428
Time 15h 01m 45s, num steps 22846608, FPS 422, episode reward nan, episode length 122
Time 15h 02m 45s, num steps 22872939, FPS 422, episode reward -2542.8876875127985, episode length 108
Time 15h 03m 46s, num steps 22898702, FPS 422, episode reward -2519.5420729605826, episode length 107
Time 15h 04m 47s, num steps 22924460, FPS 422, episode reward -3302.5937644912797, episode length 137
Time 15h 05m 48s, num steps 22950822, FPS 422, episode reward -5426.597539168268, episode length 345
Time 15h 06m 49s, num steps 22976311, FPS 422, episode reward nan, episode length 126
Time 15h 07m 50s, num steps 23002163, FPS 422, episode reward -3932.181443029756, episode length 164
Time 15h 08m 51s, num steps 23028233, FPS 422, episode reward -2473.2160182689036, episode length 105
Time 15h 09m 51s, num steps 23053732, FPS 422, episode reward -2613.7270159203285, episode length 111
Time 15h 10m 5

### 4. Watch a Smart Agent!

In [None]:
# NOTE(review): `agent` is not defined in any cell visible in this notebook, and
# no visible cell writes 'checkpoint_actor.pth' / 'checkpoint_critic.pth'. As
# written, this cell raises NameError on a fresh kernel. It looks like a
# leftover from an actor/critic (`actor_local` / `critic_local`) template --
# confirm, and replace with inference through `shared_model` once its call
# signature is known.
agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

# Roll out a single episode for at most 200 steps, rendering each step and
# printing the per-step reward and done flag.
state = env.reset()
for t in range(200):
    action = agent.act(state, add_noise=False)
    env.render()
    state, reward, done, _ = env.step(action)
    print(reward, done)
    if done:
        break 
# NOTE(review): assumes env.render() returns an object with a .show() method
# (e.g. a PIL image) -- confirm against the environment implementation.
img = env.render()
img.show()
#env.close()

### 6. Explore

In this exercise, we have provided a sample A3C agent and demonstrated how to use it to solve an OpenAI Gym environment.  To continue your learning, you are encouraged to complete any (or all!) of the following tasks:
- Amend the various hyperparameters and network architecture to see if you can get your agent to solve the environment faster than this benchmark implementation.  Once you build intuition for the hyperparameters that work well with this environment, try solving a different OpenAI Gym task!
- Write your own A3C implementation.  Use this code as reference only when needed -- try as much as you can to write your own algorithm from scratch.
- You may also like to implement prioritized experience replay, to see if it speeds learning.  
- The current implementation adds Ornstein-Uhlenbeck noise to the action space.  However, it has [been shown](https://blog.openai.com/better-exploration-with-parameter-noise/) that adding noise to the parameters of the neural network policy can improve performance.  Make this change to the code, to verify it for yourself!
- Write a blog post explaining the intuition behind the A3C algorithm and demonstrating how to use it to solve an RL environment of your choosing.  