# Asynchronous Advantage Actor-Critic (A3C)
In this example, we use an implementation of the A3C algorithm to teach the agent how to play Doom, that classic 1993 game we played all the way through the 2000s on our calculators in math class.

Requires the VizDoom package: `pip install vizdoom`

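While training is taking place, statistics on agent performance are available from TensorBoard. The command below assumes four workers; the notebook starts one worker per CPU core, so add or remove `worker_N:'./train_N'` entries to match. To launch it, use: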

`tensorboard --logdir=worker_0:'./train_0',worker_1:'./train_1',worker_2:'./train_2',worker_3:'./train_3'`

In [1]:
import threading
import multiprocessing
import numpy as np
import tensorflow as tf
import time

import matplotlib.pyplot as plt
%matplotlib inline

from a3c import ACNetwork, Worker
from helper import *
from scipy.signal import lfilter
from skimage.transform import resize

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
# --- Observation / action space ---------------------------------------------
img_sz = 84                    # side length of the square input frame
s_size = img_sz * img_sz       # Observations are grayscale frames of 84*84*1 (= 7056)
a_size = 3                     # Agent can move left, right, or fire

# --- Training hyperparameters -----------------------------------------------
lr = 1e-4                      # learning rate handed to the Adam optimizer
gamma = 0.99                   # passed to Worker.work; presumably the reward discount factor
buffer_sz = 30                 # passed to each Worker; presumably the experience buffer length
max_episode_length = 300       # passed to Worker.work as the per-episode cap

# --- Checkpointing ----------------------------------------------------------
model_dir = 'model'            # directory created for (and passed to workers as) the model location
load_model = False             # True: restore the latest checkpoint instead of initializing variables

In [None]:
# Training driver: builds the shared ("global") network plus one Worker per
# CPU core, then runs every worker's `work` loop in its own thread until the
# coordinator reports that all threads have finished.
import os  # explicit import; the cell previously relied on a star import to provide it

tf.reset_default_graph()

# Make sure the output directories exist before any worker starts.
# exist_ok avoids the check-then-create race of `if not exists: makedirs`.
os.makedirs(model_dir, exist_ok=True)
os.makedirs('frames', exist_ok=True)

with tf.device('/cpu:0'):
    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    trainer = tf.train.AdamOptimizer(learning_rate=lr)
    # Not referenced again in this cell; presumably constructing it registers
    # the shared 'global' variables the workers sync with — confirm in a3c.py.
    master_network = ACNetwork(s_size, a_size, img_sz, 'global', None)
    num_workers = multiprocessing.cpu_count()
    # Create the Workers, one per CPU core, each with its own game instance.
    # NOTE(review): DoomGame is not imported explicitly in this notebook; it is
    # expected to arrive via `from helper import *` — confirm, or import it
    # from vizdoom in the import cell.
    workers = [
        Worker(
            DoomGame(), i, s_size, a_size, buffer_sz,
            trainer, model_dir, global_episodes
        )
        for i in range(num_workers)
    ]
    saver = tf.train.Saver(max_to_keep=5)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    if load_model:
        print('Loading model...')
        # Fixed: the original read the undefined name `model_path`.
        ckpt = tf.train.get_checkpoint_state(model_dir)
        # get_checkpoint_state returns None when no checkpoint is found; fail
        # with a clear message instead of an AttributeError on `None`.
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise FileNotFoundError(
                'No checkpoint found in {!r}; set load_model = False to '
                'train from scratch.'.format(model_dir)
            )
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        # Fixed: the initializer must be *called* to obtain the op to run.
        sess.run(tf.global_variables_initializer())

    # This is where the asynchronous magic happens. Start the "work"
    # process for each Worker in a separate thread.
    worker_threads = []
    for worker in workers:
        # Pass the bound method and its arguments directly instead of a lambda
        # closing over the loop variable, which binds late and could run a
        # different worker than intended.
        t = threading.Thread(
            target=worker.work,
            args=(max_episode_length, gamma, sess, coord, saver),
        )
        t.start()
        # Stagger thread start-up by half a second (kept from the original).
        # Fixed: only the `time` module is imported, so bare `sleep` was undefined.
        time.sleep(0.5)
        worker_threads.append(t)
    # Block until every worker thread has stopped.
    coord.join(worker_threads)