## Atari Breakout Q-Learning

##### Allow GPU memory growth for TensorFlow (Windows workaround)

In [1]:
import tensorflow as tf
# Build the session config with the GPU "allow_growth" option switched on,
# then open the session with that config.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

##### Verify GPU availability

In [2]:
#from tensorflow.python.client import device_lib
#print(device_lib.list_local_devices())
#from keras import backend as K
#print(K.tensorflow_backend._get_available_gpus())

#### Constants

In [3]:
# Frame dimensions handed to ImagePreprocessor and used for the first two
# entries of the agent's state shape.
IMG_HEIGHT = 100
IMG_WIDTH = 100
# Last entry of the agent's state shape.
N_FRAMES = 4

# Number of episodes passed to train().
n_episodes = 10000

In [4]:
from ai_image_preprocess import ImagePreprocessor

# Single preprocessor instance shared by every call to process().
img_pp = ImagePreprocessor(IMG_WIDTH, IMG_HEIGHT)


def process(frame):
    """Return ``frame`` after passing it through ``img_pp.preprocess``."""
    processed = img_pp.preprocess(frame)
    return processed

#### Training

In [None]:
import ai_display as display
import ai_util as util
import ai_logger as logger

def train(env, agent, n_episodes, model_name="model", save_interval=100,
          max_steps=1000, batch_size=128):
    """Train ``agent`` on ``env`` for ``n_episodes`` episodes.

    Every episode resets the environment and then, for at most ``max_steps``
    steps, asks the agent for an action, steps the environment and stores the
    transition with ``agent.remember``.  After each episode one line is
    logged, ``agent.replay`` is called once, and the model is saved
    periodically.  The model is saved once more when training finishes.

    Args:
        env: Environment with ``reset()`` returning a frame and
            ``step(action)`` returning ``(frame, reward, done, info)``.
        agent: Object providing ``act``, ``remember``, ``replay``,
            ``save_model`` and an ``epsilon`` attribute.
        n_episodes: Number of episodes to play.
        model_name: Name passed to ``agent.save_model``.
        save_interval: Save whenever the zero-based episode index is a
            multiple of this value.  ``0`` or ``None`` disables the periodic
            saves (the final save still happens).
        max_steps: Upper bound on environment steps per episode.
        batch_size: Batch size passed to ``agent.replay``.
    """
    for episode in range(n_episodes):
        frame = env.reset()
        state = util.create_state_arr(process(frame))
        score = 0
        steps = 0  # environment steps actually played in this episode

        for _ in range(max_steps):
            # Debug aid: uncomment to display the newest frame of the state.
            #display.show_state(state[:, :, 3], env.spec.id, steps, score)

            action = agent.act(state)

            next_frame, reward, done, info = env.step(action)
            next_state = util.update_state_arr(state, process(next_frame))

            agent.remember(state, action, reward, next_state, done)

            state = next_state
            score += reward
            steps += 1

            if done:
                break

        # Log after the loop so that episodes cut off at max_steps are
        # reported too, and report the number of steps played rather than
        # the zero-based index of the last step.
        logger.log("Episode {:>5}/{}, #frames {:>3}, score {}, epsilon {}"
                   .format(episode + 1, n_episodes, steps, score, agent.epsilon))

        agent.replay(batch_size=batch_size)

        # A falsy interval disables checkpoints instead of dividing by zero.
        if save_interval and episode % save_interval == 0:
            agent.save_model(model_name)

    agent.save_model(model_name)
        

#### Main

In [None]:
from ai_agent import Agent
import gym

# Name shared by the model load below and the saves performed inside train().
model_name = "model"

# Alternative environment:
#env = gym.make("BreakoutDeterministic-v4")
env = gym.make("Breakout-v0")
n_actions = env.action_space.n
# NOTE(review): ordered (width, height, frames); both sides are 100 here, so
# the order only matters if the constants ever differ -- confirm against Agent.
state_shape = (IMG_WIDTH, IMG_HEIGHT, N_FRAMES)

agent = Agent(state_shape, n_actions, exploit=False)

# Presumably swap the two calls below to start from a fresh model instead of
# the stored one -- verify against ai_agent.
#agent.new_model()
agent.load_model(model_name)
train(env, agent, n_episodes, model_name=model_name, save_interval=50)

Episode     6/10000, #frames 337, score 3.0, epsilon 0.9752487531218751
Episode     7/10000, #frames 259, score 1.0, epsilon 0.9703725093562657
Episode     8/10000, #frames 166, score 0.0, epsilon 0.9655206468094844
Episode     9/10000, #frames 245, score 1.0, epsilon 0.960693043575437
Episode    10/10000, #frames 244, score 1.0, epsilon 0.9558895783575597
Episode    11/10000, #frames 189, score 0.0, epsilon 0.9511101304657719
Episode    12/10000, #frames 167, score 0.0, epsilon 0.946354579813443
Episode    13/10000, #frames 237, score 1.0, epsilon 0.9416228069143757
Episode    14/10000, #frames 215, score 1.0, epsilon 0.9369146928798039
Episode    15/10000, #frames 261, score 2.0, epsilon 0.9322301194154049
