## Atari Breakout Q-Learning

##### Allow TensorFlow GPU memory growth (Windows workaround)

In [1]:
import tensorflow as tf
# Build the session config with GPU memory growth switched on
# (gpu_options.allow_growth = True), then open a session that uses it.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

##### Verify GPU availability

```python
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
from keras import backend as K
print(K.tensorflow_backend._get_available_gpus())
```

#### Training

In [2]:
from ai_image_preprocess import preprocess

def step(env, action, state):
    """Repeat `action` for up to two environment steps and build the next state.

    Relies on the module-level names `util` and `preprocess` being defined
    before the first call.

    Args:
        env: Environment whose ``step(action)`` returns a 4-tuple
            ``(frame, reward, done, info)``.
        action: Action forwarded unchanged to ``env.step``.
        state: Current state, handed to ``util.update_state_arr`` together
            with the two preprocessed frames.

    Returns:
        A tuple ``(next_state, reward, done)`` where ``reward`` is the summed
        reward of the executed steps cast to ``int`` and ``done`` is truthy if
        either executed step ended the episode.
    """
    next_frame_1, reward_1, done_1, _ = env.step(action)
    if done_1:
        # The episode already ended: stepping a finished gym environment is
        # undefined behaviour, so reuse the terminal frame and add no reward.
        next_frame_2, reward_2, done_2 = next_frame_1, 0, done_1
    else:
        next_frame_2, reward_2, done_2, _ = env.step(action)
    next_state = util.update_state_arr(state, preprocess(next_frame_1), preprocess(next_frame_2))
    return (next_state, int(reward_1 + reward_2), done_1 or done_2)

In [3]:
import ai_display as display
import ai_util as util
from ai_logger import Logger
import time

def train(env, agent, n_episodes=10000, model_name="model", save_interval=50, log_interval=5,
          max_steps=1000, batch_size=32):
    """Run the episode loop: act, store the transition, replay, log and save.

    Relies on the module-level names `Logger`, `util`, `preprocess`, `step`
    and `time`.

    Args:
        env: Environment providing ``reset()``; stepping is delegated to ``step``.
        agent: Object providing ``act``, ``remember``, ``replay`` and
            ``save_model``.
        n_episodes: Number of episodes to run.
        model_name: Value passed to ``agent.save_model``.
        save_interval: The model is saved whenever ``episode % save_interval == 0``
            (episode index is 0-based) and once more after the last episode.
        log_interval: ``logger.log()`` is called whenever
            ``episode % log_interval == 0``.
        max_steps: Upper bound on the number of ``step`` calls per episode.
        batch_size: Forwarded to ``agent.replay`` after every step.
    """
    logger = Logger(10, "episode | frames | score | step time")

    for episode in range(n_episodes):

        frame = env.reset()
        state = util.create_state_arr(preprocess(frame))
        score = 0
        steps = 0

        start_time = time.time()
        for t in range(max_steps):
            # Debug hook, disabled by default:
            #display.show_state(state, env.spec.id, t, score)

            action = agent.act(state)

            next_state, reward, done = step(env, action, state)

            agent.remember(state, action, reward, next_state, done)

            state = next_state
            score += reward
            # t is the 0-based index, so t + 1 steps have been taken so far.
            steps = t + 1

            agent.replay(batch_size=batch_size)

            if done:
                break

        # Log every episode that took at least one step, including those cut
        # off by max_steps. Dividing by the real step count (never zero here)
        # avoids the ZeroDivisionError of an episode ending on its first step.
        if steps:
            logger.add("{:>7d} | {:>6d} | {:>5d} | {:>9.5f}"
                       .format(episode + 1, steps, score, (time.time() - start_time) / steps))

        if episode % log_interval == 0:
            logger.log()

        if episode % save_interval == 0:
            agent.save_model(model_name)

    agent.save_model(model_name)
        

#### Main

In [4]:
from ai_agent import Agent
import gym

# Create the Breakout environment from its gym registration id.
env = gym.make("BreakoutDeterministic-v4")
n_actions = env.action_space.n  # number of actions reported by the environment
# NOTE(review): presumably (height, width, stacked frames) of the preprocessed
# screen; confirm against ai_image_preprocess / ai_util.
state_shape = (105, 80, 4)

# NOTE(review): epsilon is presumably the exploration rate; confirm in ai_agent.
agent = Agent(state_shape, n_actions, epsilon=0.4)

# train() passes this same value to agent.save_model, so the loaded path is
# also where the model is written back.
model_name = "models/new-model.h5"
# NOTE(review): presumably the alternative to loading, i.e. start from a fresh
# model; confirm in ai_agent before enabling.
#agent.new_model()
agent.load_model(model_name) 
train(env, agent, model_name=model_name)


episode | frames | score | step time
     17 |    117 |     0 |   0.05557
     18 |    109 |     0 |   0.05920
     19 |    108 |     1 |   0.05541
     20 |    154 |     2 |   0.05534
     21 |    145 |     3 |   0.05521
     22 |    144 |     2 |   0.05949
     23 |    124 |     1 |   0.05554
     24 |    131 |     2 |   0.05539
     25 |    104 |     1 |   0.05540
     26 |    180 |     0 |   0.05830


KeyboardInterrupt: 

```python
from collections import deque
import numpy as np
import random

memory = deque()
state = [1, 2]; action = 2; reward = 99; next_state = [3, 4]; done = False;
memory.append(np.array([state, action, reward, next_state, done]))
memory.append(np.array([state, action, reward, next_state, done]))

batch = np.array(random.sample(memory, 2))

states, actions, rewards, next_states, done = np.array(np.split(batch, batch.shape[1], axis=1))[:, :, 0]
actions = util.one_hot_encode(3, actions)
states = np.stack(states)
next_states = np.stack(next_states)

print("states {}".format(states))
print("actions {}".format(actions))
print("rewards {}".format(rewards))
print("next_states {}".format(next_states))
print("done {}".format(done))
```