#### Constants

In [1]:
# Frame size handed to ImagePreprocessor and used to build state_shape.
IMG_HEIGHT = 100
IMG_WIDTH = 100

# Probability of taking a uniformly sampled action instead of agent.predict().
epsilon = 0.4
# Forwarded to agent.fit(); presumably the reward discount factor -- confirm
# against ai_agent.
gamma = 0.99
# Number of episodes run by train().
n_episodes = 500

#### Fill memory

In [2]:
import ai_util as util

def fill_memory(env, memory, img_pp, n_steps=32):
    """Seed the replay memory with transitions gathered by acting randomly.

    Resets ``env``, then takes ``n_steps`` uniformly sampled actions and
    stores one ``(states, action, reward, next_states, done)`` entry per step
    through ``memory.add``.  If an episode ends before ``n_steps`` transitions
    have been collected, the environment is reset and the frame stack is
    restarted, so no step is ever taken on a finished episode.

    Args:
        env: Gym-style environment providing ``reset``, ``step``,
            ``render(mode='rgb_array')`` and ``action_space.sample``.
        memory: Replay buffer exposing
            ``add(states, action, reward, next_states, done)``.
        img_pp: Preprocessor whose ``preprocess(frame)`` converts a raw frame
            into the form kept in the frame stack.
        n_steps: Number of transitions to collect.  Defaults to 32, the batch
            size ``train`` requests from ``memory.sample``.

    Returns:
        None.  ``memory`` is populated through its ``add`` method.
    """

    def start_episode():
        # Reset the environment and build a fresh frame stack from the first
        # rendered frame.
        env.reset()
        first_frame = img_pp.preprocess(env.render(mode='rgb_array'))
        return util.create_states_arr(first_frame)

    states = start_episode()
    for _ in range(n_steps):
        # The frame currently on screen becomes the newest entry of the stack
        # before an action is chosen.
        state = img_pp.preprocess(env.render(mode='rgb_array'))
        states = util.update_states(states, state)

        action = env.action_space.sample()
        next_state_raw, reward, done, _ = env.step(action)
        next_state = img_pp.preprocess(next_state_raw)
        next_states = util.update_states(states, next_state)
        memory.add(states, action, reward, next_states, done)

        if done:
            # Stepping a terminated Gym episode is not supported; begin a new
            # episode before collecting further transitions.
            states = start_episode()

#### Training

In [3]:
from ai_memory import Memory
import ai_display as display
import random

def train(env, agent, n_episodes, img_pp):
    """Train ``agent`` on ``env`` for ``n_episodes`` episodes.

    A fresh ``Memory`` is created and pre-filled by ``fill_memory``.  In every
    step the rendered frame is preprocessed and pushed onto the frame stack,
    an action is chosen (uniformly at random with probability ``epsilon``,
    otherwise ``agent.predict(states)``), the resulting transition is stored,
    and the agent is fitted on a batch of 32 entries sampled from the memory.

    ``epsilon`` and ``gamma`` are read from the notebook-level constants.

    Args:
        env: Gym-style environment providing ``reset``, ``step``,
            ``render(mode='rgb_array')`` and ``action_space``.
        agent: Object exposing ``predict(states)`` and
            ``fit(batch, gamma, n_actions)``.
        n_episodes: Number of episodes to run.
        img_pp: Preprocessor whose ``preprocess(frame)`` converts a raw frame
            into the form kept in the frame stack.

    Returns:
        None.  The agent is updated in place through ``agent.fit``.
    """
    memory = Memory()
    fill_memory(env, memory, img_pp)

    # Take the action count from the environment that is actually being
    # trained on rather than from a global defined in a later cell.
    n_actions = env.action_space.n

    for episode in range(n_episodes):
        env.reset()
        state = img_pp.preprocess(env.render(mode='rgb_array'))
        states = util.create_states_arr(state)
        done = False

        while not done:
            # The frame currently on screen is pushed onto the stack exactly
            # once per step, here.  `states` is deliberately NOT replaced by
            # `next_states` at the end of the iteration: that stack already
            # ends with this same frame, so carrying it over would push the
            # frame a second time.  fill_memory follows the same scheme.
            # NOTE(review): assumes render() shows the observation returned by
            # the previous step() and that util.update_states pushes a single
            # frame -- confirm against ai_util.
            state = img_pp.preprocess(env.render(mode='rgb_array'))
            states = util.update_states(states, state)

            # Epsilon-greedy action selection.
            if random.random() < epsilon:
                action = env.action_space.sample()
            else:
                action = agent.predict(states)

            next_state_raw, reward, done, _ = env.step(action)
            next_state = img_pp.preprocess(next_state_raw)
            next_states = util.update_states(states, next_state)

            memory.add(states, action, reward, next_states, done)
            batch = memory.sample(32)
            agent.fit(batch, gamma, n_actions)

        print("Finished episode {}".format(episode))

#### Playing

In [4]:
import ai_display as display

def play(env, agent, n_episodes, img_pp):
    """Run one episode with ``agent`` and display every preprocessed frame.

    Actions are still chosen epsilon-greedily using the notebook-level
    ``epsilon``.
    NOTE(review): exploring at the training rate during playback may be
    unintended -- confirm.

    Args:
        env: Gym-style environment providing ``reset``, ``step``,
            ``render(mode='rgb_array')``, ``action_space`` and ``spec.id``.
        agent: Object exposing ``predict(states)``.
        n_episodes: Accepted for signature compatibility; currently unused,
            a single episode is played.
        img_pp: Preprocessor whose ``preprocess(frame)`` converts a raw frame
            into the form kept in the frame stack.

    Returns:
        None.
    """
    env.reset()
    state = img_pp.preprocess(env.render(mode='rgb_array'))
    states = util.create_states_arr(state)
    done = False

    while not done:
        # The frame currently on screen is pushed onto the stack exactly once
        # per step, here.  The observation returned by step() is not pushed as
        # well, because the next iteration renders and pushes that same frame.
        # NOTE(review): assumes render() shows the observation returned by the
        # previous step() -- confirm for the environment in use.
        state = img_pp.preprocess(env.render(mode='rgb_array'))
        states = util.update_states(states, state)

        display.show_state(state, env.spec.id)

        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = agent.predict(states)

        # Only the termination flag is needed from the step result.
        _, _, done, _ = env.step(action)

#### Main

In [None]:
from ai_agent import Agent
from ai_image_preprocess import ImagePreprocessor
import gym

# Build the agent and the frame preprocessor from the notebook constants.
agent = Agent()
img_pp = ImagePreprocessor(IMG_WIDTH, IMG_HEIGHT)

env = gym.make("BreakoutDeterministic-v4")
# Input shape for agent.build_model; only the commented-out call below reads
# it.  The trailing 4 is presumably the number of stacked frames -- confirm
# against ai_util.
state_shape = (IMG_WIDTH, IMG_HEIGHT, 4)
# Number of discrete actions reported by the environment; kept at notebook
# level because other cells may read it.
n_actions = env.action_space.n

# Train
# load_model presumably restores a model previously stored under "model".
# To start from a freshly built model instead, swap it for the build_model
# call on the next line.
#agent.build_model(state_shape, n_actions)
agent.load_model("model")
train(env, agent, n_episodes, img_pp)
agent.save_model("model")

# Play
play(env, agent, n_episodes, img_pp)    

Using TensorFlow backend.
