In [12]:
import tensorflow as tf
import numpy as np
import wrapped_flappy_bird as game
from collections import deque
import cv2
import sys

In [2]:
# Identifier string for the game (not referenced in the cells visible here).
GAME = 'flappybird'
# Number of valid actions; sizes the action vector and the network readout.
ACTIONS = 2

In [3]:
def weight_variable(shape):
    """Return a tf.Variable of the given shape for use as a weight tensor.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.01.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01))
def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.01."""
    return tf.Variable(tf.constant(0.01, shape=shape))
def conv2d(x, W, stride):
    """Convolve ``x`` with the filter ``W`` using 'SAME' padding.

    ``stride`` is applied to the two middle dimensions of the strides list;
    the first and last entries are fixed at 1.
    """
    step = [1, stride, stride, 1]
    return tf.nn.conv2d(x, W, strides=step, padding='SAME')
def max_pool(x):
    """Apply 2x2 max pooling with a stride of 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]  # used for both the kernel size and the strides
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [4]:
def createNetwork():
    """Build the convolutional network graph.

    Three convolution + max-pool stages feed one fully connected hidden layer
    followed by a linear readout with ACTIONS outputs.

    Returns:
        A tuple ``(s, readout, h_fc1)``: the float input placeholder of shape
        [None, 80, 80, 4], the readout tensor, and the activations of the
        fully connected hidden layer.
    """
    # All parameters are created up front, layer by layer.
    conv1_w = weight_variable([8, 8, 4, 32])
    conv1_b = bias_variable([32])

    conv2_w = weight_variable([4, 4, 32, 64])
    conv2_b = bias_variable([64])

    conv3_w = weight_variable([3, 3, 64, 64])
    conv3_b = bias_variable([64])

    fc1_w = weight_variable([256, 256])
    fc1_b = bias_variable([256])

    fc2_w = weight_variable([256, ACTIONS])
    fc2_b = bias_variable([ACTIONS])

    # Input layer: batches of 80x80 images with 4 channels.
    s = tf.placeholder('float', [None, 80, 80, 4])

    # Stage 1: stride-4 convolution, then pooling (20x20x32 -> 10x10x32).
    stage1 = max_pool(tf.nn.relu(conv2d(s, conv1_w, 4) + conv1_b))

    # Stage 2: stride-2 convolution, then pooling (5x5x64 -> 3x3x64).
    stage2 = max_pool(tf.nn.relu(conv2d(stage1, conv2_w, 2) + conv2_b))

    # Stage 3: stride-1 convolution, then pooling (3x3x64 -> 2x2x64 = 256).
    stage3 = max_pool(tf.nn.relu(conv2d(stage2, conv3_w, 1) + conv3_b))

    # Fully connected hidden layer on the flattened 256-value feature vector.
    flat = tf.reshape(stage3, [-1, 256])
    h_fc1 = tf.nn.relu(tf.matmul(flat, fc1_w) + fc1_b)

    # Linear readout layer: one output per action.
    readout = tf.matmul(h_fc1, fc2_w) + fc2_b

    return s, readout, h_fc1
    

In [14]:
def trainNetwork(s, readout, h_fc1, sess):
    """Define the training ops and build the initial game observation.

    Args:
        s: input placeholder of the network (as returned by createNetwork).
        readout: network output tensor with one column per action.
        h_fc1: hidden fully connected activations; not used in this body.
        sess: session object; not used in this body.

    NOTE(review): this function currently stops after building the first
    stacked state; it has no training loop and returns nothing.
    """
    # Cost function.
    # `a` has one column per action (presumably a one-hot mask of the action
    # taken -- confirm against the caller); `y` holds one target per sample.
    a = tf.placeholder('float', [None, ACTIONS])
    y = tf.placeholder('float', [None])
    # Reduce over the action axis only, so the result has one value per
    # sample and lines up element-wise with `y`. Without the axis argument
    # the sum collapses the whole batch into a single scalar.
    readout_action = tf.reduce_sum(tf.multiply(readout, a), axis=1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # Open up a game state to communicate with the emulator.
    game_state = game.GameState()

    # Replay memory for previous observations.
    D = deque()

    # Get the first frame using the action vector [1, 0, ...] and preprocess
    # it: resize to 80x80, convert to grayscale, then binarize.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    _, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    # Repeat the first frame four times along a new last axis (80x80x4).
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

In [7]:
# Scratch check: display the all-zero vector with one entry per action.
np.zeros(ACTIONS)

array([ 0.,  0.])