# Experiment

In [1]:
import os
import sys

from datetime import datetime as dt

import cv2
import gym
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Gym environment used throughout the notebook: its registered id, and the
# environment instance created from that id.
env_name = "Pong-v0"
env = gym.make(env_name)

In [33]:
# Read the observation shape and the number of discrete actions straight from
# the environment's spaces, then echo both for a quick sanity check.
state_shape, n_actions = env.observation_space.shape, env.action_space.n
print(f'States: {state_shape}\tActions: {n_actions}')

States: (210, 160, 3)	Actions: 6


In [32]:
def network(state):
    """Build the network that maps an observation to one logit per action.

    The input is reshaped to ``[-1, *state_shape]``, passed through three 5x5
    convolutions (16, 16 and 32 filters; strides 1, 2 and 2; 'same' padding),
    flattened, and fed through a 128-unit ReLU dense layer followed by a
    linear dense layer with ``n_actions`` units.

    Relies on the notebook-level ``state_shape`` and ``n_actions``.

    Args:
        state: Tensor whose elements can be reshaped to ``[-1, *state_shape]``.

    Returns:
        Tensor of unnormalized action scores (logits) whose last dimension
        is ``n_actions``.
    """
    net = tf.reshape(state, [-1, *state_shape])
    # 3 convolutional layers.
    # Each one needs a non-linearity: `tf.layers.conv2d` defaults to
    # activation=None, and consecutive linear convolutions collapse into a
    # single linear map, so the stack would add no representational power.
    net = tf.layers.conv2d(net, filters=16, kernel_size=5, strides=1,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.conv2d(net, filters=16, kernel_size=5, strides=2,
                           padding='same', activation=tf.nn.relu)
    net = tf.layers.conv2d(net, filters=32, kernel_size=5, strides=2,
                           padding='same', activation=tf.nn.relu)
    # flattening layer
    net = tf.contrib.layers.flatten(net)
    # 2 fully connected layers; the last one stays linear because it emits
    # logits that are consumed by a softmax cross-entropy loss.
    net = tf.layers.dense(net, units=128, activation=tf.nn.relu)
    net = tf.layers.dense(net, units=n_actions)
    return net

In [36]:
# Start from an empty default graph so that re-running this cell does not
# pile a second copy of the model on top of an earlier build.
tf.reset_default_graph()

# Graph inputs: one observation and one label vector with an entry per action.
# NOTE(review): "paceholder" is a misspelling of "placeholder"; the name is
# left as-is because cells outside this view may reference it.
state_placeholder = tf.placeholder(dtype=tf.float32, shape=state_shape)
action_paceholder = tf.placeholder(dtype=tf.int32, shape=[n_actions])

# Loss: softmax cross-entropy between the network's logits and the action
# labels, reduced to a scalar by averaging.
logits = network(state_placeholder)
x_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=action_paceholder,
    logits=logits,
)
loss = tf.reduce_mean(x_entropy)

# Optimisation: Adam with its default hyper-parameters; `global_step` is a
# non-trainable counter handed to `minimize`.
global_step = tf.Variable(0, trainable=False)
optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(loss, global_step=global_step)

### TensorFlow's `Session`

In [37]:
# Build the op that initialises every global variable, open a session on the
# default graph, and run the initialiser once.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

### TensorBoard

In [38]:
# Output locations, grouped per environment.
# The f-prefix is required: without it the path contains the literal text
# "{env_name}" and every environment would share one directory.
save_dir = f'saved/experiment/{env_name}'
tboard_dir = os.path.join(save_dir, 'tensorboard')
logdir = os.path.join(tboard_dir, 'log')

model_dir = os.path.join(save_dir, 'models')
model_path = os.path.join(model_dir, 'model.ckpt')

# Summary: track the scalar loss and merge all registered summaries into one op
tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()

# Saver & Writer: the saver handles model checkpoints, the writer emits the
# event files (including the session's graph) under `logdir`
saver = tf.train.Saver()
writer = tf.summary.FileWriter(logdir=logdir, graph=sess.graph)

In [None]:
# Resume from the newest checkpoint when one exists; otherwise create the
# checkpoint directory so later saves have somewhere to go.
if tf.gfile.Exists(model_dir):
    try:
        print('Attempting to restore last checkpoint')
        # `latest_checkpoint` returns None when the directory exists but holds
        # no checkpoint yet (e.g. on the second run, before anything has been
        # saved). That is a normal state, not a restore failure.
        ckpt_file = tf.train.latest_checkpoint(model_dir)
        if ckpt_file is None:
            print(f'No checkpoint found in {model_dir} - keeping freshly initialized variables')
        else:
            saver.restore(sess=sess, save_path=ckpt_file)
            print(f'Successfully restored last checkpoint - {ckpt_file}')
    except Exception as e:
        # Best effort: a failed restore must not stop the notebook; training
        # simply continues from the freshly initialized variables.
        sys.stderr.write(f'ERR: Could not restore checkpoint. {e}\n')
        sys.stderr.flush()
else:
    tf.gfile.MakeDirs(model_dir)
    print(f'Created checkpoint directory - {model_dir}')