# Advanced ML Part II // Lecture 03 Scratch

In [1]:
# written by John P. Cunningham, for use in lecture
# continues many of the conventions set out in Wenda Zhou's excellent tf tutorial
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

### Load and sample the data

In [2]:
# get mnist data
# NOTE(review): `tensorflow.examples.tutorials` appears to ship only with TensorFlow 1.x -- confirm the environment.
from tensorflow.examples.tutorials.mnist import input_data
# one_hot=True: the labels are later fed straight into the [None, 10] `y` placeholder,
# i.e. they are used as 10-wide rows rather than integer class ids.
mnist = input_data.read_data_sets('../data/mnist', one_hot=True)

Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz


### Build models

In [7]:
# thin wrappers around the TensorFlow convolution and max-pooling ops used by the CNN models
def conv(x, W):
    """Convolve `x` with filter `W` through tf.nn.conv2d (stride 1 everywhere, 'SAME' padding)."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def maxpool(x):
    """Max-pool `x` through tf.nn.max_pool with a 2x2 window, stride 2 and 'SAME' padding."""
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')

In [49]:
# compute_logits builds one of several models (logistic regression or a CNN variant), selected by model_type
# model types understood by compute_logits
_MODEL_TYPES = ('lr', 'cnn_cf', 'cnn_cpcpff', 'cnn_cpcpfdf')


def _conv_relu(x, W_name, b_name, filter_shape):
    """Create a conv filter and bias with tf.get_variable and return relu(conv(x, W) + b)."""
    W = tf.get_variable(W_name, shape=filter_shape)
    b = tf.get_variable(b_name, shape=[filter_shape[-1]])  # one bias per output channel
    return tf.nn.relu(tf.add(conv(x, W), b))


def _affine(x, W_name, b_name, n_in, n_out, name=None):
    """Create an [n_in, n_out] weight and [n_out] bias and return x @ W + b (no nonlinearity)."""
    W = tf.get_variable(W_name, shape=[n_in, n_out])
    b = tf.get_variable(b_name, shape=[n_out])
    return tf.add(tf.matmul(x, W), b, name=name)


def compute_logits(x, model_type, pkeep):
    """Compute the logits of the model.

    Args:
        x: batch of flattened images; every branch treats a row as 28*28 = 784 values.
        model_type: which model to build, one of
            'lr'          - logistic regression (a single affine layer),
            'cnn_cf'      - conv -> fc,
            'cnn_cpcpff'  - conv -> pool -> conv -> pool -> fc -> fc,
            'cnn_cpcpfdf' - same as 'cnn_cpcpff' plus dropout before the last fc layer.
        pkeep: handed to tf.nn.dropout as its second argument; only used by 'cnn_cpcpfdf'.

    Returns:
        The logits tensor, with 10 columns (one per class).

    Raises:
        ValueError: if model_type is not one of the values listed above.
    """
    # fail early and loudly; previously an unknown type only printed a message and
    # then crashed with an UnboundLocalError on `return logits`
    if model_type not in _MODEL_TYPES:
        raise ValueError('error not a valid model type: {!r} (expected one of {})'.format(
            model_type, ', '.join(_MODEL_TYPES)))

    if model_type == 'lr':
        return _affine(x, 'W', 'b', 28*28, 10, name='logits_lr')

    # every remaining model is a cnn and starts from the image-shaped input
    x_image = tf.reshape(x, [-1, 28, 28, 1])  # batch, height, width, channels (NHWC)

    if model_type == 'cnn_cf':
        # 1 layer cnn
        n1 = 64
        h_conv1 = _conv_relu(x_image, 'W_conv1', 'b_conv1', [5, 5, 1, n1])
        # fc layer to logits ('SAME' padding and stride 1 keep the 28x28 extent)
        h_conv1_flat = tf.reshape(h_conv1, [-1, 28*28*n1])
        return _affine(h_conv1_flat, 'W_fc1', 'b_fc1', 28*28*n1, 10, name='logits_cnn1')

    # 'cnn_cpcpff' / 'cnn_cpcpfdf':
    # 2 layer cnn, similar architecture to tensorflow's deep mnist tutorial, so you can compare
    n1 = 32
    n2 = 64
    n3 = 1024
    # cnn layer 1 + pool 1
    h_pool1 = maxpool(_conv_relu(x_image, 'W_conv1', 'b_conv1', [5, 5, 1, n1]))
    # cnn layer 2 + pool 2
    h_pool2 = maxpool(_conv_relu(h_pool1, 'W_conv2', 'b_conv2', [5, 5, n1, n2]))
    # fc layer (7x7 since 2 rounds of stride-2 maxpool: 28 -> 14 -> 7)
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*n2])
    h_fc1 = tf.nn.relu(_affine(h_pool2_flat, 'W_fc1', 'b_fc1', 7*7*n2, n3))
    if model_type == 'cnn_cpcpfdf':
        # the only difference between the two variants: a dropout layer here
        h_fc1 = tf.nn.dropout(h_fc1, pkeep)
    # one more fc layer
    # ... again, this is the logistic layer with softmax readout
    return _affine(h_fc1, 'W_fc2', 'b_fc2', n3, 10, name='logits_cnn2')

def compute_cross_entropy(logits, y, numerical_instability_example=True):
    """Compute the average cross-entropy across all the examples.

    Args:
        logits: unnormalised class scores, one row per example.
        y: target distribution per example, same shape as `logits`.
        numerical_instability_example: selects the implementation.
            True (default, the behaviour this function always had) uses the
            hand-rolled formula mean(-sum(y * log(softmax(logits)))), kept as a
            demonstration: when a softmax probability underflows to 0 its log is
            -inf and the loss can become inf/NaN.
            False uses tf.nn.softmax_cross_entropy_with_logits, which works from
            the logits directly; prefer it for real training.

    Returns:
        A scalar tensor holding the mean cross-entropy.
    """
    if numerical_instability_example:
        y_pred = tf.nn.softmax(logits, name='y_pred')  # the predicted probability for each example.
        # sum over the class axis, then average over examples
        per_example = -tf.reduce_sum(y * tf.log(y_pred), axis=1)
        cross_ent = tf.reduce_mean(per_example)
    else:
        sm_ce = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits, name='cross_ent_terms')
        cross_ent = tf.reduce_mean(sm_ce, name='cross_ent')
    return cross_ent

def compute_accuracy(logits, y):
    """Return the fraction of examples whose arg-max logit equals the arg-max of `y`.

    Both arg-maxes are taken along axis 1; the matches are cast to float32 and averaged.
    """
    predicted_class = tf.argmax(logits, 1, name='pred_class')
    actual_class = tf.argmax(y, 1, name='true_class')
    is_correct = tf.equal(predicted_class, actual_class)
    return tf.reduce_mean(tf.cast(is_correct, tf.float32))

### Run models

In [59]:
# run configuration
# number of training examples drawn per optimisation step
batch_size = 100
# which model compute_logits should build: 'lr', 'cnn_cf', 'cnn_cpcpff' or 'cnn_cpcpfdf'
model_type = 'cnn_cf'
# summaries go to a per-model directory
dir_name = 'logs/scratch03_cnnz/{}'.format(model_type)

In [60]:
# Build the graph for `model_type`, train it on MNIST mini-batches and log
# train/test summaries for TensorBoard under `dir_name`.
n_steps = 1000        # training runs for steps 0..n_steps inclusive
train_pkeep = 0.5     # value fed for `pkeep` while training (1.0 is fed at test time)
summary_every = 10    # write a training summary every this many steps
eval_every = 100      # evaluate on the full test set every this many steps

with tf.Graph().as_default():
    # We build the model here as before
    x = tf.placeholder(tf.float32, [None, 784], name='x')
    y = tf.placeholder(tf.float32, [None, 10], name='y')
    pkeep = tf.placeholder(tf.float32, name='pkeep')

    with tf.name_scope('model'):
        logits = compute_logits(x, model_type, pkeep)
    with tf.name_scope('loss'):
        loss = compute_cross_entropy(logits=logits, y=y)
    with tf.name_scope('accuracy'):
        accuracy = compute_accuracy(logits, y)

    with tf.name_scope('opt'):
        # plain gradient descent for logistic regression, Adam for the cnn models
        if model_type == 'lr':
            opt = tf.train.GradientDescentOptimizer(0.5)
        else:
            opt = tf.train.AdamOptimizer(1e-4)
        train_step = opt.minimize(loss)

    with tf.name_scope('summaries'):
        # create summary for loss and accuracy
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # create summary for logits
        tf.summary.histogram('logits', logits)
        # create summary for input image
        tf.summary.image('input', tf.reshape(x, [-1, 28, 28, 1]))

        summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        # the root writer only records the graph; train/test get their own sub-directories
        summary_writer = tf.summary.FileWriter(dir_name, sess.graph)
        summary_writer_train = tf.summary.FileWriter(dir_name+'/train', sess.graph)
        summary_writer_test = tf.summary.FileWriter(dir_name+'/test')
        writers = (summary_writer, summary_writer_train, summary_writer_test)

        try:
            sess.run(tf.global_variables_initializer())

            for i in range(n_steps + 1):
                X_batch, y_batch = mnist.train.next_batch(batch_size)
                train_feed = {x: X_batch, y: y_batch, pkeep: train_pkeep}

                if i % summary_every == 0:
                    # fetch the summaries only on the steps where they are written
                    _, train_summary = sess.run((train_step, summary_op), feed_dict=train_feed)
                    summary_writer_train.add_summary(train_summary, i)
                else:
                    sess.run(train_step, feed_dict=train_feed)

                if i % eval_every == 0:
                    test_feed = {x: mnist.test.images, y: mnist.test.labels, pkeep: 1.0}
                    test_accuracy, test_summary = sess.run((accuracy, summary_op), feed_dict=test_feed)
                    print("\rAfter step {0:3d}, test accuracy {1:0.4f}".format(i, test_accuracy), flush=True)
                    summary_writer_test.add_summary(test_summary, i)
        finally:
            # close the writers so pending events reach disk even if training is interrupted
            for writer in writers:
                writer.close()


After step   0, test accuracy 0.0793
After step 100, test accuracy 0.3061
After step 200, test accuracy 0.5411
After step 300, test accuracy 0.6778
After step 400, test accuracy 0.7334
After step 500, test accuracy 0.7683
After step 600, test accuracy 0.7911
After step 700, test accuracy 0.8049
After step 800, test accuracy 0.8180
After step 900, test accuracy 0.8218
After step 1000, test accuracy 0.8309
