## Download data

The data is loaded through the MNIST tutorial helpers that ship with TensorFlow.

MNIST is a set of images of handwritten digits.

In [None]:
import warnings
# Install a global filter that ignores every Python warning from this point on.
# NOTE(review): presumably meant to hide TensorFlow deprecation notices, but it
# hides all other warnings as well — confirm this is intended.
warnings.filterwarnings('ignore')

In [None]:
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Read the dataset through the tutorial helper, using /tmp/data/ as the data
# directory; one_hot=True asks for the labels as one-hot vectors.
# NOTE(review): presumably downloads the files on first use — confirm.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

import tensorflow as tf


In [None]:
from matplotlib import pyplot as plt
import numpy as np
def gen_image(arr):
    """Draw a flattened 28x28 image on the current matplotlib figure.

    Args:
        arr: array-like with 784 elements that can be reshaped to (28, 28).
            Presumably pixel intensities in [0, 1] — TODO confirm against
            the data loader.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can chain ``.show()``.
    """
    pixels = np.reshape(arr, (28, 28))
    # Scale by 255 and truncate to unsigned 8-bit values before drawing.
    pixels_8bit = (pixels * 255).astype(np.uint8)
    plt.imshow(pixels_8bit, interpolation='nearest')
    return plt

In [None]:
# Draw two samples from the test split and display each image in turn.
batch_xs, batch_ys = mnist.test.next_batch(2)
for idx in range(2):
    gen_image(batch_xs[idx]).show()

In [None]:
# Labels of the two test images drawn in the previous cell (one-hot vectors,
# because the dataset was read with one_hot=True).
batch_ys

In [None]:
from __future__ import print_function

## Parameters

* learning rate controls the step size of each optimization update: the smaller it is, the slower the training
* training epochs is the maximum number of passes over the training set
* batch size controls the number of samples used for each batch
* display step is used to control display frequency (#epochs)


In [None]:
# Parameters
learning_rate = 0.0001  # step size handed to the optimizer
training_epochs = 15  # intended number of passes over the training set
batch_size = 100  # number of training samples per mini-batch
display_step = 1  # print the average cost every `display_step` epochs

## Network parameters

* number of neurons in first layer
* number of neurons in second layer
* number of features (here 28*28 pixels)
* number of classes (outputs)

In [None]:
# Network Parameters
# These sizes fix the shapes of the input placeholders and of the
# weight/bias variables created further down.
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)

## Data Tensors

Create a placeholder for data (tensors)
```
tf.placeholder(
    dtype,
    shape=None,
    name=None
)
```

Here, each placeholder has two dimensions: the size of the first one (the number of samples in a batch) is left unknown (`None`), while the size of the second one is fixed.


In [None]:
# tf Graph input
# The leading None leaves the first dimension unspecified, so batches of any
# size can be fed at run time.
X = tf.placeholder("float", [None, n_input])  # input features, one row per sample
Y = tf.placeholder("float", [None, n_classes])  # target labels, one row per sample

## Weights and biases

Model parameters:

* weights matrices on edges to layer1, layer1 to layer2 and layer2 to output
* biases vectors for each layer and output

In [None]:
# Store layers weight & bias
# Every variable is initialised from tf.random_normal; no mean/stddev is
# passed, so the library defaults apply.
# Weight matrices are shaped (inputs, outputs) so that a batch of row vectors
# can be right-multiplied by them.
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
# Bias vectors: one entry per unit of the corresponding layer.
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

## create the model graph


$output =((X \cdot H_1 + B_1) \cdot H_2 + B_2) \cdot H_{out} + B_{out}$

The $output$ vector (the logits) is then transformed with a softmax function to represent class probabilities

In [None]:
# Create model
def multilayer_perceptron(x):
    """Build the graph ops mapping an input batch to per-class scores.

    The input goes through two hidden fully connected layers and one output
    layer, using the module-level ``weights`` and ``biases`` dictionaries.
    No activation function is applied between the layers, so each stage is a
    plain ``matmul`` plus bias.

    Args:
        x: tensor whose last dimension matches the first dimension of
            ``weights['h1']``.

    Returns:
        Tensor of un-normalised scores with one column per class.
    """
    activations = x
    # The two hidden layers share the same structure; apply them in order.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        activations = tf.add(tf.matmul(activations, weights[weight_key]),
                             biases[bias_key])
    # Output layer: one score per class.
    return tf.matmul(activations, weights['out']) + biases['out']

## Instantiate a model

In [None]:
# Construct model
# `logits` are the raw, un-normalised class scores; softmax is applied later,
# inside the loss and again when predictions are evaluated.
logits = multilayer_perceptron(X)

## Loss and optimizer

Training is optimizing for a loss function

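The loss operator is the mean over the batch of the cross entropy $-\sum_x p(x) \log q(x)$, where $p$ is the one-hot label distribution and $q$ is the softmax of the model output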

The Adam optimizer (ADAptive Moment estimation) is an adaptive learning-rate method that accelerates convergence


In [None]:
# Define loss and optimizer
# Softmax cross-entropy between the logits and the labels, averaged with
# reduce_mean to give a single scalar loss.
# NOTE(review): later TF 1.x releases deprecate this function in favour of
# softmax_cross_entropy_with_logits_v2 — confirm against the installed version.
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
# Adam optimizer configured with the notebook's learning_rate.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Op that performs one optimization step on loss_op each time it is run.
train_op = optimizer.minimize(loss_op)

## TensorFlow init variables

The initialization op is created before starting a session; the variables are actually initialized when this op is run inside the session

In [None]:
# Initializing the variables
# This only builds the initialisation op; the variables receive their values
# when the op is run inside a session.
init = tf.global_variables_initializer()


## Run the training and evaluation



In [None]:
# Number of mini-batches that make up one pass over the training set.
# It does not change between epochs, so compute it once up front.
total_batch = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    # Give every variable its initial value before any training step.
    sess.run(init)

    # Training cycle: run the configured number of epochs. The loop was
    # previously hard-coded to a single epoch, so changing `training_epochs`
    # had no effect.
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Loop over all batches
        for _step in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Accumulate the mean of the per-batch losses for this epoch
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model: these ops are evaluated inside the same session so that
    # they see the trained variable values.
    pred = tf.nn.softmax(logits)  # Apply softmax to logits
    # A sample counts as correct when the most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    # Calculate accuracy as the fraction of correct predictions
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
    

## Some questions

* What would happen if you displayed the validation set accuracy for each epoch?
* Any use for increasing the number of epochs?
* How could we find a stopping rule in training?
* remove one layer
* Add one layer