# Importing Libraries

In [1]:
import numpy as np
import collections
import os
import struct
import gzip
import numpy as np
import tensorflow as tf
import urllib
import sys
import matplotlib.pyplot as plt

# Data Processing

### Format to Store the data

In [2]:
# Lightweight container bundling the three dataset splits under named fields.
Datasets = collections.namedtuple('Datasets', 'train validation test')

* The datasets are stored in a named tuple with `train`, `validation`, and `test` fields

### Functions

##### Function for downloading Data

* This function downloads the data from the website (it is the same data as the built-in MNIST dataset)

* If a file has already been downloaded, the download is skipped

In [3]:
def maybe_download(filename):
    """Download an MNIST archive into the current working directory if it is missing.

    Args:
        filename: Name of the archive; it is appended to SOURCE_URL to form the
            download URL and to the working directory to form the local path.

    Returns:
        The local path of the archive (inside the current working directory).
    """
    # `import urllib` on its own does not load the `request` submodule, so
    # import it explicitly instead of relying on another package having done so.
    import urllib.request

    WORK_DIRECTORY = os.getcwd()
    SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
    if not tf.gfile.Exists(WORK_DIRECTORY):
        tf.gfile.MakeDirs(WORK_DIRECTORY)
    filepath = os.path.join(WORK_DIRECTORY, filename)
    if not tf.gfile.Exists(filepath):
        # Download to a temporary name and rename on success, so an interrupted
        # transfer never leaves a truncated file that later runs would treat
        # as a completed download.
        partial_path = filepath + '.part'
        urllib.request.urlretrieve(SOURCE_URL + filename, partial_path)
        os.replace(partial_path, filepath)
        with tf.gfile.GFile(filepath) as f:
            size = f.size()
        print('Successfully downloaded', filename, size, 'bytes.')
    return filepath

##### Functions for Extracting data

* The data we get is in `idx-ubyte.gz` format, which cannot be read as plain text

* It is a gzip-compressed binary file, so we use `gzip` to open it and read the data as a byte stream

In [4]:
# Side length, in pixels, of one square image.
IMAGE_SIZE = 28


def extract_data(filename, num_images):
    """Read `num_images` images from a gzip-compressed image file.

    The first 16 bytes of the decompressed stream are skipped; the following
    IMAGE_SIZE * IMAGE_SIZE * num_images bytes are interpreted as unsigned
    8-bit pixel values.

    Args:
        filename: Path of the gzip file to read.
        num_images: Number of images to read.

    Returns:
        A float32 array of shape [num_images, IMAGE_SIZE, IMAGE_SIZE, 1]
        holding the raw pixel values (no rescaling is applied).
    """
    print('Extracting', filename)
    byte_count = IMAGE_SIZE * IMAGE_SIZE * num_images
    with gzip.open(filename) as stream:
        stream.read(16)  # discard the header bytes
        raw_pixels = stream.read(byte_count)
    pixels = np.frombuffer(raw_pixels, dtype=np.uint8).astype(np.float32)
    return pixels.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, 1)

def extract_labels(filename, num_images):
    """Read `num_images` labels from a gzip-compressed label file.

    The first 8 bytes of the decompressed stream are skipped; each following
    byte is one label.

    Args:
        filename: Path of the gzip file to read.
        num_images: Number of labels to read.

    Returns:
        A 1-D int64 array with one entry per byte read.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        stream.read(8)  # discard the header bytes
        raw_labels = stream.read(1 * num_images)
    return np.frombuffer(raw_labels, dtype=np.uint8).astype(np.int64)

* We reshaped the image into shape of [ : , 28, 28, 1]

### Download the Data

In [5]:
# Fetch the four MNIST archives in a fixed order and keep the local path of each.
(train_data_filename,
 train_labels_filename,
 test_data_filename,
 test_labels_filename) = [
    maybe_download(archive_name)
    for archive_name in (
        'train-images-idx3-ubyte.gz',
        'train-labels-idx1-ubyte.gz',
        't10k-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz',
    )
]

### Extract Data

In [6]:
# Read from the paths returned by maybe_download rather than re-typing the
# file names, so extraction keeps working if the download directory ever
# differs from the current working directory.
train_images = extract_data(train_data_filename, 60000)
train_labels = extract_labels(train_labels_filename, 60000)
test_images = extract_data(test_data_filename, 10000)
test_labels = extract_labels(test_labels_filename, 10000)

Extracting train-images-idx3-ubyte.gz
Extracting train-labels-idx1-ubyte.gz
Extracting t10k-images-idx3-ubyte.gz
Extracting t10k-labels-idx1-ubyte.gz


### Function for one hot encoding

In [7]:
def onehot(labels, num_classes=10):
    """One-hot encode integer class labels.

    Args:
        labels: Array (or sequence) of integer class indices; one output row
            is produced per entry along its first axis.
        num_classes: Width of each encoded row. Defaults to 10.

    Returns:
        A float64 array of shape [len(labels), num_classes] that is zero
        everywhere except for a 1 in the column given by each label.

    Raises:
        IndexError: If a label is outside the valid column range.
    """
    labels = np.asarray(labels)  # also accept plain Python sequences
    encoded = np.zeros([labels.shape[0], num_classes])
    for row, class_index in enumerate(labels):
        encoded[row, class_index] = 1
    return encoded

* If the labels we get are not one-hot encoded, this function encodes them

### Class and Function for Preparing the Dataset

In [8]:
class getdata(object):
    """Holds one split of images and labels and serves it in sequential batches."""

    def __init__(self, images, labels, one_hot = True):
        """Store the arrays and reset the batching state.

        Args:
            images: Array whose first axis indexes examples and whose axis 3
                must have size 1 (asserted).
            labels: Array with the same number of entries along its first
                axis as `images` (asserted).
            one_hot: When True the labels are stored exactly as given; when
                False they are first passed through `onehot`. Note that the
                flag therefore says "leave the labels alone", not "encode them".
        """
        example_count = images.shape[0]
        assert example_count == labels.shape[0]
        assert images.shape[3] == 1

        self.num_examples = example_count
        self.images = images
        self.one_hot = one_hot
        self.labels = labels if one_hot else onehot(labels)
        self.epoch = 0          # number of reshuffles performed so far
        self.startofepoch = 0   # index at which the next batch begins

    def nextbatch(self, batch_size):
        """Return the next `batch_size` examples as an (images, labels) pair.

        Batches are consecutive slices of the stored arrays. When the
        requested slice would run past the end, the epoch counter is
        incremented, images and labels are reshuffled with one shared
        permutation, and the batch is taken from the start of the reshuffled
        data; the examples left between the old position and the end are not
        returned for that epoch.
        """
        first = self.startofepoch
        last = first + batch_size

        if last > self.num_examples:
            self.epoch += 1

            # One permutation for both arrays keeps images and labels aligned.
            order = np.arange(self.num_examples)
            np.random.shuffle(order)
            self.images = self.images[order]
            self.labels = self.labels[order]

            first, last = 0, batch_size

        self.startofepoch = last
        return self.images[first:last], self.labels[first:last]

def createdata(train_images, train_labels, test_images, test_labels, train_fraction=0.7):
    """Shuffle the training data, split off a validation set and wrap all splits.

    The training images and labels are shuffled with one shared random
    permutation. The first `train_fraction` of the shuffled examples become
    the training split and the remainder becomes the validation split. The
    test arrays are wrapped unchanged.

    Args:
        train_images: Training images, examples along the first axis.
        train_labels: Training labels, aligned with `train_images`.
        test_images: Test images.
        test_labels: Test labels.
        train_fraction: Share of the training examples kept for training;
            the rest is used for validation. Defaults to 0.7.

    Returns:
        A `Datasets` tuple whose `train`, `validation` and `test` fields are
        `getdata` instances.

    Raises:
        ValueError: If `train_fraction` is not within [0, 1].
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError('train_fraction must be between 0 and 1, got %r' % (train_fraction,))

    # A single permutation applied to both arrays keeps images and labels aligned.
    perm = np.arange(train_images.shape[0])
    np.random.shuffle(perm)
    train_images = train_images[perm]
    train_labels = train_labels[perm]

    # Compute the split point once so all four slices below agree.
    split = int(train_fraction * train_images.shape[0])
    validation_images = train_images[split:]
    validation_labels = train_labels[split:]
    train_images = train_images[:split]
    train_labels = train_labels[:split]

    # one_hot=True makes getdata store the labels exactly as passed in.
    train = getdata(train_images, train_labels, one_hot = True)
    validation = getdata(validation_images, validation_labels, one_hot = True)
    test = getdata(test_images, test_labels, one_hot = True)

    return Datasets(train=train, validation=validation, test=test)

* The class `getdata` lets us iterate over a split in batches, while the function `createdata` converts the extracted arrays into the `Datasets` named tuple defined above

### Preparing Dataset

In [9]:
# Build the train/validation/test splits from the extracted arrays.
data = createdata(
    train_images=train_images,
    train_labels=train_labels,
    test_images=test_images,
    test_labels=test_labels,
)

# Model

### Setting Log

In [10]:
# Set TensorFlow's log threshold to INFO so the INFO-level messages are shown.
tf.logging.set_verbosity(tf.logging.INFO)

* Sets the logging verbosity so that all INFO-level messages from TensorFlow are shown

### Defining model

Here the model that we constructed is:

* Convolution layer - 32, [5,5]
* Maxpool - stride 2, [2,2]
* Convolution layer - 64, [5,5]
* Maxpool - stride 2, [2,2]
* Flatten to [:, 7*7*64]
* dense layer - 1024 units
* dropout - 0.4
* Output - labels (Not one hot encoded)


Created an Estimator with:
* Loss function as sparse_softmax_cross_entropy
* Optimizer as GradientDescentOptimizer
* Learning rate 0.001

In [11]:
def model(features, labels, mode):
    """Estimator model_fn: two conv/max-pool stages followed by a dense classifier.

    Args:
        features: Dict with key "x" holding the input images; they are
            reshaped to [-1, 28, 28, 1].
        labels: Integer class labels (not one-hot), as required by the sparse
            softmax cross-entropy loss. Not used in PREDICT mode.
        mode: One of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying predictions (PREDICT), loss
        and train op (TRAIN), or loss and accuracy metric (EVAL).
    """
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Stage 1: 5x5 convolution with 32 filters, then 2x2 max-pool (28 -> 14).
    conv1 = tf.layers.conv2d(inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu,
        name = "Conv1")
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2, name = "Pool1")

    # Stage 2: 5x5 convolution with 64 filters, then 2x2 max-pool (14 -> 7).
    conv2 = tf.layers.conv2d(inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu,
        name = "Conv2")
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2, name = "Pool2")

    # Classifier head: flatten, 1024-unit dense layer, dropout, 10 logits.
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu, name = "Fc1")

    # Dropout is only active while training.
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    logits = tf.layers.dense(inputs=dropout, units=10, name = "out")

    predictions = {
        "classes": tf.argmax(input=logits, axis=1, name = "Prediction"),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    # Return before anything touches `labels`: the Estimator passes
    # labels=None in PREDICT mode, so building label-dependent ops (loss,
    # accuracy) ahead of this point would make classifier.predict() fail.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode); built once, only where needed.
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"], name = "Accuracy_value")}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

### Defining a Classifier

Created a Classifier with checkpoints in directory : tboard/mod1

In [12]:
# Wrap `model` in an Estimator; "tboard/mod1" is passed as its model directory.
classifier = tf.estimator.Estimator(model_fn=model, model_dir="tboard/mod1")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_summary_steps': 100, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7ff7f33240>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 1, '_tf_random_seed': None, '_service': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_master': '', '_session_config': None, '_model_dir': 'tboard/mod1', '_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_is_chief': True, '_task_type': 'worker'}


### Train Function

Created a Train Function for estimator

In [13]:
# Training input function: feeds the training split in shuffled batches of 100.
# num_epochs=None places no epoch limit on the input; the amount of training
# is bounded by the `steps` argument given to classifier.train.
train_fn = tf.estimator.inputs.numpy_input_fn(x={"x": data.train.images}, 
                                                    y=data.train.labels, 
                                                    batch_size=100,
                                                    num_epochs=None,
                                                    shuffle=True)

### Train Model

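Here the model is trained for 20,000 steps. The loss is logged every 100 global steps, and checkpoints of all variables are saved periodically to the model directory (every 600 seconds with the default config shown above). If training is started again, the Estimator restores the latest checkpoint from that directory and continues from there.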

In [14]:
# Train for 20,000 steps using the training input function.
classifier.train(input_fn=train_fn,steps=20000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into tboard/mod1/model.ckpt.
INFO:tensorflow:step = 1, loss = 47.7834
INFO:tensorflow:global_step/sec: 2.45895
INFO:tensorflow:step = 101, loss = 0.826965 (40.669 sec)
INFO:tensorflow:global_step/sec: 2.09505
INFO:tensorflow:step = 201, loss = 0.397608 (47.732 sec)
INFO:tensorflow:global_step/sec: 2.17857
INFO:tensorflow:step = 301, loss = 0.284991 (45.916 sec)
INFO:tensorflow:global_step/sec: 2.26095
INFO:tensorflow:step = 401, loss = 0.303349 (44.214 sec)
INFO:tensorflow:global_step/sec: 2.39338
INFO:tensorflow:step = 501, loss = 0.364552 (41.782 sec)
INFO:tensorflow:global_step/sec: 2.21541
INFO:tensorflow:step = 601, loss = 0.27745 (45.139 sec)
INFO:tensorflow:global_step/sec: 2.29288
INFO:tensorflow:step = 701, loss = 0.252174 (43.612 sec)
INFO:tensorflow:global_step/sec: 2.55308
INFO:tensorflow:step = 801, loss = 0.11744 (39.168 sec)
INFO:tensorflow:global_step/sec: 2.31433
INFO:tensorflow:step =

INFO:tensorflow:step = 7801, loss = 0.0870436 (38.975 sec)
INFO:tensorflow:global_step/sec: 2.50085
INFO:tensorflow:step = 7901, loss = 0.0739786 (39.987 sec)
INFO:tensorflow:Saving checkpoints for 7923 into tboard/mod1/model.ckpt.
INFO:tensorflow:global_step/sec: 2.05205
INFO:tensorflow:step = 8001, loss = 0.0540874 (48.732 sec)
INFO:tensorflow:global_step/sec: 1.98734
INFO:tensorflow:step = 8101, loss = 0.0270197 (50.319 sec)
INFO:tensorflow:global_step/sec: 1.69674
INFO:tensorflow:step = 8201, loss = 0.0525406 (58.952 sec)
INFO:tensorflow:global_step/sec: 1.69372
INFO:tensorflow:step = 8301, loss = 0.0288693 (59.051 sec)
INFO:tensorflow:global_step/sec: 1.93637
INFO:tensorflow:step = 8401, loss = 0.0697171 (51.618 sec)
INFO:tensorflow:global_step/sec: 2.17965
INFO:tensorflow:step = 8501, loss = 0.127387 (45.878 sec)
INFO:tensorflow:global_step/sec: 2.39946
INFO:tensorflow:step = 8601, loss = 0.0293512 (41.676 sec)
INFO:tensorflow:global_step/sec: 2.1561
INFO:tensorflow:step = 8701, 

INFO:tensorflow:step = 15501, loss = 0.0424496 (43.287 sec)
INFO:tensorflow:global_step/sec: 2.31507
INFO:tensorflow:step = 15601, loss = 0.053282 (43.196 sec)
INFO:tensorflow:global_step/sec: 2.34069
INFO:tensorflow:step = 15701, loss = 0.0111168 (42.722 sec)
INFO:tensorflow:global_step/sec: 1.92685
INFO:tensorflow:step = 15801, loss = 0.0117995 (51.899 sec)
INFO:tensorflow:global_step/sec: 2.06895
INFO:tensorflow:step = 15901, loss = 0.00835256 (48.333 sec)
INFO:tensorflow:global_step/sec: 2.12528
INFO:tensorflow:step = 16001, loss = 0.00916184 (47.052 sec)
INFO:tensorflow:global_step/sec: 2.26027
INFO:tensorflow:step = 16101, loss = 0.00422559 (44.243 sec)
INFO:tensorflow:global_step/sec: 2.42571
INFO:tensorflow:step = 16201, loss = 0.0457886 (41.226 sec)
INFO:tensorflow:global_step/sec: 2.22159
INFO:tensorflow:step = 16301, loss = 0.00277359 (45.013 sec)
INFO:tensorflow:Saving checkpoints for 16329 into tboard/mod1/model.ckpt.
INFO:tensorflow:global_step/sec: 1.83595
INFO:tensorflo

<tensorflow.python.estimator.estimator.Estimator at 0x7f7ff6449470>

We can observe Tensorboard Visualisations by running the following code:

> tensorboard --logdir=path_to_directory_tboard --host=127.0.0.1


### Validation function

Create a Validation function with validation data and number of epochs 1

In [15]:
# Validation input function: a single, unshuffled pass over the validation split.
eval_fn = tf.estimator.inputs.numpy_input_fn(x={"x": data.validation.images},
                                                        y=data.validation.labels,
                                                        num_epochs=1,
                                                        shuffle=False)

### Validation

Here the stored model is restored from the latest checkpoint and evaluated on the validation data

In [16]:
# Evaluate on the validation split and show the resulting metrics dict.
eval_results = classifier.evaluate(input_fn=eval_fn)
print(eval_results)

INFO:tensorflow:Starting evaluation at 2018-03-09-18:43:06
INFO:tensorflow:Restoring parameters from tboard/mod1/model.ckpt-20000
INFO:tensorflow:Finished evaluation at 2018-03-09-18:43:34
INFO:tensorflow:Saving dict for global step 20000: accuracy = 0.988111, global_step = 20000, loss = 0.038156
{'accuracy': 0.98811114, 'loss': 0.038156018, 'global_step': 20000}


### Test Function

Similar is the case with Test Data

In [17]:
# Test input function: a single, unshuffled pass over the test split.
test_fn = tf.estimator.inputs.numpy_input_fn(x={"x": data.test.images},
                                                        y=data.test.labels,
                                                        num_epochs=1,
                                                        shuffle=False)

### Test Accuracy

In [18]:
# Evaluate on the test split and show the resulting metrics dict.
test_results = classifier.evaluate(input_fn=test_fn)
print(test_results)

INFO:tensorflow:Starting evaluation at 2018-03-09-18:56:33
INFO:tensorflow:Restoring parameters from tboard/mod1/model.ckpt-20000
INFO:tensorflow:Finished evaluation at 2018-03-09-18:56:46
INFO:tensorflow:Saving dict for global step 20000: accuracy = 0.9899, global_step = 20000, loss = 0.0294964
{'accuracy': 0.98989999, 'loss': 0.029496437, 'global_step': 20000}


# Various Models

[Explanation for various models is explained here](https://github.com/wizardboy2010/MNIST/blob/master/Various%20Models.md)

Out of all these models, mod1 with batch normalization in the dense layer gave the highest test accuracy, 99.66%.