In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import tarfile
from IPython.display import display, Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
from six.moves import range
from sklearn.metrics import confusion_matrix
import tensorflow as tf
print(tf.__version__)

%matplotlib inline

1.3.0


In [2]:
## Locations of the per-class notMNIST data, one folder per letter A-J.
## Only 10000 samples per class are used later on because of limited hardware.

## NOTE: the pickle files are expected to live in the unit-1 folder.

_class_letters = 'ABCDEFGHIJ'
train_folders = ['../unit-1/notMNIST_large/' + letter for letter in _class_letters]
test_folders = ['../unit-1/notMNIST_small/' + letter for letter in _class_letters]

In [3]:
# Each class folder has a matching pickle file named "<folder>.pickle".
train_datasets = [folder + '.pickle' for folder in train_folders]
test_datasets = [folder + '.pickle' for folder in test_folders]

In [4]:
### Load the per-class training pickles and check that the classes are balanced.
### Only the first 10000 samples of each class are kept because of limited hardware.
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# that come from a trusted source.
train_dataset = []
for pickle_path in train_datasets:
  # The context manager closes the file handle as soon as the data is loaded.
  with open(pickle_path, "rb") as f:
    dataset = pickle.load(f)
  train_dataset.append(dataset[0:10000,:,:])
  # Print the shape of the slice that was just stored for this class.
  print(train_dataset[-1].shape)

(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)
(10000, 28, 28)


In [5]:
# Load every per-class test pickle in full and print its shape.
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# that come from a trusted source.
test_dataset = []
for pickle_path in test_datasets:
  # The context manager closes the file handle as soon as the data is loaded.
  with open(pickle_path, "rb") as f:
    dataset = pickle.load(f)
  test_dataset.append(dataset)
  print(dataset.shape)


## Build the integer class labels. A sample's label is the position of its
## class in the dataset list.
# 10000 labels are generated for every training class.
train_labels = [label
                for label in range(len(train_datasets))
                for _ in range(10000)]

# Every test class gets one label per sample it contains.
test_labels = [label
               for label in range(len(test_datasets))
               for _ in range(test_dataset[label].shape[0])]
    

(1872, 28, 28)
(1873, 28, 28)
(1873, 28, 28)
(1873, 28, 28)
(1873, 28, 28)
(1872, 28, 28)
(1872, 28, 28)
(1872, 28, 28)
(1872, 28, 28)
(1872, 28, 28)


In [6]:
# Merge the per-class arrays into a single array per split. Concatenating
# along the first axis keeps the class order, so the labels that were built
# class by class still line up with the samples.
# The isinstance guards make the cell safe to re-run: an already merged
# ndarray is left untouched instead of being flattened a second time.
if isinstance(train_dataset, list):
  train_dataset = np.concatenate(train_dataset)
if isinstance(test_dataset, list):
  test_dataset = np.concatenate(test_dataset)

test_dataset.shape

(18724, 28, 28)

In [8]:
# Convert labels and images to NumPy arrays (labels first, then images).
train_labels, test_labels = np.array(train_labels), np.array(test_labels)
train_dataset, test_dataset = np.array(train_dataset), np.array(test_dataset)

In [9]:
# Reshape both splits to two dimensions with 784 columns per row.
train_dataset = np.reshape(train_dataset, (-1, 784))
test_dataset = np.reshape(test_dataset, (-1, 784))

In [10]:
# Hold out 10% of the training samples as a validation set. A fixed
# random_state makes the split reproducible across kernel restarts.
trainData, validData, trainLabels, validLabels = train_test_split(
  train_dataset, train_labels, test_size = 0.1, random_state = 42)

In [11]:
## One-hot encoding of integer class labels
num_labels = 10
def reformat(labels):
  """Return `labels` one-hot encoded as a float32 array with `num_labels` columns.

  `labels` is indexed as `labels[:, None]`, so it has to be a NumPy array;
  a 1-D array of class indices yields one row per label.
  """
  class_ids = np.arange(num_labels)
  one_hot = np.equal(labels[:, None], class_ids)
  return one_hot.astype(np.float32)
# One-hot encode the labels of all three splits.
trainLabels, validLabels, test_labels = [
  reformat(split_labels)
  for split_labels in (trainLabels, validLabels, test_labels)]

In [12]:
## Final sanity check: print the data and label shapes of every split.
for split_data, split_labels in ((trainData, trainLabels),
                                 (validData, validLabels),
                                 (test_dataset, test_labels)):
  print(split_data.shape, split_labels.shape)

(90000, 784) (90000, 10)
(10000, 784) (10000, 10)
(18724, 784) (18724, 10)


In [13]:
# Full-batch gradient descent on all of the training data would be too slow,
# so only the first `train_subset` samples are used for training here.
train_subset = 1000
image_size = 28

graph = tf.Graph()
with graph.as_default():

  # Input data.
  # The training subset, the validation set and the test set are embedded in
  # the graph as constants.
  tf_train_dataset = tf.constant(trainData[:train_subset, :])
  tf_train_labels = tf.constant(trainLabels[:train_subset])
  tf_valid_dataset = tf.constant(validData)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.
  # The trainable parameters: a weight matrix drawn from a (truncated) normal
  # distribution and a bias vector that starts at zero.
  num_pixels = image_size * image_size
  weights = tf.Variable(tf.truncated_normal([num_pixels, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))

  # Training computation.
  # Logits are inputs times weights plus biases. The loss is the softmax
  # cross-entropy (a single fused TensorFlow op) averaged over all training
  # examples.
  logits = tf.matmul(tf_train_dataset, weights) + biases
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf_train_labels, logits=logits)
  loss = tf.reduce_mean(per_example_loss)

  # Optimizer.
  # Plain gradient descent with a learning rate of 0.5 minimises the loss.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

  # Predictions for the training, validation, and test data.
  # They are not part of training; they only exist so that accuracy figures
  # can be reported while training runs.
  train_prediction = tf.nn.softmax(logits)
  valid_logits = tf.matmul(tf_valid_dataset, weights) + biases
  valid_prediction = tf.nn.softmax(valid_logits)
  test_logits = tf.matmul(tf_test_dataset, weights) + biases
  test_prediction = tf.nn.softmax(test_logits)

In [15]:
num_steps = 801

def accuracy(predictions, labels):
  """Return the percentage of rows where `predictions` and `labels` agree.

  Two rows agree when the arg-max along axis 1 is the same in both arrays.
  The count of agreeing rows is divided by `predictions.shape[0]`.
  """
  predicted_classes = np.argmax(predictions, 1)
  expected_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == expected_classes)
  return 100.0 * num_correct / predictions.shape[0]

with tf.Session(graph=graph) as session:
  # One-time initialisation of the variables as described in the graph:
  # random weights for the matrix, zeros for the biases.
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    # One optimisation step; the loss value and the training predictions come
    # back as numpy arrays.
    _, loss_value, predictions = session.run([optimizer, loss, train_prediction])
    # Only report progress on every 100th step.
    if step % 100 != 0:
      continue
    print('Loss at step %d: %f' % (step, loss_value))
    train_accuracy = accuracy(predictions, trainLabels[:train_subset, :])
    print('Training accuracy: %.1f%%' % train_accuracy)
    # .eval() on valid_prediction behaves like run() for that single tensor
    # and recomputes all of its graph dependencies.
    valid_accuracy = accuracy(valid_prediction.eval(), validLabels)
    print('Validation accuracy: %.1f%%' % valid_accuracy)
  test_accuracy = accuracy(test_prediction.eval(), test_labels)
  print('Test accuracy: %.1f%%' % test_accuracy)

Initialized
Loss at step 0: 19.594343
Training accuracy: 8.4%
Validation accuracy: 12.1%
Loss at step 100: 1.581522
Training accuracy: 76.5%
Validation accuracy: 66.5%
Loss at step 200: 0.862522
Training accuracy: 83.5%
Validation accuracy: 67.6%
Loss at step 300: 0.503483
Training accuracy: 88.4%
Validation accuracy: 67.6%
Loss at step 400: 0.309167
Training accuracy: 93.4%
Validation accuracy: 67.6%
Loss at step 500: 0.204319
Training accuracy: 97.1%
Validation accuracy: 67.8%
Loss at step 600: 0.146053
Training accuracy: 97.8%
Validation accuracy: 68.0%
Loss at step 700: 0.109719
Training accuracy: 98.6%
Validation accuracy: 68.3%
Loss at step 800: 0.087396
Training accuracy: 99.0%
Validation accuracy: 68.5%
Test accuracy: 77.4%


In [16]:
batch_size = 128

graph = tf.Graph()
with graph.as_default():

  # Input data. The training data and labels are placeholders that receive
  # one minibatch per step at run time; validation and test data are
  # constants in the graph.
  num_pixels = image_size * image_size
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_pixels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(validData)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables: truncated-normal weights and zero biases.
  weights = tf.Variable(tf.truncated_normal([num_pixels, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))

  # Training computation: mean softmax cross-entropy of the minibatch.
  logits = tf.matmul(tf_train_dataset, weights) + biases
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf_train_labels, logits=logits)
  loss = tf.reduce_mean(per_example_loss)

  # Optimizer: gradient descent with a learning rate of 0.5.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_logits = tf.matmul(tf_valid_dataset, weights) + biases
  valid_prediction = tf.nn.softmax(valid_logits)
  test_logits = tf.matmul(tf_test_dataset, weights) + biases
  test_prediction = tf.nn.softmax(test_logits)

In [19]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print("Initialized")
  num_train = trainLabels.shape[0]
  for step in range(num_steps):
    # The minibatch window advances by batch_size each step and wraps around
    # before it would run past the end of the training data.
    # Note: better randomization across epochs would be possible.
    start = (step * batch_size) % (num_train - batch_size)
    stop = start + batch_size
    batch_data = trainData[start:stop, :]
    batch_labels = trainLabels[start:stop, :]
    # The feed dictionary maps each placeholder node of the graph to the
    # numpy array that is fed into it for this step.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, loss_value, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    # Only report progress on every 500th step.
    if step % 500 != 0:
      continue
    print("Minibatch loss at step %d: %f" % (step, loss_value))
    print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
    valid_accuracy = accuracy(valid_prediction.eval(), validLabels)
    print("Validation accuracy: %.1f%%" % valid_accuracy)
  test_accuracy = accuracy(test_prediction.eval(), test_labels)
  print("Test accuracy: %.1f%%" % test_accuracy)

Initialized
Minibatch loss at step 0: 23.252291
Minibatch accuracy: 3.1%
Validation accuracy: 11.7%
Minibatch loss at step 500: 1.431617
Minibatch accuracy: 75.8%
Validation accuracy: 74.7%
Minibatch loss at step 1000: 1.319434
Minibatch accuracy: 76.6%
Validation accuracy: 76.4%
Minibatch loss at step 1500: 1.054531
Minibatch accuracy: 77.3%
Validation accuracy: 77.0%
Minibatch loss at step 2000: 1.377252
Minibatch accuracy: 72.7%
Validation accuracy: 77.1%
Minibatch loss at step 2500: 1.084431
Minibatch accuracy: 75.0%
Validation accuracy: 78.0%
Minibatch loss at step 3000: 0.827081
Minibatch accuracy: 81.2%
Validation accuracy: 78.7%
Test accuracy: 86.0%


In [26]:
## Adding a hidden layer to the SGD implementation

batch_size = 128
h1_size = 1024

def layerfunc(x,weights,biases):
  """Compute the logits of a network with one ReLU hidden layer.

  `weights` and `biases` are indexed with the keys 'h1' (hidden layer) and
  'out' (output layer). The returned logits are not passed through softmax.
  """
  hidden_pre_activation = tf.matmul(x, weights['h1']) + biases['h1']
  hidden = tf.nn.relu(hidden_pre_activation)
  return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

graph = tf.Graph()
with graph.as_default():

  # Input data. The training data and labels are placeholders that receive
  # one minibatch per step at run time; validation and test data are
  # constants in the graph.
  num_pixels = image_size * image_size
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_pixels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(validData)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables: one weight matrix and one bias vector per layer, keyed by
  # 'h1' (hidden layer) and 'out' (output layer). Weights are drawn from a
  # truncated normal distribution, biases start at zero.
  weights = {
    'h1' : tf.Variable(tf.truncated_normal([num_pixels, h1_size])),
    'out' : tf.Variable(tf.truncated_normal([h1_size, num_labels])),
  }
  biases = {
    'h1' : tf.Variable(tf.zeros([h1_size])),
    'out' : tf.Variable(tf.zeros([num_labels])),
  }

  # Training computation: mean softmax cross-entropy of the minibatch logits.
  logits = layerfunc(tf_train_dataset,weights,biases)
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf_train_labels, logits=logits)
  loss = tf.reduce_mean(per_example_loss)

  # Optimizer: gradient descent with a learning rate of 0.5.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

  # Predictions for the training, validation, and test data. All three use
  # the same weights and biases through layerfunc.
  train_prediction = tf.nn.softmax(logits)
  valid_logits = layerfunc(tf_valid_dataset,weights,biases)
  valid_prediction = tf.nn.softmax(valid_logits)
  test_logits = layerfunc(tf_test_dataset,weights,biases)
  test_prediction = tf.nn.softmax(test_logits)

In [27]:
num_steps = 3001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print("Initialized")
  num_train = trainLabels.shape[0]
  for step in range(num_steps):
    # The minibatch window advances by batch_size each step and wraps around
    # before it would run past the end of the training data.
    # Note: better randomization across epochs would be possible.
    start = (step * batch_size) % (num_train - batch_size)
    stop = start + batch_size
    batch_data = trainData[start:stop, :]
    batch_labels = trainLabels[start:stop, :]
    # The feed dictionary maps each placeholder node of the graph to the
    # numpy array that is fed into it for this step.
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, loss_value, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    # Only report progress on every 500th step.
    if step % 500 != 0:
      continue
    print("Minibatch loss at step %d: %f" % (step, loss_value))
    print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
    valid_accuracy = accuracy(valid_prediction.eval(), validLabels)
    print("Validation accuracy: %.1f%%" % valid_accuracy)
  test_accuracy = accuracy(test_prediction.eval(), test_labels)
  print("Test accuracy: %.1f%%" % test_accuracy)

Initialized
Minibatch loss at step 0: 333.228333
Minibatch accuracy: 7.0%
Validation accuracy: 24.9%
Minibatch loss at step 500: 13.078899
Minibatch accuracy: 79.7%
Validation accuracy: 81.5%
Minibatch loss at step 1000: 5.466528
Minibatch accuracy: 84.4%
Validation accuracy: 80.6%
Minibatch loss at step 1500: 9.325289
Minibatch accuracy: 85.2%
Validation accuracy: 81.6%
Minibatch loss at step 2000: 5.514383
Minibatch accuracy: 85.9%
Validation accuracy: 81.4%
Minibatch loss at step 2500: 6.626647
Minibatch accuracy: 87.5%
Validation accuracy: 81.3%
Minibatch loss at step 3000: 3.771474
Minibatch accuracy: 89.1%
Validation accuracy: 82.5%
Test accuracy: 89.0%
