In [1]:
import tensorflow as tf

In [2]:
import numpy as np 
import pandas as pd 

In [3]:
# Read the Kaggle MNIST CSV files: the training file first, then the test file.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Data/train.csv",
        "C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Data/test.csv",
    )
)

In [4]:
# Target: the 'label' column of the training frame.
y = train['label']
# Features: every column of the training frame except 'label'.
X = train.loc[:, ~train.columns.isin(['label'])]

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the labelled rows for validation; the fixed random_state
# makes the split repeatable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

In [6]:
# Convert the training split to plain NumPy arrays.
# DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# np.asarray yields the same array on every pandas version and, unlike
# as_matrix(), is harmless if this cell is executed a second time.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [7]:
# Convert the validation split to plain NumPy arrays.
# DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# np.asarray yields the same array on every pandas version and, unlike
# as_matrix(), is harmless if this cell is executed a second time.
X_val = np.asarray(X_val)
y_val = np.asarray(y_val)

In [8]:
# Convert the test frame to a plain NumPy array.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# np.asarray yields the same array on every pandas version.
X_test = np.asarray(test)

In [9]:
# Used to one-hot encode the 10 labels
def one_hot_encode_labels(vector, total_labels=10):
    """One-hot encode a sequence of integer class labels.

    Parameters
    ----------
    vector : array-like of int
        One class label per observation. Multi-dimensional input (for example
        an (n, 1) column vector) is flattened first.
    total_labels : int, default 10
        Number of distinct classes, i.e. the width of the encoded matrix.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape (number of labels, total_labels) with a single 1
        in each row, placed in the column given by that row's label.

    Raises
    ------
    IndexError
        If the labels are not integers or any label lies outside
        ``[0, total_labels)``.
    """
    # Flatten so a column vector cannot broadcast against the row indices and
    # switch on every cell of the matrix.
    labels = np.asarray(vector).ravel()
    number_of_obs = labels.size

    # initialize matrix that will represent our one-hot encoding
    output = np.zeros((number_of_obs, total_labels))
    if number_of_obs == 0:
        return output

    if not np.issubdtype(labels.dtype, np.integer):
        raise IndexError(
            "labels must be integers, got dtype {}".format(labels.dtype)
        )

    # NumPy would silently interpret a negative label as "count from the last
    # column", so reject anything outside the valid range explicitly.
    if labels.min() < 0 or labels.max() >= total_labels:
        raise IndexError(
            "labels must lie in [0, {}), got values between {} and {}".format(
                total_labels, labels.min(), labels.max()
            )
        )

    # for each row, set the column to 1 based on the label
    output[np.arange(number_of_obs), labels] = 1
    return output

In [10]:
class MNIST_Data_Prep():
    """Holds the reshaped, scaled image arrays and serves training mini-batches.

    ``set_up_images`` reads the module-level arrays ``X_train``, ``y_train``,
    ``X_val``, ``y_val`` and ``X_test`` and stores the prepared versions on the
    instance; ``next_batch`` then walks through the training data in order.
    """

    def __init__(self):
        # Index in the training set at which the next mini-batch starts.
        self.i = 0

        # Filled in by set_up_images().
        self.training_images = None
        self.training_labels = None

        self.val_images = None
        self.val_labels = None

        self.test_images = None

    @staticmethod
    def _prepare_images(pixels):
        """Reshape rows of pixels to (n, 28, 28, 1) and divide by 255."""
        # 255 is the maximum pixel value, so dividing by it is the 'normalizing' step.
        return pixels.reshape(len(pixels), 28, 28, 1) / 255.0

    def set_up_images(self):
        """Prepare the training, validation and test arrays from the module-level data.

        Images are reshaped to (n, 28, 28, 1) and divided by 255; training and
        validation labels are one-hot encoded. The test set has no labels.
        """
        print("Setting Up Training Images and Labels")
        self.training_images = self._prepare_images(X_train)
        self.training_labels = one_hot_encode_labels(y_train)

        print("Setting Up Validation Images and Labels")
        self.val_images = self._prepare_images(X_val)
        self.val_labels = one_hot_encode_labels(y_val)

        print("Setting Up Test Images and Labels")
        self.test_images = self._prepare_images(X_test)

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` training images and labels.

        Batches are taken in order starting at ``self.i``. When a batch would
        run past the end of the training set it wraps around to the beginning,
        so every batch has exactly ``batch_size`` rows and no sample is skipped
        at the start of the next pass.

        Returns
        -------
        tuple
            ``(images, labels)`` for the batch.
        """
        total = len(self.training_images)
        stop = self.i + batch_size

        if stop <= total:
            # Common case: the batch is one contiguous slice.
            x = self.training_images[self.i:stop]
            y = self.training_labels[self.i:stop]
        else:
            # The batch crosses the end of the data: wrap the indices around.
            wrapped = np.arange(self.i, stop) % total
            x = self.training_images[wrapped]
            y = self.training_labels[wrapped]

        self.i = stop % total
        return x, y

In [11]:
### INPUT PLACEHOLDERS FOR OUR TRAINING DATA
# Image batch laid out as [images, height, width, channels]: the leading None is
# filled by whatever batch size is fed, each image is 28x28 pixels with 1 colour channel.
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
# One row of 10 values per image, matching the 10-column one-hot labels.
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [12]:
# Dropout keep probability: the fraction of neurons we want to KEEP.
# Supplied through feed_dict each time the graph is run.
hold_prob = tf.placeholder(dtype=tf.float32)

In [13]:
# shape depends on the size of our images and size of our tensors

def init_weights(shape):
    """Create a trainable variable of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def init_bias(shape):
    """Create a trainable variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2by2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def convolutional_layer(input_x, shape):
    """Build a convolution layer followed by a ReLU activation.

    Parameters
    ----------
    input_x : tensor
        Input passed to ``conv2d``.
    shape : list of 4 ints
        Shape of the filter variable; the entry at index 3 is the number of
        output channels and therefore the length of the bias vector.

    Returns
    -------
    tensor
        ``relu(conv2d(input_x, W) + b)``.
    """
    # initialize the weights, passing along the shape
    W = init_weights(shape)
    # One bias per output channel. Use init_bias (constant 0.1), consistent with
    # normal_full_layer, rather than a second random weight initialisation.
    # The variable keeps the same shape as before.
    b = init_bias([shape[3]])
    return tf.nn.relu(conv2d(input_x, W) + b)

# A plain fully connected layer. It does not flatten its input: the caller must
# pass a tensor that is already flattened.
def normal_full_layer(input_layer, size):
    """Return ``input_layer @ W + b`` for freshly created ``W`` and ``b``.

    The number of input features is read from dimension 1 of ``input_layer``;
    ``size`` is the number of output units. No activation is applied.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    projected = tf.matmul(input_layer, weights)
    return projected + biases

In [14]:
# First convolution block: 4x4 filters over the 1-channel input producing
# 28 output channels, followed by 2x2 max-pooling.
convo_1 = convolutional_layer(input_x=x, shape=[4, 4, 1, 28])
convo_1_pooling = max_pool_2by2(convo_1)

In [15]:
# Second convolution block: 4x4 filters over the 28 channels from the first
# block producing 64 output channels, followed by 2x2 max-pooling.
convo_2 = convolutional_layer(input_x=convo_1_pooling, shape=[4, 4, 28, 64])
convo_2_pooling = max_pool_2by2(convo_2)

In [16]:
# Flatten each pooled feature map into one row. Two stride-2 poolings with
# 'SAME' padding take 28 -> 14 -> 7 pixels per side, with 64 channels.
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 7 * 7 * 64])

In [17]:
# Fully connected layer with 1024 units and a ReLU activation.
full_layer_one = tf.nn.relu(
    normal_full_layer(input_layer=convo_2_flat, size=1024)
)

In [18]:
# Dropout on the fully connected layer; hold_prob is the keep probability
# fed at run time.
full_one_dropout = tf.nn.dropout(x=full_layer_one, keep_prob=hold_prob)

In [19]:
# Output layer: 10 raw scores (logits) per image, with no activation applied.
y_pred = normal_full_layer(input_layer=full_one_dropout, size=10)

In [20]:
# Loss: softmax cross-entropy between the one-hot labels and the logits,
# averaged over the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true)
)

In [21]:
# Adam optimizer with a learning rate of 0.001; `trainer` is the op that
# performs one update step on the cross-entropy loss.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
trainer = optimizer.minimize(loss=cross_entropy)

In [22]:
# Create the data helper and populate its training, validation and test arrays
# (set_up_images prints a progress line for each of the three sets).
ch = MNIST_Data_Prep()
ch.set_up_images()

Setting Up Training Images and Labels
Setting Up Validation Images and Labels
Setting Up Test Images and Labels


In [23]:
# Op that initializes every variable in the graph; it is run at the start of
# the training session.
init = tf.global_variables_initializer()

In [24]:
# Create a saver, used to write the trained variables to a checkpoint during
# training and to restore them again for inference.
saver = tf.train.Saver()

In [25]:
# Validation-accuracy ops, built once before training starts. Creating them
# inside the loop would add a fresh set of nodes to the graph at every
# evaluation step.
# A prediction counts as correct when the index of the largest logit equals the
# index of the 1 in the one-hot label.
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
# tf.cast turns the booleans into 1.0 / 0.0 so their mean is the accuracy.
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:

    sess.run(init)

    for i in range(501):

        # next_batch returns a tuple (images, labels)
        batch = ch.next_batch(100)

        # hold_prob is the dropout KEEP probability: keep 50% of the neurons
        # while training
        sess.run(trainer, feed_dict={x: batch[0], y_true: batch[1], hold_prob: 0.5})

        if i % 100 == 0:

            # Save our model for inference later on. Checkpointing only on
            # evaluation steps avoids a disk write on every one of the 501
            # iterations; the last save (step 500) still holds the final weights.
            saver.save(sess, 'C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Model/my_mnist_cnn')

            # print what step we're on
            print("STEP: {}".format(i))

            # Print the VALIDATION accuracy (the Kaggle test set has no labels).
            # hold_prob = 1 because we don't want to drop any neurons when evaluating
            print(sess.run(acc, feed_dict={x: ch.val_images, y_true: ch.val_labels, hold_prob: 1.0}))
            print('\n')

STEP: 0
0.148196


STEP: 100
0.957431


STEP: 200
0.970851


STEP: 300
0.977417


STEP: 400
0.976551


STEP: 500
0.982035




In [28]:
# Reload the saved weights into a fresh session and label the test images.
with tf.Session() as session:
    saver.restore(
        session,
        'C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Model/my_mnist_cnn',
    )
    # The predicted digit is the index of the largest logit; hold_prob is 1.0
    # so that no neurons are dropped.
    prediction = session.run(
        tf.argmax(y_pred, axis=1),
        feed_dict={hold_prob: 1.0, x: ch.test_images},
    )

INFO:tensorflow:Restoring parameters from C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Model/my_mnist_cnn


In [29]:
# Display the array of predicted digits, one per test image.
prediction

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [30]:
# Number of predictions produced for the test set.
len(prediction)

28000

In [56]:
# Stack the 1-based image ids on top of the predicted digits (2 rows, one
# column per image). The id range is derived from the number of predictions
# instead of hard-coding the size of the test set.
final = np.vstack((np.arange(1, len(prediction) + 1), prediction))

In [62]:
# Flip the 2-row array so each image becomes one row with its id and its label.
final_df = pd.DataFrame(final.T, columns=['ImageId', 'Label'])

In [64]:
# Write the predictions to CSV without the DataFrame index column.
final_df.to_csv(
    path_or_buf='C:/Users/Matt/Desktop/School/Personal_Study/Deep_Learning/Udemy_Complete_Guide_to_TensorFlow/Kaggle_MNIST_Preds/mjy_mnist_preds_cnn_v1.csv',
    index=False,
)