In [1]:
import os

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers

In [2]:
# Directory that holds train.csv and test.csv. The default is the original
# location; set the MNIST_DATA_DIR environment variable to read from elsewhere
# without editing the notebook.
DATA_DIR = os.environ.get('MNIST_DATA_DIR', r'C:\Datasets')

df_train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
df_test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))

In [3]:
# Training frame: column 0 is the target, columns 1..784 are the inputs.
df_label = df_train.iloc[:, 0]
df_features = df_train.iloc[:, 1:785]

# Test frame: take the first 784 columns as inputs.
X_test = df_test.iloc[:, :784]

print(X_test.shape)

(28000, 784)


In [4]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_cv, y_train, y_cv = train_test_split(df_features, df_label,
                                                test_size = 0.2,
                                                random_state = 1212)

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the supported
# replacement. reshape(-1, 784) keeps the 784-feature layout without
# hard-coding the row counts, so other dataset sizes / split ratios still work.
X_train = X_train.to_numpy().reshape(-1, 784)
X_cv = X_cv.to_numpy().reshape(-1, 784)

X_test = X_test.to_numpy().reshape(-1, 784)

  from __future__ import print_function as _print_function
  
  from tensorflow.python.feature_column.feature_column_lib import Layer


In [5]:
# Feature scaling: cast to float32 and divide by 255
# (presumably 8-bit pixel intensities, mapping them into [0, 1] -- confirm against the data).
X_train = X_train.astype('float32') / 255
X_cv = X_cv.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-hot encode the integer labels, one column per digit class.
num_digits = 10
y_train = keras.utils.to_categorical(y_train, num_digits)
y_cv = keras.utils.to_categorical(y_cv, num_digits)

In [6]:
# Network dimensions
n_input = 784  # number of input features
# Widths of the four hidden Dense layers, in order.
n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4 = 300, 100, 100, 200
num_digits = 10  # number of output classes

In [7]:
def build_model():
    """Build and compile the fully connected digit classifier.

    The network is assembled with the Keras functional API (not ``Sequential``):
    an input of ``n_input`` features, four ReLU ``Dense`` layers sized by the
    module-level ``n_hidden_1`` .. ``n_hidden_4`` with ``Dropout(0.3)`` after
    each of the first three, and a softmax output with ``num_digits`` units.

    Returns:
        A compiled ``Model`` using categorical cross-entropy loss, the Adam
        optimizer (string shortcut, i.e. Keras default settings) and the
        accuracy metric.
    """
    inputs = Input(shape=(n_input,))
    x = Dense(n_hidden_1, activation='relu', name="Hidden_Layer_1")(inputs)
    x = Dropout(0.3)(x)
    x = Dense(n_hidden_2, activation='relu', name="Hidden_Layer_2")(x)
    x = Dropout(0.3)(x)
    x = Dense(n_hidden_3, activation='relu', name="Hidden_Layer_3")(x)
    x = Dropout(0.3)(x)
    # No dropout between the last hidden layer and the output layer.
    x = Dense(n_hidden_4, activation='relu', name="Hidden_Layer_4")(x)
    output = Dense(num_digits, activation='softmax', name="Output_Layer")(x)

    model = Model(inputs, output)

    # The model has always been trained with Adam. The SGD optimizer that used
    # to be constructed here was never passed to compile(), and its deprecated
    # `lr=` keyword is rejected by newer Keras releases, so it was removed.
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

In [8]:
# Build the network: one input layer, four Dense hidden layers (with Dropout
# after each of the first three) and one softmax output layer.
model = build_model()
model.summary() # 297,910 parameters in total (weights + biases of the five Dense layers)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
Hidden_Layer_1 (Dense)       (None, 300)               235500    
_________________________________________________________________
dropout (Dropout)            (None, 300)               0         
_________________________________________________________________
Hidden_Layer_2 (Dense)       (None, 100)               30100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
Hidden_Layer_3 (Dense)       (None, 100)               10100     
_________

In [9]:
# Training hyperparameters
batch_size = 100       # samples per gradient update
training_epochs = 100  # passes over the training set

In [10]:
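# NOTE: build_model() already compiles the network with the Adam optimizer, and
# the training run below uses that configuration. The block here is the plain
# vanilla Stochastic Gradient Descent alternative, kept commented out for
# reference; uncomment it to re-compile the model with SGD instead.
# model.compile(loss='categorical_crossentropy',
#              optimizer='sgd',
#              metrics=['accuracy'])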

In [11]:
# Fit on the training split and evaluate on the held-out split after every
# epoch; verbose=2 prints one summary line per epoch.
history1 = model.fit(
    X_train,
    y_train,
    validation_data=(X_cv, y_cv),
    epochs=training_epochs,
    batch_size=batch_size,
    verbose=2,
)

Train on 33600 samples, validate on 8400 samples
Epoch 1/100
33600/33600 - 2s - loss: 0.5963 - acc: 0.8072 - val_loss: 0.1847 - val_acc: 0.9457
Epoch 2/100
33600/33600 - 1s - loss: 0.2345 - acc: 0.9326 - val_loss: 0.1335 - val_acc: 0.9581
Epoch 3/100
33600/33600 - 1s - loss: 0.1731 - acc: 0.9493 - val_loss: 0.1170 - val_acc: 0.9667
Epoch 4/100
33600/33600 - 2s - loss: 0.1422 - acc: 0.9584 - val_loss: 0.1228 - val_acc: 0.9661
Epoch 5/100
33600/33600 - 2s - loss: 0.1205 - acc: 0.9650 - val_loss: 0.1056 - val_acc: 0.9714
Epoch 6/100
33600/33600 - 2s - loss: 0.1050 - acc: 0.9692 - val_loss: 0.0939 - val_acc: 0.9736
Epoch 7/100
33600/33600 - 2s - loss: 0.0965 - acc: 0.9710 - val_loss: 0.0944 - val_acc: 0.9740
Epoch 8/100
33600/33600 - 1s - loss: 0.0880 - acc: 0.9739 - val_loss: 0.0922 - val_acc: 0.9754
Epoch 9/100
33600/33600 - 2s - loss: 0.0817 - acc: 0.9751 - val_loss: 0.0986 - val_acc: 0.9742
Epoch 10/100
33600/33600 - 1s - loss: 0.0783 - acc: 0.9761 - val_loss: 0.0840 - val_acc: 0.9756


Epoch 86/100
33600/33600 - 2s - loss: 0.0156 - acc: 0.9960 - val_loss: 0.1011 - val_acc: 0.9833
Epoch 87/100
33600/33600 - 2s - loss: 0.0159 - acc: 0.9955 - val_loss: 0.1059 - val_acc: 0.9829
Epoch 88/100
33600/33600 - 2s - loss: 0.0166 - acc: 0.9948 - val_loss: 0.1078 - val_acc: 0.9823
Epoch 89/100
33600/33600 - 2s - loss: 0.0165 - acc: 0.9953 - val_loss: 0.1024 - val_acc: 0.9823
Epoch 90/100
33600/33600 - 2s - loss: 0.0134 - acc: 0.9956 - val_loss: 0.1017 - val_acc: 0.9838
Epoch 91/100
33600/33600 - 2s - loss: 0.0165 - acc: 0.9954 - val_loss: 0.1104 - val_acc: 0.9821
Epoch 92/100
33600/33600 - 1s - loss: 0.0146 - acc: 0.9962 - val_loss: 0.1066 - val_acc: 0.9818
Epoch 93/100
33600/33600 - 1s - loss: 0.0136 - acc: 0.9962 - val_loss: 0.1124 - val_acc: 0.9810
Epoch 94/100
33600/33600 - 2s - loss: 0.0130 - acc: 0.9960 - val_loss: 0.1095 - val_acc: 0.9826
Epoch 95/100
33600/33600 - 1s - loss: 0.0136 - acc: 0.9959 - val_loss: 0.1171 - val_acc: 0.9823
Epoch 96/100
33600/33600 - 2s - loss: 0.

In [12]:
# One row of class probabilities per test sample (columns 0..9).
probabilities = pd.DataFrame(model.predict(X_test, batch_size=200))

# Pick the most probable class per row, then expose the row position as a
# 1-based 'ImageId' column next to the predicted 'Label'.
test_pred = (
    probabilities.idxmax(axis=1)
    .rename('Label')
    .rename_axis('ImageId')
    .reset_index()
)
test_pred['ImageId'] += 1

test_pred.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [13]:
# Write the ImageId/Label predictions to a CSV in the working directory,
# omitting the DataFrame index so only those two columns are emitted.
test_pred.to_csv('mnist_submission.csv', index = False)