In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam,RMSprop
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,BatchNormalization,Dropout
from keras.utils.np_utils import to_categorical


# Seed NumPy's global random generator so NumPy-driven randomness is repeatable.
# NOTE(review): only NumPy is seeded here; the Keras backend's own RNG is
# presumably left unseeded, so training runs may still differ — confirm.
np.random.seed(5)

# Render matplotlib figures inline in the notebook and set the default figure size.
%matplotlib inline
plt.rcParams['figure.figsize'] = [10,7]

# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Silence every warning for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_file_paths:
    print(input_path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the three competition CSVs (training set, test set, sample submission) in one pass.
train, test, sample_subm = map(
    pd.read_csv,
    (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
        '/kaggle/input/digit-recognizer/sample_submission.csv',
    ),
)

In [None]:
# Show the training DataFrame as this cell's output.
train

The given training data contains 42000 rows with 785 columns, indicating 784 pixel color values and 1 label column 

Taking the square root of the number of pixel columns gives the side length of the (square) image  

In [None]:
# Side length of a square image: square root of the column count minus the one label column.
n_pixel_columns = train.shape[1] - 1
np.sqrt(n_pixel_columns)

In [None]:
# Show the test DataFrame as this cell's output.
test

Check for any data inconsistencies -- missing data

In [None]:
# Number of columns in `train` that contain at least one missing value.
train.isna().any(axis=0).sum()

In [None]:
# Number of columns in `test` that contain at least one missing value.
test.isna().any(axis=0).sum()

None of the data is missing, let's proceed to data preparation

# Data Preparation

Organise the data into features and response

In [None]:
# Response: the digit label column.
Y_train = train['label']
# Features: every column except the label. Selecting by name (instead of the
# positional slice iloc[:, 1:]) keeps features and response in agreement even
# if the label column is not the first column of the CSV.
X_train = train.drop(columns='label')

Normalize the training and test data by scaling the grayscale pixel values from the 0-255 range down to 0-1

In [None]:
# Divide both pixel tables by 255 (same divisor for train and test).
# NOTE: both names are rebound to the scaled result, so executing this cell a
# second time divides by 255 again.
X_train, test = (pixels / 255 for pixels in (X_train, test))

In order to use the data with Keras, it needs to be reshaped from (N,784) to (N,28,28,1)

Where N is the number of rows, 28x28 are the image dimensions and 1 is the number of channels.  
For RGB images, there are 3 channels, one for each of the R, G, B colors (0-255); for grayscale, there is only 1 channel.


In [None]:
# Reshape the flat pixel rows into image tensors of shape
# (N, height=28, width=28, channels=1).
# np.asarray accepts both a DataFrame and an already-converted ndarray, so
# re-running this cell is a harmless no-op instead of failing on the missing
# `.values` attribute of an ndarray.
X_train = np.asarray(X_train).reshape(-1, 28, 28, 1)
test = np.asarray(test).reshape(-1, 28, 28, 1)

Let's take a look at some of the digits

In [None]:
# Draw three sample digits (rows 1, 3 and 7 of X_train) side by side in a 1x3 grid.
for panel, sample_idx in enumerate((1, 3, 7), start=1):
    plt.subplot(1, 3, panel)
    # Drop the trailing channel axis so imshow receives a 2-D image.
    plt.imshow(X_train[sample_idx, :, :, 0])

In order to use Y_train as labels for individual digits, we must encode them using one-hot encoding;  
The final layer in the CNN outputs a 10-dimensional vector of class probabilities (one per digit), which is compared against the one-hot label that has 1 for the true digit and 0 for the others  

In [None]:
# One-hot encode the digit labels (ex : 2 -> [0,0,1,0,0,0,0,0,0,0]).
# Encode straight from the raw `train.label` column rather than from `Y_train`
# itself, so re-running this cell cannot one-hot encode an already encoded array.
Y_train = to_categorical(train.label, num_classes=10)

In [None]:
# Hold out 10% of the prepared samples for validation (fixed random_state for a repeatable split).
split_parts = train_test_split(X_train, Y_train, test_size=0.1, random_state=42)
x_train, x_val, y_train, y_val = split_parts

# Model Training and Testing

In [None]:
# CNN: three conv stages of the same shape (32 -> 64 -> 128 filters), each made of
# two 5x5 convolutions, batch normalisation, 2x2 max pooling and dropout,
# followed by a dense classifier head with a 10-way softmax output.
model = Sequential()

for stage, n_filters in enumerate((32, 64, 128)):
    # Only the very first layer of the network declares the input shape (28x28, 1 channel).
    first_conv_extra = {'input_shape': (28, 28, 1)} if stage == 0 else {}
    stage_layers = [
        Conv2D(filters=n_filters, kernel_size=(5, 5), padding='Same', activation='relu', **first_conv_extra),
        Conv2D(filters=n_filters, kernel_size=(5, 5), padding='Same', activation='relu'),
        BatchNormalization(momentum=0.15),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    for layer in stage_layers:
        model.add(layer)

# Classifier head: flatten the feature maps, one hidden dense layer, softmax over 10 outputs.
classifier_head = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.4),
    Dense(10, activation='softmax'),
]
for layer in classifier_head:
    model.add(layer)

In [None]:
# Configure training: Adam optimiser, categorical cross-entropy loss, accuracy as the reported metric.
model.compile(
    loss=['categorical_crossentropy'],
    metrics=['accuracy'],
    optimizer='adam',
)

In [None]:
# Baseline fit without data augmentation -- accuracy 98.34 (kept for reference, not executed)
# history = model.fit(x_train, y_train, batch_size=20, epochs=5, validation_data=(x_val, y_val))

# Data Augmentation 

To prevent overfitting and to improve prediction score,  
we can effectively increase the dataset size by generating randomly rotated, zoomed and shifted variants of the digits already available

In [None]:
# Augmentation generator: images are randomly rotated, zoomed and shifted.
# Flipping and all centering / std-normalisation / whitening options are
# explicitly switched off.
augmentation_settings = {
    # Dataset-level and sample-level statistics: all disabled.
    'featurewise_center': False,
    'samplewise_center': False,
    'featurewise_std_normalization': False,
    'samplewise_std_normalization': False,
    'zca_whitening': False,
    # Geometric jitter.
    'rotation_range': 10,        # random rotation range, in degrees
    'zoom_range': 0.1,           # random zoom
    'width_shift_range': 0.1,    # horizontal shift, as a fraction of total width
    'height_shift_range': 0.1,   # vertical shift, as a fraction of total height
    # Flips are disabled.
    'horizontal_flip': False,
    'vertical_flip': False,
}
datagen = ImageDataGenerator(**augmentation_settings)

datagen.fit(X_train)

In [None]:
# Fit the model on augmented batches drawn from the training split only.
# x_val / y_val were carved out of the full X_train / Y_train arrays, so feeding
# the full arrays to the generator would train on the validation samples and
# inflate every validation metric reported afterwards.
# validation_data is passed as an (inputs, targets) tuple, the documented form.
history = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=20),
    epochs=5,
    validation_data=(x_val, y_val),
)

The outputs are in the shape of (N,10), the 10 columns indicating the predicted probability of each 'class' (digit)  
The predicted digit is the index of the largest probability in each row (argmax along the columns)

In [None]:
# Class probabilities for the validation images (one row per image, one column per digit).
val_probabilities = model.predict(x_val)
# Predicted digit = index of the highest probability in each row.
y_pred = np.argmax(val_probabilities, axis=1)
# y_val starts out one-hot encoded; collapse it to digit labels only once, so that
# re-running this cell does not call argmax(axis=1) on an already 1-D label array.
if y_val.ndim > 1:
    y_val = np.argmax(y_val, axis=1)

In [None]:
# Show the predicted digit labels for the validation set.
y_pred

In [None]:
# Show the true digit labels for the validation set.
y_val

In [None]:
# Accuracy on the single hold-out validation split produced by train_test_split.
# No k-fold cross-validation is performed in this notebook, so the printed label
# names the score for what it is.
val_accuracy = accuracy_score(y_val, y_pred)
print('Hold-out validation accuracy :', val_accuracy)

In [None]:
# Confusion matrix of true vs. predicted validation digits, drawn as an annotated heatmap.
val_confusion = confusion_matrix(y_val, y_pred)
g = sns.heatmap(val_confusion, annot=True, fmt='0.3g')

In [None]:
# Per-class precision / recall / F1 summary for the validation predictions.
val_report = classification_report(y_val, y_pred)
print('classification_report : \n', val_report)

# Test predictions

In [None]:
# Predict class probabilities for the test images, then keep the most probable digit per image.
test_probabilities = model.predict(test)
y_predict = test_probabilities.argmax(axis=1)
print(y_predict)

In [None]:
# Load the sample submission file to use as the template for the output.
submission_template_path = '/kaggle/input/digit-recognizer/sample_submission.csv'
submission = pd.read_csv(submission_template_path)

In [None]:
# Show the submission DataFrame as this cell's output.
submission

In [None]:
# Replace the template's Label column with the predicted test digits.
submission = submission.assign(Label=y_predict)

In [None]:
# Write the submission to the working directory without the index column.
output_path = "submission.csv"
submission.to_csv(output_path, index=False)