# Model training and testing scripts with results sent to kaggle

## Kaggle login with kaggle.json and data download

In [None]:
# Ask the user to upload a file through Colab's upload helper and keep the
# returned value in `file`.
# NOTE(review): the upload is expected to be kaggle.json, since the next cell
# moves a file of that name out of the working directory - confirm.
from google.colab import files
file = files.upload()

In [3]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle

In [None]:
# Download the data files of the digit-recognizer competition with the kaggle CLI.
# The unzip cell that follows opens train.csv.zip and test.csv.zip from the
# working directory, so the download is expected to land there.
!kaggle competitions download -c digit-recognizer

## unzip data

In [5]:
import zipfile

# Extract both competition archives into the current working directory.
for archive_name in ("train.csv.zip", "test.csv.zip"):
    with zipfile.ZipFile(archive_name, "r") as archive:
        archive.extractall()

## create dataset

In [28]:
import numpy as np

# Both files are comma-separated and their first row is skipped on load.
csv_options = dict(delimiter=',', skiprows=1)
train = np.loadtxt('train.csv', **csv_options)
test = np.loadtxt('test.csv', **csv_options)

In [29]:
from keras import utils
input_shape = (28, 28, 1)

# Column 0 of `train` is used as the label below; the remaining columns are
# taken as the flattened image and reshaped to `input_shape`.
x_train = train[:, 1:]
x_train = x_train.reshape(x_train.shape[0], *input_shape)
# Divide out of place. The slice + reshape above can be a view onto `train`,
# so an in-place `/=` would rewrite the raw array and re-running this cell
# would rescale already-scaled pixels. This also matches how x_test is handled.
# (Division by 255 assumes 8-bit pixel intensities.)
x_train = x_train / 255.0

# `test` has no label column, so every column is pixel data.
x_test = test.reshape(test.shape[0], *input_shape)
x_test = x_test / 255.0

# One-hot encode the labels for the categorical cross-entropy loss.
y_train = train[:, 0]
y_train = utils.to_categorical(y_train)

## split dataset

In [31]:
from sklearn.model_selection import train_test_split

random_seed = 2

# Hold out 10% of the labelled samples for validation; the fixed seed makes
# the split repeatable.
split = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=random_seed,
)
X_train, X_val, Y_train, Y_val = split

## augment dataset

In [33]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentation applied to training batches: small rotations, zooms
# and horizontal/vertical shifts.
augmentation = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
datagen = ImageDataGenerator(**augmentation)

## create model

In [34]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

# Two convolutional stages (32 filters of 5x5, then 64 filters of 3x3), each
# followed by 2x2 max pooling and dropout, then a dense classifier head with
# a 10-way softmax output.
model = Sequential([
    # Stage 1
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu', input_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot labels, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## fit model

In [36]:
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
model_cnn_save_path = 'cnn.h5'
# Save the model to `model_cnn_save_path` only when the monitored validation
# accuracy is the best seen so far.
checkpoint = ModelCheckpoint(model_cnn_save_path,
                             monitor='val_accuracy',
                             save_best_only=True,
                             verbose=1)
# Backward-compatible alias: this callback was originally bound to a name whose
# first letter is Cyrillic 'с' (U+0441), which looks identical to Latin 'c' but
# is a different identifier. Cells that still use that spelling keep working;
# new code should use the ASCII name `checkpoint`.
сheckpoint = checkpoint

In [37]:
# Halve the learning rate when validation accuracy has not improved for
# 3 epochs, never going below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)

In [39]:
# Samples per training batch; also used to derive steps_per_epoch when fitting.
batch_size = 96

In [None]:
# Augmented training batches drawn from the training split.
train_batches = datagen.flow(X_train, Y_train, batch_size=batch_size)
# Number of whole batches that fit in the training split.
steps = X_train.shape[0] // batch_size

# NOTE: `сheckpoint` is spelled with a leading Cyrillic 'с' (U+0441), matching
# the name bound in the ModelCheckpoint cell; a Latin-only spelling would be a
# different identifier.
history = model.fit(
    train_batches,
    epochs=30,
    validation_data=(X_val, Y_val),
    steps_per_epoch=steps,
    verbose=1,
    callbacks=[сheckpoint, learning_rate_reduction],
)

## test model

In [None]:
# Restore the weights written by the checkpoint callback before predicting.
model.load_weights(model_cnn_save_path)

# Per-class scores for every test image, reduced to the most likely digit.
class_scores = model.predict(x_test)
predictions = class_scores.argmax(axis=1)

# Two-column table: 1-based image id, predicted label.
image_ids = range(1, len(predictions) + 1)
out = np.column_stack((image_ids, predictions))

## send result to kaggle

In [46]:
# Write the id/label table as CSV; comments='' keeps the header line free of
# numpy's default '# ' prefix.
np.savetxt(
    'submission.csv',
    out,
    fmt='%d,%d',
    header='ImageId,Label',
    comments='',
)

In [None]:
# Submit submission.csv to the digit-recognizer competition with the kaggle CLI;
# -m supplies the submission message.
!kaggle competitions submit -c digit-recognizer -m 'decision' -f submission.csv

## visualizations

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch, one curve per recorded metric.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)

plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()