# Digit recognizer

Dataset: [https://www.kaggle.com/c/digit-recognizer/data](https://www.kaggle.com/c/digit-recognizer/data)

In [None]:
# Càrrega dels mòduls

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import cross_val_score, GridSearchCV

from keras.optimizers import SGD
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Load the data

# Both CSV files start with a header row, which is skipped.
train, test = (
    np.loadtxt(csv_path, delimiter=',', skiprows=1)
    for csv_path in ('train.csv', 'test.csv')
)

# In the training file the first column is the label and the rest are the
# features; the test file has no label column.
y = train[:, 0].astype('int')
X = train[:, 1:]

In [None]:
# Scale the data to the interval [0, 1]
#
# Both arrays are computed from the untouched `train` / `test` arrays instead
# of from X itself, so re-running this cell never divides by 255 twice.
X = train[:, 1:] / 255
X_test = test / 255

In [None]:
# Show a sample of the data: the first 15 rows drawn as 28x28 grayscale
# images, each titled with its label.

fig, ax = plt.subplots(nrows=1, ncols=15, figsize=(15, 15))
for i, axis in enumerate(ax):
    axis.imshow(X[i].reshape(28, 28), cmap='gray')
    axis.set_title(y[i])
plt.show()

In [None]:
# Prepare the data for the network

# Reshape every flat row into a 28x28 image with a single trailing channel,
# matching `input_shape` below.
X = X.reshape(X.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

# One-hot encode the labels into 10 classes. The guard keeps the cell
# idempotent: encoding labels that are already one-hot would add a further
# axis instead of leaving them unchanged.
if y.ndim == 1:
    y = to_categorical(y, 10)

input_shape = (28, 28, 1)

In [None]:
# Optimització de la taxa d'aprenentatge

%time
def xarxaNeuronal(lr):
    """Build and compile the convolutional classifier.

    The network stacks two Conv2D -> MaxPooling2D -> Dropout(0.5) stages
    (32 filters of 5x5, then 128 filters of 7x7), flattens the result and
    ends with a 128-unit sigmoid layer followed by a 10-way softmax output.
    It is compiled with SGD, categorical cross-entropy loss and accuracy as
    the reported metric. The input shape is read from the notebook-level
    `input_shape` variable.

    Parameters
    ----------
    lr : float
        Learning rate given to the SGD optimizer. The parameter name is part
        of the interface: the grid search refers to it by the key 'lr'.

    Returns
    -------
    Sequential
        A newly built and compiled model.
    """
    model = Sequential()
    model.add(Conv2D(32, (5, 5), activation='relu', input_shape=input_shape))
    # NOTE(review): a (1, 1) pool window does not reduce the feature-map
    # size, so these pooling layers perform no down-sampling -- confirm that
    # this is intended.
    model.add(MaxPooling2D(pool_size=(1, 1)))
    model.add(Dropout(0.5))
    model.add(Conv2D(128, (7, 7), activation='relu'))
    model.add(MaxPooling2D(pool_size=(1, 1)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(10, activation='softmax'))
    # The learning rate is passed positionally: it is the first argument of
    # SGD, whereas its keyword was renamed from `lr` to `learning_rate` in
    # later Keras releases.
    model.compile(optimizer=SGD(lr), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Candidate learning rates, scored with 3-fold cross-validation after a
# single training epoch per fit.
parameters = dict(lr=(0.01, 0.1))
nn = KerasClassifier(build_fn=xarxaNeuronal, epochs=1, batch_size=16, verbose=0)
search = GridSearchCV(estimator=nn, param_grid=parameters, cv=3,
                      return_train_score=False)
search.fit(X, y, verbose=0)

# Summary table of the search, best mean test score first.
results = pd.DataFrame(search.cv_results_)
atributes = ['params', 'mean_test_score', 'rank_test_score']
results.loc[:, atributes].sort_values('mean_test_score', ascending=False)

In [None]:
# Build and train the neural network
#
# The model comes from the same builder used by the learning-rate search, so
# the network trained here cannot drift from the architecture tuned there.
# NOTE(review): lr=0.1 is hard-coded; presumably it is the best value found by
# the grid search -- verify against `search.best_params_`.
model = xarxaNeuronal(lr=0.1)

model.fit(X, y, epochs=50, batch_size=16, verbose=0)

In [None]:
# Predict the test set

# One row of 10 class scores per test image.
y_pred_nn = model.predict(X_test)
# The predicted digit is the index of the highest score in each row.
y_pred = y_pred_nn.argmax(axis=1)

In [None]:
# Show the results: the first 15 test images, each titled with the digit
# predicted by the network.

fig, ax = plt.subplots(nrows=1, ncols=15, figsize=(15, 15))
for i, axis in enumerate(ax):
    axis.imshow(test[i].reshape(28, 28), cmap='gray')
    axis.set_title("nn: {}".format(y_pred[i]))
plt.show()

In [None]:
# Save the results

# Two integer columns: a 1-based image id and the predicted label.
image_ids = np.arange(1, y_pred.size + 1)
results = np.column_stack((image_ids, y_pred))
np.savetxt(
    'resultats.csv',
    results,
    fmt="%d",
    delimiter=',',
    header="ImageId,Label",
    comments="",  # write the header line without the default '# ' prefix
)