In [25]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [26]:
# Location of the training CSV; the whole file is read into the DataFrame `df`.
file = '/content/drive/MyDrive/datasets/digit-recognizer/train.csv'
df = pd.read_csv(filepath_or_buffer=file)


In [27]:
from sklearn.model_selection import train_test_split

# Separate the target column ('label') from the remaining feature columns.
y = df['label']
X = df.drop(columns=['label'])

# Stratified 80/20 hold-out split (class proportions of `y` are preserved in
# both parts). `random_state` is pinned so that Restart & Run All reproduces
# the same partition; without it every run shuffles differently and results
# from different runs cannot be compared.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [28]:
# Unwrap the pandas objects: from here on features and labels are plain
# NumPy arrays.
X_train, X_test = X_train.values, X_test.values
y_train, y_test = y_train.values, y_test.values


In [29]:
# Turn every flat row into a 28x28 single-channel image, cast to float32 and
# scale by 1/255 (assumes 8-bit pixel values - TODO confirm against the CSV).
# NOTE: this cell is not idempotent - running it twice divides by 255 twice.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

In [30]:
# Stack the two partitions back into single arrays (train rows first, test
# rows after them), then one-hot encode the labels over 10 classes.
# Note that `X` and `y` are rebound here to arrays.
X = np.concatenate([X_train, X_test], axis=0)
y = tf.keras.utils.to_categorical(
    np.concatenate([y_train, y_test], axis=0),
    num_classes=10,
)

In [31]:
# Augmenting generator used for training:
#   - random rotations within +/-10 degrees,
#   - random horizontal / vertical shifts of up to 10% of the image size,
#   - random zoom within +/-10%,
#   - `validation_split=0.2` reserves 20% of any array passed to `.flow()`;
#     the two parts are selected with subset='training' / subset='validation'.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    validation_split=0.2,
)
# No `datagen.fit(X)` call: `fit` only computes the dataset statistics needed
# by featurewise_center, featurewise_std_normalization and zca_whitening.
# None of those options is enabled, so the call did nothing except make a
# full in-memory copy of X. Re-add it if one of those options is turned on.

In [32]:
# Disabled alternative label encoding (sklearn LabelBinarizer). The code is
# parked inside a bare string literal, so it never runs; because the literal
# is the cell's last expression, the notebook echoes it back as the cell
# output. Labels are one-hot encoded with tf.keras.utils.to_categorical
# instead.
"""from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
y = lb.fit_transform(y)"""

'from sklearn import preprocessing\nlb = preprocessing.LabelBinarizer()\ny = lb.fit_transform(y)'

In [33]:
# CNN classifier for 28x28x1 inputs, assembled layer by layer.
model = tf.keras.models.Sequential()

# Stage 1: two 5x5 convolutions with 32 filters each, then 2x2 max-pooling.
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(5, 5), activation='relu',
                                 input_shape=(28, 28, 1)))
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Stage 2: two 3x3 convolutions with 64 filters each, then 2x2 max-pooling.
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Classifier head: flatten, two hidden dense layers, 10-way softmax output.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=128, activation='relu'))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

# NOTE(review): RMSprop is imported but not used in this cell - the model is
# compiled with the 'adam' optimizer below.
from tensorflow.keras.optimizers import RMSprop

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 24, 24, 32)        832       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 20, 20, 32)        25632     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 8, 8, 64)          18496     
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 6, 6, 64)          36928     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 576)              

In [34]:
# Build both iterators up front so the epoch length can be read from the
# training iterator itself instead of from an unrelated array.
train_flow = datagen.flow(X, y, batch_size=32, subset='training')
# NOTE(review): the validation batches come from the same augmenting
# generator, so validation images are randomly transformed as well and the
# val_* metrics are measured on augmented data.
val_flow = datagen.flow(X, y, batch_size=8, subset='validation')

# Changes relative to the previous call:
#   - `batch_size` is no longer passed to `fit`: the iterators already yield
#     batches, and Model.fit documents that `batch_size` must not be given
#     for generator / Sequence inputs.
#   - `steps_per_epoch` is `len(train_flow)` (number of batches in one pass
#     over the training subset). It used to be derived from `X_train`, which
#     is not the array being iterated and only matched because the earlier
#     hold-out fraction happens to equal `validation_split`.
history = model.fit(
    train_flow,
    steps_per_epoch=len(train_flow),
    validation_data=val_flow,
    epochs=15,
    verbose=2)  # verbose=2: one summary line per epoch


Epoch 1/15
1050/1050 - 42s - loss: 0.3523 - accuracy: 0.8857 - val_loss: 0.1349 - val_accuracy: 0.9588
Epoch 2/15
1050/1050 - 13s - loss: 0.1072 - accuracy: 0.9666 - val_loss: 0.1077 - val_accuracy: 0.9673
Epoch 3/15
1050/1050 - 13s - loss: 0.0812 - accuracy: 0.9759 - val_loss: 0.0717 - val_accuracy: 0.9770
Epoch 4/15
1050/1050 - 13s - loss: 0.0670 - accuracy: 0.9798 - val_loss: 0.0859 - val_accuracy: 0.9750
Epoch 5/15
1050/1050 - 13s - loss: 0.0601 - accuracy: 0.9812 - val_loss: 0.0818 - val_accuracy: 0.9755
Epoch 6/15
1050/1050 - 13s - loss: 0.0531 - accuracy: 0.9847 - val_loss: 0.0623 - val_accuracy: 0.9796
Epoch 7/15
1050/1050 - 13s - loss: 0.0485 - accuracy: 0.9857 - val_loss: 0.0590 - val_accuracy: 0.9821
Epoch 8/15
1050/1050 - 12s - loss: 0.0436 - accuracy: 0.9870 - val_loss: 0.0514 - val_accuracy: 0.9838
Epoch 9/15
1050/1050 - 13s - loss: 0.0437 - accuracy: 0.9875 - val_loss: 0.0486 - val_accuracy: 0.9854
Epoch 10/15
1050/1050 - 13s - loss: 0.0413 - accuracy: 0.9876 - val_loss:

In [35]:

# Location of the test CSV; it is read and immediately converted to a NumPy
# array, so `df2` is an ndarray rather than a DataFrame despite its name.
file2 = '/content/drive/MyDrive/datasets/digit-recognizer/test.csv'
df2 = pd.read_csv(filepath_or_buffer=file2).values

In [36]:
# Same preprocessing as the training images: 28x28x1 layout, float32, scaled
# by 1/255.
# NOTE: not idempotent - re-running this cell divides by 255 a second time.
df2 = df2.reshape(-1, 28, 28, 1).astype('float32') / 255.0


In [37]:
# Generator with every option left at its default, used only to batch the
# test images at prediction time (no augmentation options are set).
# No `testdata.fit(df2)` call: `fit` only computes statistics for the
# featurewise_* / zca_whitening options, none of which is enabled, so it
# would merely copy the whole test array.
testdata = ImageDataGenerator()

In [40]:
# Feed the test images in their original order (shuffle=False) so that row i
# of `predictions` corresponds to row i of `df2`.
predictions = model.predict(
    testdata.flow(df2, shuffle=False),
)

In [41]:
# Sanity check: show the model output for the first test image - one value per
# class, produced by the 10-unit softmax layer.
print(predictions[0])

[3.6505113e-10 1.3311717e-09 1.0000000e+00 4.7043156e-09 1.5209128e-09
 1.8953461e-13 3.5532675e-12 3.3612164e-08 2.9967864e-10 1.9191142e-10]


In [43]:
# Predicted class per image = index of the largest score in each row.
# A single vectorised arg-max over the class axis (axis=1) replaces the
# per-row Python loop; `pred` is now a 1-D integer array, which still works
# with the indexing and DataFrame construction that follow.
pred = np.argmax(predictions, axis=1)
print(pred[0])

2


In [44]:
# Build the submission table in the layout the Digit Recognizer competition
# expects: an `ImageId` column counting from 1 and a `Label` column holding
# the predicted digit. The previous version wrote the default 0-based index
# as an unnamed first column and called the prediction column "0".
df3 = pd.DataFrame({
    'ImageId': range(1, len(pred) + 1),
    'Label': pred,
})
# index=False: ImageId is already an explicit column, so the DataFrame index
# must not be written as an extra column.
df3.to_csv('/content/drive/MyDrive/datasets/digit-recognizer/submissioncnn3.csv', index=False)