# Digit Recognizer using vgg16
* https://www.kaggle.com/c/digit-recognizer

In [59]:
import numpy as np
import pandas as pd

## prepareing data
Download data from https://www.kaggle.com/c/digit-recognizer/data

In [60]:
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

validation_size = 2000

In [61]:
train = pd.read_csv('/home/tsu-nera/repo/kaggle/input/digit-recognizer/train.csv')
test  = pd.read_csv('/home/tsu-nera/repo/kaggle/input/digit-recognizer/test.csv')

In [62]:
(train.shape, test.shape)

((42000, 785), (28000, 784))

split train data to labels and pixels.

In [90]:
features = (train.ix[:,1:].values).astype('float32')
labels = pd.get_dummies(train.ix[:,0]).astype('float32')
(features.shape, labels.shape)

((42000, 784), (42000, 10))

In [91]:
# split data into training & validation
valid_features = features[:validation_size]
valid_labels = labels[:validation_size]

train_features = features[validation_size:]
train_labels = labels[validation_size:]
(train_features.shape, train_labels.shape, valid_features.shape, valid_labels.shape)

((40000, 784), (40000, 10), (2000, 784), (2000, 10))

In [92]:
test_features = (test.values).astype('float32')
(test_features.shape)

(28000, 784)

In [93]:
features = features.reshape(-1,28,28)
valid_features = valid_features.reshape(-1,28,28)
test_features = test_features.reshape(-1, 28, 28)
(features.shape, valid_features.shape, test_features.shape)

((42000, 28, 28), (2000, 28, 28), (28000, 28, 28))

In [94]:
features = np.expand_dims(features,3)
valid_features = np.expand_dims(valid_features,3)
test_features = np.expand_dims(test_features,3)

In [95]:
features = np.concatenate([features, features, features], axis=3)
valid_features = np.concatenate([valid_features, valid_features, valid_features], axis=3)
test_features = np.concatenate([test_features, test_features, test_features], axis=3)
(features.shape, valid_features.shape, test_features.shape)

((42000, 28, 28, 3), (2000, 28, 28, 3), (28000, 28, 28, 3))

## Make a TensorFlow Graph

Make a first convolultion layer.

In [76]:
import keras
from keras import backend as K
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input, Flatten, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

In [77]:
def pretrained_model(img_shape, num_classes):
    model_vgg16_conv = VGG16(weights='imagenet', include_top=False)
    #model_vgg16_conv.summary()
    
    #Create your own input format
    keras_input = Input(shape=img_shape, name = 'image_input')
    
    #Use the generated model 
    output_vgg16_conv = model_vgg16_conv(keras_input)
    
    #Add the fully-connected layers 
    x = Flatten(name='flatten')(output_vgg16_conv)
    x = Dropout(0.8)(x)
    x = Dense(256, activation=layer_type, name='fc1')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.8)(x)
    x = Dense(256, activation=layer_type, name='fc2')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.8)(x)
    x = Dense(num_classes, activation='softmax', name='predictions')(x)
    
    #Create your own model 
    pretrained_model = Model(inputs=keras_input, outputs=x)
    pretrained_model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    
    return pretrained_model

In [98]:
#K.set_image_dim_ordering('tf')
ftmodel = pretrained_model((28,28,3), 10)

ftmodel.summary()

ValueError: Negative dimension size caused by subtracting 2 from 1 for 'MaxPool_104' (op: 'MaxPool') with input shapes: [?,1,1,512].

## Training

In [17]:
batch_size = 64

In [18]:
gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                            height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(features, labels, batch_size=batch_size)
test_batches = gen.flow(valid_features, valid_labels, batch_size=batch_size)

In [19]:
model.fit_generator(batches, batches.n, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.n)

Epoch 1/1


ValueError: Error when checking model input: expected lambda_input_1 to have shape (None, 3, 224, 224) but got array with shape (64, 3, 28, 28)

## Testing

In [None]:
classes = model.predict_classes(test_features)

In [None]:
classes[:5]

## Write to file

In [None]:
submissions = pd.DataFrame({"ImageId": list(range(1, len(classes)+1)),
                             "Label": classes})
submissions.to_csv("submission.csv", index=False, header=True)