In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras import optimizers
from keras.utils import np_utils
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

K.set_image_dim_ordering('th')

  (fname, cnt))
  (fname, cnt))
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def cnn_model(input_shape=(3, 150, 150), num_classes=12, learning_rate=0.001):
    """Build and compile the convolutional image classifier.

    The network is two convolutional stages (two 3x3 convolutions, 2x2 max
    pooling and dropout each; 32 filters in the first stage, 64 in the
    second) followed by a 512-unit dense layer and a softmax output layer.

    Args:
        input_shape: Shape of a single input image. The default is
            channels-first, matching the 'th' dimension ordering selected
            in the import cell.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy
        loss and reporting accuracy.
    """
    model = Sequential()

    # Stage 1: 32 filters.
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Stage 2: 64 filters.
    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head.
    model.add(Flatten())

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # Pass the configured optimizer instance itself. The string 'rmsprop'
    # would make Keras build a fresh default optimizer and silently ignore
    # the learning rate configured here.
    rmsprop = optimizers.RMSprop(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    return model

In [3]:
batch_size = 32

# Training-time augmentation: pixel rescaling plus random rotations, shifts,
# shear, zoom and flips along both axes.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=180,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

# Validation-time configuration: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Generator that reads pictures from the class subfolders of the training
# directory and yields batches of augmented, 150x150 images with one-hot
# (categorical) labels.
train_generator = train_datagen.flow_from_directory(
    'CAX_Superhero_Train_Original/train',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(150, 150))

# Same kind of generator for the held-out validation images.
validation_generator = test_datagen.flow_from_directory(
    'CAX_Superhero_Train_Original/test',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(150, 150))

Found 3800 images belonging to 12 classes.
Found 1633 images belonging to 12 classes.


In [4]:
# Stop training once the monitored validation accuracy has gone 10 epochs
# without improving.
early_stopping_monitor = EarlyStopping(monitor='val_acc', patience=10, verbose=1)

# build the model
model = cnn_model()

# Derive the step counts from the generators themselves instead of
# hard-coding the image counts, so they stay correct if the data changes.
model.fit_generator(train_generator,
                    steps_per_epoch=train_generator.samples // batch_size,
                    epochs=50,
                    validation_data=validation_generator,
                    validation_steps=validation_generator.samples // batch_size,
                    callbacks=[early_stopping_monitor],
                    verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 00039: early stopping


<keras.callbacks.History at 0x7f1f0475dbd0>

In [6]:
# Inference-time preprocessing: rescale pixel values only.
datagen = ImageDataGenerator(rescale=1.0 / 255)

# Unlabelled generator over the competition test images. Shuffling is off so
# the order of the yielded images is fixed and can be matched to file names.
generator = datagen.flow_from_directory(
    'CAX_Superhero_Test',
    shuffle=False,
    class_mode=None,
    batch_size=batch_size,
    target_size=(150, 150))

Found 3375 images belonging to 1 classes.


In [7]:
# Run the trained network over the test generator to get the model's
# predictions for every image it yields.
probabilities = model.predict_generator(generator=generator, verbose=1)



In [8]:
# Index of the highest-scoring entry along the last axis of each prediction.
y_classes = np.argmax(probabilities, axis=-1)

In [9]:
# Invert the label -> index mapping once so each prediction is a single dict
# lookup instead of a scan over every class. The mapping comes from the
# training generator because that is what defined the model's output
# indices. `.items()` works on both Python 2 and Python 3, unlike
# `.iteritems()`.
index_to_hero = {cls: hero for hero, cls in train_generator.class_indices.items()}

# One [relative file path, predicted label] row per test image, in generator order.
predictions = [[filename, index_to_hero[int(cls)]]
               for filename, cls in zip(generator.filenames, y_classes)]

In [10]:
# Tabulate the [file path, predicted label] rows.
temp_submission = pd.DataFrame(predictions, columns=['filename', 'Superhero'])

In [11]:
# Reduce each relative path to its bare file stem (no folder, no extension).
# basename/splitext avoid hard-coding the folder-name and extension lengths.
# Re-running this cell leaves an already-stripped name unchanged unless the
# stem itself contains a dot.
temp_submission['filename'] = temp_submission['filename'].map(
    lambda path: os.path.splitext(os.path.basename(path))[0])

In [12]:
# Load the submission template CSV.
submission = pd.read_csv(filepath_or_buffer='Superhero_Submission_Format.csv')

In [13]:
# Discard the template's 'Superhero' column; the merge with the predictions
# supplies it again.
submission = submission.drop('Superhero', axis='columns')

In [14]:
# Attach the predicted labels to the template rows by matching file names.
submission = submission.merge(temp_submission, on='filename')

In [15]:
# Sanity check: list the distinct labels present in the submission.
submission['Superhero'].unique()

array(['spider_man', 'super_man', 'iron_man', 'black_panther', 'aqua_man',
       'bat_man', 'captain_america', 'cat_woman', 'hulk', 'avengers',
       'ant_man', 'ghostrider'], dtype=object)

In [16]:
# Write the final submission file without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)