In [None]:
import os

import numpy as np
import pandas as pd
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.datasets import mnist
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import np_utils
# Use channels-first tensors, i.e. (channels, height, width); this must agree
# with the input_shape=(3, 150, 150) given to the first Conv2D layer of
# larger_model(). set_image_data_format('channels_first') is the Keras 2 name
# for the deprecated set_image_dim_ordering('th'), which later Keras 2
# releases removed.
K.set_image_data_format('channels_first')
import matplotlib.pyplot as plt

In [None]:
def larger_model(input_shape=(3, 150, 150), num_classes=12):
    """Build and compile the convolutional image classifier.

    Architecture: three blocks of 3x3 ReLU convolution followed by 2x2
    max-pooling (32, 32 and 64 filters), then Flatten, a 64-unit ReLU dense
    layer, Dropout(0.5) and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of a single input image. The default (3, 150, 150) is laid out
        channels-first, so the Keras backend must be configured accordingly.
    num_classes : int
        Number of units in the softmax output layer, i.e. the number of
        categories the generators produce one-hot labels for.

    Returns
    -------
    A compiled ``Sequential`` model using categorical cross-entropy loss, the
    Adam optimizer and the accuracy metric.
    """
    model = Sequential()

    # Convolutional feature extractor.
    model.add(Conv2D(32, (3, 3),
                     input_shape=input_shape,
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
batch_size = 32

# Training-time augmentation: pixel values are scaled by 1/255 and images are
# randomly rotated, shifted, sheared, zoomed and flipped; pixels exposed by a
# transform are filled from the nearest valid pixel.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=180,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Reads pictures from the subfolders of 'CAX_Superhero_Train/train' and
# yields batches of augmented images, resized to 150x150, with categorical
# labels.
train_generator = train_datagen.flow_from_directory(
    'CAX_Superhero_Train/train',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(150, 150),
)

# Same settings for the validation images in 'CAX_Superhero_Train/test'.
validation_generator = test_datagen.flow_from_directory(
    'CAX_Superhero_Train/test',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(150, 150),
)

In [None]:
# Stop training once 'val_acc' has gone 5 epochs without improving.
early_stopping_monitor = EarlyStopping(monitor='val_acc', patience=5, verbose=1)

# build the model
model = larger_model()

# Step counts are derived from the number of images each generator actually
# found, so they cannot go stale when the directory contents change; max(1, ...)
# keeps at least one step when a directory holds fewer images than one batch.
train_steps = max(1, train_generator.samples // batch_size)
validation_steps = max(1, validation_generator.samples // batch_size)

# Keep the returned History object so the per-epoch metrics stay available.
history = model.fit_generator(train_generator,
                              steps_per_epoch=train_steps,
                              epochs=50,
                              validation_data=validation_generator,
                              validation_steps=validation_steps,
                              callbacks=[early_stopping_monitor],
                              verbose=1)

In [None]:
# Generator for the images to be scored: rescaling only, no augmentation.
datagen = ImageDataGenerator(rescale=1./255)

generator = datagen.flow_from_directory(
    directory='CAX_Superhero_Test',
    shuffle=False,     # fixed order, so prediction i pairs with generator.filenames[i]
    class_mode=None,   # yield image batches only, no labels
    batch_size=batch_size,
    target_size=(150, 150),
)

In [None]:
# Run the trained model over the unshuffled test generator; the result is
# reduced to one class index per row in the next cell.
probabilities = model.predict_generator(
    generator,
    verbose=1,
)

In [None]:
# Index of the highest-scoring class along the last axis of the predictions.
y_classes = np.argmax(probabilities, axis=-1)

In [None]:
# Map each predicted class index back to its superhero name.
# The indices the model outputs follow the label encoding of the generator it
# was trained on, so the lookup is built from train_generator.class_indices.
# .items() works on both Python 2 and 3 (dict.iteritems() is Python 2 only),
# and inverting the dict once replaces a scan over all classes per prediction.
index_to_hero = {cls: hero for hero, cls in train_generator.class_indices.items()}

# One [file path, hero name] pair per prediction, in generator order.
predictions = [[filename, index_to_hero[cls]]
               for filename, cls in zip(generator.filenames, y_classes)]

In [None]:
# Tabulate the [file path, predicted hero] pairs.
temp_submission = pd.DataFrame(predictions, columns=['filename', 'Superhero'])

In [None]:
# Reduce each generator path ("<subfolder>/<name>.<ext>") to the bare "<name>".
# Using os.path instead of fixed-width slices means the result no longer
# depends on the subfolder name being 4 characters or the extension being 3,
# and re-running the cell leaves an already-stripped name unchanged (provided
# the name itself contains no further '.').
temp_submission['filename'] = temp_submission['filename'].map(
    lambda path: os.path.splitext(os.path.basename(path))[0])

In [None]:
# Load the submission format file; later cells drop its 'Superhero' column
# and join on its 'filename' column.
submission = pd.read_csv(filepath_or_buffer='Superhero_Submission_Format.csv')

In [None]:
# Discard the existing 'Superhero' column; the merge in the next cell brings
# in the predicted one.
submission = submission.drop('Superhero', axis='columns')

In [None]:
# Attach the predicted hero to each row by filename. This is an inner join,
# so rows without a matching prediction are dropped.
submission = submission.merge(temp_submission, how='inner', on='filename')

In [None]:
# Sanity check: list the distinct hero labels that made it into the submission.
submission['Superhero'].unique()

In [None]:
# Write the final submission file, without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)