In [2]:
"""
Train Metamorph neural network
# https://transfer.sh/12Z5E1/weights.hdf5
"""
import keras.backend as K
import numpy as np
import pandas as pd
import tensorflow as tf

from keras.applications.vgg19 import preprocess_input as vgg_preprocess_input
from keras.callbacks import (EarlyStopping, LearningRateScheduler,
                             ModelCheckpoint)
from keras.models import load_model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image, ImageFile

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

from keras.applications.vgg19 import VGG19

from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Model, Sequential

from keras.utils.data_utils import get_file


ImageFile.LOAD_TRUNCATED_IMAGES = True


###################################################################################################
# Arguments for setting parameters while running array batch job
###################################################################################################


NAME = "VGG19"

# Set verbosity
VERBOSITY = 1

# Set model configuration
LOSS = "categorical_crossentropy"
METRICS = ["accuracy"]

# Dataset folder information
TRAINING_CSV = "./training-labels.csv"
DATASET_FOLDER = "./output_combined2"


WEIGHTS_FOLDER = "."


# WEIGHTS_PATH = get_file('pretrained_weights',
#                         'https://transfer.sh/12Z5E1/weights.hdf5')

WEIGHTS_PATH = "/home/vinoth/weights.hdf5"

# Image augmentation parameters

HEIGHT = 256
WIDTH = 256
DEPTH = 3
SHIFT = 20.0
ROTATION = 10.0
VAL_AUG_FACTOR = 0.2

# Hyperparameters
# Set epochs from args if set else from config file
EPOCHS = 3

BATCH_SIZE = 21
LEARNING_RATE = 0.001
DROP_EVERY = 15
DROP_FACTOR = 0.25
MOMENTUM = 0.9

# Image generator information
TEST_SPLIT = 0.004
VALIDATION_SPLIT = 0.1

##################################################################################################
# Read details from CSV
###################################################################################################

DATASET = pd.read_csv(TRAINING_CSV, dtype=str)
TRAIN_VALIDATION, TEST = train_test_split(DATASET, test_size=TEST_SPLIT)
TRAIN, VALIDATION = train_test_split(
    TRAIN_VALIDATION, test_size=VALIDATION_SPLIT)

DATASET.head()


###################################################################################################
#  Create data generator to augment images for training and validation
###################################################################################################

preprocessing_function = vgg_preprocess_input

TRAINING_DATA_GENERATOR = ImageDataGenerator(rotation_range=ROTATION,
                                             width_shift_range=SHIFT,
                                             height_shift_range=SHIFT,
                                             preprocessing_function=preprocessing_function)

VALIDATION_DATA_GENERATOR = ImageDataGenerator(rotation_range=ROTATION *
                                               (1+VAL_AUG_FACTOR),
                                               width_shift_range=SHIFT *
                                               (1+VAL_AUG_FACTOR),
                                               height_shift_range=SHIFT *
                                               (1+VAL_AUG_FACTOR),
                                               preprocessing_function=preprocessing_function)

TEST_DATA_GENERATOR = ImageDataGenerator(
    preprocessing_function=preprocessing_function)

COLOR_MODE = "grayscale" if DEPTH == 1 else "rgb"

print("[INFO] Creating training data generator")
TRAINING_DATA = TRAINING_DATA_GENERATOR.flow_from_dataframe(dataframe=TRAIN,
                                                            directory=DATASET_FOLDER,
                                                            x_col="Filename",
                                                            y_col="Drscore",
                                                            class_mode="categorical",
                                                            color_mode=COLOR_MODE,
                                                            target_size=(
                                                                WIDTH, HEIGHT),
                                                            batch_size=BATCH_SIZE)

print("[INFO] Creating validation data generator")
VALIDATION_DATA = VALIDATION_DATA_GENERATOR.flow_from_dataframe(dataframe=VALIDATION,
                                                                directory=DATASET_FOLDER,
                                                                x_col="Filename",
                                                                y_col="Drscore",
                                                                class_mode="categorical",
                                                                color_mode=COLOR_MODE,
                                                                target_size=(
                                                                    WIDTH, HEIGHT),
                                                                batch_size=BATCH_SIZE)

print("[INFO] Creating test data generator")
TEST_DATA = TEST_DATA_GENERATOR.flow_from_dataframe(dataframe=TEST,
                                                    directory=DATASET_FOLDER,
                                                    x_col="Filename",
                                                    y_col="Drscore",
                                                    class_mode="categorical",
                                                    target_size=(
                                                        WIDTH, HEIGHT),
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)

NUM_OF_TRAINING_SAMPLES = 64  # len(TRAIN)
NUM_OF_VALIDATION_SAMPLES = 64  # len(VALIDATION)
NUM_OF_TEST_SAMPLES = len(TEST_DATA.classes)//BATCH_SIZE+1
CLASSES = 5


###################################################################################################
# Cohen Kappa metrics
###################################################################################################


def cohen_kappa(y_true, y_pred):
    y_true_classes = tf.argmax(y_true, 1)
    y_pred_classes = tf.argmax(y_pred, 1)
    return tf.contrib.metrics.cohen_kappa(y_true_classes, y_pred_classes, CLASSES)[1]


###################################################################################################
# Compile MetaMorph model
###################################################################################################

BASE_MODEL = VGG19(include_top=False, input_shape=(HEIGHT, WIDTH, DEPTH))
MODEL = Sequential()
MODEL.add(BASE_MODEL)
MODEL.add(GlobalAveragePooling2D())
MODEL.add(Dense(1024, activation='relu'))
MODEL.add(Dense(512, activation='relu'))
MODEL.add(Dense(CLASSES, activation='softmax'))

MODEL.load_weights(WEIGHTS_PATH)

OPTIMISER = SGD(lr=LEARNING_RATE, momentum=MOMENTUM)

MODEL.compile(loss=LOSS, optimizer=OPTIMISER, metrics=[*METRICS, cohen_kappa])

K.get_session().run(tf.local_variables_initializer())


W0722 23:03:23.343448 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0722 23:03:23.364688 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0722 23:03:23.368232 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0722 23:03:23.402519 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is depre

[INFO] Creating training data generator
Found 12679 validated image filenames belonging to 5 classes.
[INFO] Creating validation data generator
Found 1409 validated image filenames belonging to 5 classes.
[INFO] Creating test data generator
Found 57 validated image filenames belonging to 5 classes.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


W0722 23:04:32.477871 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0722 23:04:32.478415 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0722 23:04:33.402594 140250897299264 deprecation_wrapper.py:119] From /home/vinoth/Apps/anaconda/envs/sic/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0722 23:04:34.567988 140250897299264 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blo

In [3]:
import os

In [35]:
TEST_FILES = [ [filename, "0"] for filename in os.listdir("./Test")[:10]]
TEST = pd.DataFrame(TEST_FILES, columns=["Id", "Expected"])
TEST

Unnamed: 0,Id,Expected
0,test_image533.jpeg,0
1,test_image949.jpg,0
2,test_image720.jpeg,0
3,test_image730.jpeg,0
4,test_image848.jpeg,0
5,test_image912.jpg,0
6,test_image17.tif,0
7,test_image114.tif,0
8,test_image184.tif,0
9,test_image977.jpg,0


In [36]:
TEST_DATA_GENERATOR = ImageDataGenerator(preprocessing_function=preprocessing_function)
print("[INFO] Creating test data generator")
TEST_DATA = TEST_DATA_GENERATOR.flow_from_dataframe(dataframe=TEST,
                                                    directory="./Test",
                                                    x_col="Id",
                                                    y_col="Expected",
                                                    class_mode="categorical",
                                                    target_size=(HEIGHT, WIDTH),
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)

[INFO] Creating test data generator
Found 10 validated image filenames belonging to 1 classes.


In [37]:
NUM_OF_TEST_SAMPLES = len(TEST_DATA.classes)//BATCH_SIZE+1

In [38]:
PREDICTIONS = MODEL.predict_generator(generator=TEST_DATA,
                                      steps=NUM_OF_TEST_SAMPLES,
                                      verbose=VERBOSITY)
Y_PREDICTIONS = np.argmax(PREDICTIONS, axis=1)



In [39]:
Y_PREDICTIONS

array([0, 3, 0, 0, 4, 3, 3, 3, 3, 2])

In [40]:
TEST["Expected"] = Y_PREDICTIONS

In [43]:
TEST.to_csv("Submission.csv", index=False)