In [1]:
from __future__ import print_function
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from collections import Counter, OrderedDict

from PIL import Image as PImage
from os import listdir
from pickle import dump
import matplotlib.pyplot as plt
import PIL, cv2, os, json, glob, h5py, keras, csv, gc, random
from IPython.display import SVG

import tensorflow as tf
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import to_categorical
from keras.backend.tensorflow_backend import set_session
from keras.callbacks import TensorBoard
from keras.applications.vgg16 import preprocess_input
from keras.utils.vis_utils import plot_model, model_to_dot

#import seaborn as sns

Using TensorFlow backend.


In [2]:
train_path = '../data/aia-mid-term-exam-image-classification/img_classification/train'
test_path = '../data/aia-mid-term-exam-image-classification/img_classification/test'
model_path = '../data/aia-mid-term-exam-image-classification/img_classification/model'

In [3]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15440062113432327275
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15770068583
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15516788639302961574
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


In [4]:
label_list = !ls {train_path}
label_list

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [5]:
mtype_dict = {}
mtype_list = [None] * len(label_list)
with open('../data/aia-mid-term-exam-image-classification/mid_term_mapping.csv', newline='') as csvfile:
    _dict = csv.DictReader(csvfile)
    for idx, row in enumerate(_dict):
        mtype_dict[row['dirs'].strip()] = int(row['class'].strip())
        mtype_list[int(row['class'].strip())] = row['dirs'].strip()

In [6]:
mtype_list

['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [7]:
from keras.layers import *
from keras.optimizers import *
from keras.applications import *

from keras.preprocessing.image import img_to_array, load_img
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv2D, Dropout, BatchNormalization
from keras import regularizers
from keras.optimizers import Adam, SGD
from keras.losses import categorical_crossentropy
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger

import keras.applications.xception as Xception
import keras.applications.vgg16 as VGG16
import keras.applications.resnet50 as resnet50
import keras.applications.inception_v3 as InceptionV3
import keras.applications.densenet as densenet
from keras.utils.training_utils import multi_gpu_model
#import InceptionResNetV2, preprocess_input, decode_predictions resnet50

In [8]:
model_param = \
{
    'Xception': {'target_size': (299, 299),
                  'preprocess_input':Xception.preprocess_input,
                  'model_obj':Xception.Xception},
    'VGG16': {'target_size': (224, 224),
                 'preprocess_input':VGG16.preprocess_input,
                 'model_obj':VGG16.VGG16},
    'ResNet50': {'target_size': (224, 224),
                    'preprocess_input':resnet50.preprocess_input,
                    'model_obj':resnet50.ResNet50},
    'InceptionV3': {'target_size': (299, 299),
                    'preprocess_input':InceptionV3.preprocess_input,
                    'model_obj':InceptionV3.InceptionV3},
    'DenseNet': {'target_size': (224, 224),
              'preprocess_input':densenet.preprocess_input,
              'model_obj':densenet.DenseNet201}
}

In [9]:
def set_callback(model_name):
    top_weights_path = os.path.join(model_path, 'top_model_weights_{}.h5'.format(model_name))
    csv_path = os.path.join(model_path, 'top_model_csv_{}.h5'.format(model_name))
    callbacks_list = [
        ModelCheckpoint(top_weights_path, monitor='acc', verbose=1, save_best_only=True),
        EarlyStopping(monitor='loss', patience=50, verbose=0),
        CSVLogger(csv_path, separator=',', append=False)
    ]
    return (top_weights_path,csv_path,callbacks_list)
#tensor_board = callbacks.TensorBoard()
#set_callback('resnet_v2')

In [10]:
nb_classes = len(mtype_dict)
batch_size = 8

In [11]:
class Model_Generator():
    model_param = \
    {
        'Xception': {'target_size': (299, 299),
                      'preprocess_input':Xception.preprocess_input,
                      'model_obj':Xception.Xception,
                      'optimizer':'nadam',
                      'nb_epoch': 40
                    },
        'VGG16': {'target_size': (224, 224),
                     'preprocess_input':VGG16.preprocess_input,
                     'model_obj':VGG16.VGG16,
                     'rescale':1./255,
                     'optimizer':SGD(lr=0.0001, momentum=0.9),
                     'nb_epoch': 80
                 },
        'ResNet50': {'target_size': (224, 224),
                        'preprocess_input':resnet50.preprocess_input,
                        'model_obj':resnet50.ResNet50,
                        'optimizer':Adam(lr=1e-5),
                        'nb_epoch': 80
                    },
        'InceptionV3': {'target_size': (299, 299),
                        'preprocess_input':InceptionV3.preprocess_input,
                        'model_obj':InceptionV3.InceptionV3,
                        'optimizer':'nadam',
                        'nb_epoch': 40
                       },
        'DenseNet': {'target_size': (224, 224),
                     'preprocess_input':densenet.preprocess_input,
                     'model_obj':densenet.DenseNet201,
                     'optimizer':SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True),
                     'nb_epoch': 100
                    }
    }
    def __init__(self,name):
        self.param = self.model_param[name] 
        self.modelname = name
        self.preprocess_input = self.param['preprocess_input']
        (self.img_width, self.img_height) = self.param['target_size']
        self.model = self.param['model_obj'](input_shape=(self.img_width, self.img_height, 3), weights='imagenet', include_top=False)
            
    def get_model(self):
        if self.modelname == 'Xception':
            for layer in model.layers:
                layer.trainable = False
            x = self.model.output
            x = GlobalAveragePooling2D()(x)
            #x = BatchNormalization()(x)
            x = Dropout(0.2)(x)
            output = Dense(nb_classes, activation='softmax', name='softmax')(x)
            modelf = Model(self.model.input, output)
            modelf.compile(optimizer=self.param['optimizer'],
                          loss=categorical_crossentropy, metrics=['accuracy',])
        if self.modelname == 'VGG16':
            for layer in self.model.layers:
                layer.trainable = False
            x = self.model.output
            x = Flatten()(x)
            #x = Dropout(0.2)(x)
            #x = Dense(256, activation='relu')(x)
            x = Dropout(0.2)(x)
            output = Dense(nb_classes, activation='softmax', name='softmax')(x)
            modelf = Model(self.model.input, output)
            modelf.compile(optimizer=self.param['optimizer'],
                          loss=categorical_crossentropy, metrics=['accuracy',])
            
        if self.modelname == 'ResNet50':
            for layer in self.model.layers:
                layer.trainable = False
            x = self.model.output
            #x = GlobalAveragePooling2D()(x)
            x = Flatten()(x)
            x = Dropout(0.3)(x)
            output = Dense(nb_classes, activation='softmax', name='softmax')(x)
            modelf = Model(self.model.input, output)
            modelf.compile(optimizer=self.param['optimizer'],
                          loss=categorical_crossentropy, metrics=['accuracy',])
            
        if self.modelname == 'InceptionV3':
            for layer in self.model.layers:
                layer.trainable = False
            x = self.model.output
            x = GlobalMaxPooling2D()(x)
            #x = BatchNormalization()(x)
            #x = Dropout(0.2)(x)
            #x = Dense(256, activation='relu')(x)
            x = Dropout(0.2)(x)
            output = Dense(nb_classes, activation='softmax', name='softmax')(x)
            modelf = Model(self.model.input, output)
            modelf.compile(optimizer=self.param['optimizer'],
                          loss=categorical_crossentropy, metrics=['accuracy',])            
                
        if self.modelname == 'DenseNet':
            for layer in self.model.layers:
                layer.trainable = False
            x = self.model.output
            x = GlobalAveragePooling2D()(x)
            #x = BatchNormalization()(x)
            x = Dropout(0.2)(x)
            output = Dense(nb_classes, activation='softmax', name='softmax')(x)
            modelf = Model(self.model.input, output)
            modelf.compile(optimizer=self.param['optimizer'],
                          loss=categorical_crossentropy, metrics=['accuracy',])                
                
        return modelf

In [None]:
for model_name in ['DenseNet']:
    this_model = Model_Generator(model_name)
    seed = random.randint(1, 2000)
    print("Start to train %s" % (model_name))
    preprocess_input = this_model.param['preprocess_input']
    (img_width, img_height) = this_model.param['target_size']
    model = this_model.get_model()
    nb_epoch = this_model.param['nb_epoch']
    rescale = None
    if 'rescale' in this_model.param:
        rescale = this_model.param['rescale']

    data_gen = image.ImageDataGenerator(validation_split=0.2,
                                        fill_mode="nearest",
                                        rotation_range=20,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2,
                                        shear_range=0.2,
                                        zoom_range=[0.8, 1.4],
                                        horizontal_flip=True,
                                        rescale=rescale)
                                        #preprocessing_function=preprocess_input)

    train_generator = data_gen.flow_from_directory(train_path,
                                        target_size=(img_width, img_height),
                                        batch_size=batch_size,
                                        class_mode='categorical',
                                        shuffle=True, seed=seed, subset="training")

    validation_generator = data_gen.flow_from_directory(train_path,
                                        target_size=(img_width, img_height),
                                        batch_size=batch_size,
                                        class_mode='categorical',
                                        shuffle=True, seed=seed, subset="validation")

    (top_weights_path,csv_path,callbacks_list) = set_callback(model_name)

    print("Start to fit %s" % (model_name))
    history = model.fit_generator(train_generator,
                        epochs=nb_epoch,
                        validation_data=validation_generator,
                        callbacks=callbacks_list,
                        workers=8,
                        use_multiprocessing=True)    
    
    
    print("End to fit %s" % (model_name))
    del history
    del this_model, model, train_generator, validation_generator, data_gen
    gc.collect()

Start to train DenseNet
Found 2957 images belonging to 5 classes.
Found 737 images belonging to 5 classes.
Start to fit DenseNet
Epoch 1/100

Epoch 00001: acc improved from -inf to 0.74197, saving model to ../data/aia-mid-term-exam-image-classification/img_classification/model/top_model_weights_DenseNet.h5


In [None]:
model_param

In [38]:
for model_name in model_param:
    weights_path = os.path.join(model_path, 'top_model_weights_{}.h5'.format(model_name))
    this_model = Model_Generator(model_name)
    preprocess_input = this_model.param['preprocess_input']
    (img_width, img_height) = this_model.param['target_size']
    model = this_model.get_model()
    rescale = None
    if 'rescale' in this_model.param:
        rescale = this_model.param['rescale']
    
    test_datagen = image.ImageDataGenerator(rescale=rescale, fill_mode="nearest",
                                       preprocessing_function=preprocess_input)

    test_generator =  test_datagen.flow_from_directory(train_path, 
                                         target_size=(img_width, img_height), 
                                         batch_size=batch_size,
                                         shuffle=False,
                                         class_mode=None)
    
    
    model.load_weights(weights_path)
    X_l_vec = model.predict_generator(generator=test_generator, workers=8)
    model_param[model_name]['predict'] = X_l_vec

Found 3694 images belonging to 5 classes.
Found 3694 images belonging to 5 classes.
Found 3694 images belonging to 5 classes.
Found 3694 images belonging to 5 classes.
Found 3694 images belonging to 5 classes.


In [39]:
X_l_vec = np.hstack((model_param['DenseNet']['predict'],
                     model_param['InceptionV3']['predict'],
                     model_param['Xception']['predict'],
                     model_param['ResNet50']['predict'],
                     model_param['VGG16']['predict']))

In [40]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [41]:
X = X_l_vec
Y = test_generator.classes

In [42]:
seed = 10
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

In [43]:
model_g = XGBClassifier()
model_g.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [44]:
# make predictions for test data
y_pred = model_g.predict(X_test)
predictions = [round(value) for value in y_pred]

  if diff:


In [45]:
X_test.shape

(739, 25)

In [46]:
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 94.45%


In [47]:
!ls {test_path}

test


In [48]:
for model_name in model_param:
    weights_path = os.path.join(model_path, 'top_model_weights_{}.h5'.format(model_name))
    this_model = Model_Generator(model_name)
    preprocess_input = this_model.param['preprocess_input']
    (img_width, img_height) = this_model.param['target_size']
    model = this_model.get_model()
    rescale = None
    if 'rescale' in this_model.param:
        rescale = this_model.param['rescale']
    
    test_datagen = image.ImageDataGenerator(rescale=rescale, fill_mode="nearest",
                                       preprocessing_function=preprocess_input)

    test_generator =  test_datagen.flow_from_directory(test_path, 
                                         target_size=(img_width, img_height), 
                                         batch_size=batch_size,
                                         shuffle=False,
                                         class_mode=None)
    
    
    model.load_weights(weights_path)
    X_t_vec = model.predict_generator(generator=test_generator, workers=8)
    model_param[model_name]['predict_test'] = X_t_vec

Found 500 images belonging to 1 classes.
Found 500 images belonging to 1 classes.
Found 500 images belonging to 1 classes.
Found 500 images belonging to 1 classes.
Found 500 images belonging to 1 classes.


In [49]:
X_t_vec = np.hstack((model_param['DenseNet']['predict_test'],
                     model_param['InceptionV3']['predict_test'],
                     model_param['Xception']['predict_test'],
                     model_param['ResNet50']['predict_test'],
                     model_param['VGG16']['predict_test']))

In [50]:
y_pred_test = model_g.predict(X_t_vec)
predictions = [round(value) for value in y_pred_test]

  if diff:


In [51]:
fnames = [_f.split('/')[1].replace('.jpg','') for _f in test_generator.filenames]

In [52]:
data_gen = image.ImageDataGenerator(validation_split=0.1,
                                        fill_mode="nearest",
                                        rotation_range=20,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2,
                                        shear_range=0.2,
                                        zoom_range=[0.8, 1.4],
                                        horizontal_flip=True,
                                        rescale=rescale)

train_generator = data_gen.flow_from_directory(train_path,
                                    target_size=(img_width, img_height),
                                    batch_size=batch_size,
                                    class_mode='categorical',
                                    shuffle=True, seed=seed, subset="training")

Found 3327 images belonging to 5 classes.


In [53]:
class_list = [None] * len(train_generator.class_indices)
for _item in train_generator.class_indices:
    class_list[train_generator.class_indices[_item]] = _item

In [54]:
train_generator.class_indices

{'daisy': 0, 'dandelion': 1, 'rose': 2, 'sunflower': 3, 'tulip': 4}

In [55]:
cateidxs = [mtype_dict[class_list[_p]] for _p in predictions]

In [56]:
percentile_list = pd.DataFrame(
    {'id': fnames,
     'class': cateidxs
    })

In [57]:
percentile_list.to_csv('stacking_5.csv',encoding='utf-8', index=False,columns=["id","class"])

In [None]:
!ls

In [62]:
VGG16(include_top=False, weights='imagenet')

In [63]:
print(qqm.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

In [64]:
qqm = VGG16.VGG16(include_top=True, weights='imagenet')
print(qqm.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_22 (InputLayer)        (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________