In [1]:
from __future__ import print_function
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from collections import Counter, OrderedDict

from PIL import Image as PImage
from os import listdir
from pickle import dump
import matplotlib.pyplot as plt
import PIL, cv2, os, json, glob, h5py, keras, csv
from IPython.display import SVG

import tensorflow as tf
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import backend as K
from keras.utils import to_categorical
from keras.backend.tensorflow_backend import set_session
from keras.callbacks import TensorBoard
from keras.applications.vgg16 import preprocess_input
from keras.utils.vis_utils import plot_model, model_to_dot

import seaborn as sns

Using TensorFlow backend.


In [2]:
# Every artefact of this experiment lives under one dataset folder.
_dataset_root = '../data/aia-picture-classification1'

# Sub-folders: class-labelled training images, unlabelled test images, and
# the directory where weights / logs / model JSON are written.
train_path, test_path, model_path = (
    '{}/{}'.format(_dataset_root, _sub) for _sub in ('train', 'test', 'model')
)

In [3]:
# List the compute devices TensorFlow can see, to confirm that a GPU is
# available before training starts.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9795142609091428438
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6966991258
locality {
  bus_id: 1
}
incarnation: 17566226578062876436
physical_device_desc: "device: 0, name: TITAN Xp, pci bus id: 0000:02:00.0, compute capability: 6.1"
]


In [4]:
# Class labels are the sub-directory names of the training folder. Listing
# them in Python (instead of shelling out to `ls`) keeps the cell portable
# and independent of the shell's locale; the case-insensitive sort reproduces
# the ordering that `ls` displayed. Hidden entries are skipped, as `ls` does.
label_list = sorted(
    (_name for _name in os.listdir(train_path)
     if not _name.startswith('.')
     and os.path.isdir(os.path.join(train_path, _name))),
    key=str.lower,
)
label_list

['bedroom',
 'CALsuburb',
 'coast',
 'forest',
 'highway',
 'industrial',
 'insidecity',
 'kitchen',
 'livingroom',
 'mountain',
 'opencountry',
 'PARoffice',
 'store',
 'street',
 'tallbuilding']

In [5]:
# Load the competition's label <-> index mapping from the CSV-style lookup
# file and keep it in both directions:
#   mtype_dict: class name   -> target index
#   mtype_list: target index -> class name
mtype_dict = {}
mtype_list = [None] * len(label_list)
with open('../data/aia-picture-classification1/target_to_number.txt', newline='') as csvfile:
    for _row in csv.DictReader(csvfile):
        # The file pads its commas with spaces (the index column is read as
        # ' index'), so strip both the header names and the values once here
        # instead of hard-coding the padded key and re-parsing each field.
        _fields = {
            _key.strip(): _value.strip()
            for _key, _value in _row.items()
            if isinstance(_key, str) and isinstance(_value, str)
        }
        _name, _index = _fields['type'], int(_fields['index'])
        mtype_dict[_name] = _index
        mtype_list[_index] = _name

In [6]:
# Inspect the target-index -> class-name lookup (position i holds the name
# whose target index is i).
mtype_list

['kitchen',
 'street',
 'industrial',
 'insidecity',
 'forest',
 'livingroom',
 'opencountry',
 'PARoffice',
 'mountain',
 'CALsuburb',
 'coast',
 'store',
 'bedroom',
 'tallbuilding',
 'highway']

In [7]:
from keras.layers import *
from keras.optimizers import *
from keras.applications import *

from keras.preprocessing.image import img_to_array, load_img
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv2D, Dropout, BatchNormalization
from keras import regularizers
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger


In [8]:
# Training configuration.
img_width = img_height = 299  # change based on the shape/structure of your images
nb_classes = len(mtype_dict)  # one output unit per entry in the label map
batch_size = 32
nb_epoch = 100
# Index of the first layer that gets unfrozen for fine-tuning; the right
# value depends on which base model is selected.
based_model_last_block_layer_number = 126

In [9]:
# Transfer-learning model: an ImageNet-pretrained Xception backbone (without
# its classification top) followed by a small trainable classifier head.
base_model = Xception(input_shape=(img_width, img_height, 3), weights='imagenet', include_top=False)

# Freeze the whole backbone so that only the new head is trained at first.
for layer in base_model.layers:
    layer.trainable = False

# Classifier head: global max pooling -> dropout -> softmax over the classes.
x = base_model.output
x = GlobalMaxPooling2D()(x)
x = Dropout(0.2)(x)
output = Dense(nb_classes, activation='softmax')(x)

# Attach the head to the backbone's input to form the full model.
model = Model(base_model.input, output)
# `summary()` prints the table itself and returns None, so wrapping it in
# `print()` only appended a stray "None" line to the cell output.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 299, 299, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 149, 149, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 149, 149, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 149, 149, 32) 0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

In [10]:
# Augmenting generator for the training images.
#
# `featurewise_center=True` was dropped: the generator is never `fit()` on
# sample data, so Keras has no dataset mean to subtract and the flag only
# triggers a warning (hidden by the global warnings filter) without changing
# any pixel values.
# NOTE(review): the ImageNet Xception weights are normally paired with
# `keras.applications.xception.preprocess_input` rather than a 1/255 rescale;
# switching would require retraining, so the rescale is kept - confirm.
train_datagen = image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.4,
    horizontal_flip=True,
    fill_mode="nearest")

# One sub-directory per class; labels are one-hot encoded ('categorical' is
# the default, spelled out because the loss is categorical cross-entropy).
train_generator = train_datagen.flow_from_directory(train_path,
                                                    target_size=(img_width, img_height),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    shuffle=True)

Found 2985 images belonging to 15 classes.


In [11]:
# Generator for the unlabelled test images: only the same 1/255 rescale as
# in training, no augmentation.
# `featurewise_center=True` was dropped because the generator is never
# `fit()`, so it had no effect beyond a suppressed warning; `fill_mode` was
# dropped because no geometric transform is configured that would use it.
test_datagen = image.ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the batches in the order of `test_generator.filenames`,
# so predictions can be matched back to file names; class_mode=None makes the
# generator yield images only.
test_generator = test_datagen.flow_from_directory(test_path,
                                                  target_size=(img_width, img_height),
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode=None)

Found 1500 images belonging to 1 classes.


In [12]:
# Configure training: Nadam optimiser, categorical cross-entropy loss, and
# accuracy reported as an extra metric.
model.compile(
    loss=categorical_crossentropy,
    optimizer='nadam',
    metrics=['accuracy'],
)

In [13]:
# Output files for the first (head-only) training stage.
top_weights_path = os.path.join(model_path, 'top_model_weights_6.h5')
# CSVLogger writes plain CSV text, so the log gets a `.csv` extension (it was
# mislabelled `.h5`), matching the naming used for the fine-tuning log.
csv_path = os.path.join(model_path, 'top_model_csv_6.csv')

# No validation data is passed to training, so the callbacks watch the
# training metrics: checkpoint on best accuracy, stop after 10 epochs without
# a lower loss, and log every epoch to CSV.
callbacks_list = [
    ModelCheckpoint(top_weights_path, monitor='acc', verbose=1, save_best_only=True),
    EarlyStopping(monitor='loss', patience=10, verbose=0),
    CSVLogger(csv_path, separator=',', append=False)
]

In [14]:
# Keep the training file list and its length around for reference.
filenames = train_generator.filenames
nb_samples = len(filenames)

# Report the number of images the training generator discovered.
training_count_message = 'There are {0} training data.'.format(train_generator.n)
print(training_count_message)

There are 2985 training data.


In [15]:
# Stage 1: train only the new classifier head (the backbone is frozen) for a
# fifth of the full epoch budget. No validation data is supplied: the test
# generator carries no labels, so it cannot serve as validation data.
model.fit_generator(train_generator,
                    steps_per_epoch=train_generator.n // batch_size,
                    # Integer division: `epochs` must be an int, and
                    # `nb_epoch / 5` is the float 20.0 on Python 3.
                    epochs=nb_epoch // 5,
                    callbacks=callbacks_list,
                    workers=16,
                    use_multiprocessing=True)

Epoch 1/20

Epoch 00001: acc improved from -inf to 0.35591, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 2/20

Epoch 00002: acc improved from 0.35591 to 0.60616, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 3/20

Epoch 00003: acc improved from 0.60616 to 0.65154, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 4/20

Epoch 00004: acc improved from 0.65154 to 0.67694, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 5/20

Epoch 00005: acc improved from 0.67694 to 0.69793, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 6/20

Epoch 00006: acc improved from 0.69793 to 0.71317, saving model to ../data/aia-picture-classification1/model/top_model_weights_6.h5
Epoch 7/20

Epoch 00007: acc did not improve
Epoch 8/20

Epoch 00008: acc improved from 0.71317 to 0.73891, saving model to ../data/aia-pic

<keras.callbacks.History at 0x7f89a86c27b8>

In [34]:
# Restore the best head-only checkpoint (the file the stage-1 ModelCheckpoint
# callback wrote to `top_weights_path`) before fine-tuning.
model.load_weights(top_weights_path)

In [35]:
# Split the layers at the configured index: everything before it stays
# frozen, everything from it onwards becomes trainable for fine-tuning.
_cut = based_model_last_block_layer_number
for _layer_group, _is_trainable in ((model.layers[:_cut], False),
                                    (model.layers[_cut:], True)):
    for layer in _layer_group:
        layer.trainable = _is_trainable

In [36]:
# Compile again with the same settings as before (Nadam, categorical
# cross-entropy, accuracy metric) now that the `trainable` flags have changed.
# NOTE(review): Keras is documented to apply `trainable` changes only after a
# recompile - confirm for the installed version.
model.compile(
    loss=categorical_crossentropy,
    optimizer='nadam',
    metrics=['accuracy'],
)

In [19]:
# Output files for the fine-tuning stage.
final_weights_path = os.path.join(model_path, 'model_weights_v5.h5')
csv_path = os.path.join(model_path, 'model_csv_v5.csv')

# Same callback trio as the first stage, with a longer early-stopping
# patience: checkpoint on best training accuracy, stop after 20 epochs
# without a lower training loss, and log each epoch to CSV.
_checkpoint_cb = ModelCheckpoint(final_weights_path, monitor='acc', verbose=1, save_best_only=True)
_early_stop_cb = EarlyStopping(monitor='loss', patience=20, verbose=0)
_csv_log_cb = CSVLogger(csv_path, separator=',', append=False)
callbacks_list = [_checkpoint_cb, _early_stop_cb, _csv_log_cb]

In [20]:

# Stage 2: fine-tune the model (unfrozen layers plus head) for the full
# epoch budget, using the fine-tuning callbacks.
_fine_tune_options = dict(
    steps_per_epoch=train_generator.n // batch_size,
    epochs=nb_epoch,
    callbacks=callbacks_list,
    workers=16,
    use_multiprocessing=True,
)
model.fit_generator(train_generator, **_fine_tune_options)

# Save the model architecture as JSON alongside the weight files.
model_json = model.to_json()
_architecture_file = os.path.join(model_path, 'model_v5.json')
with open(_architecture_file, 'w') as json_file:
    json_file.write(model_json)

Epoch 1/100

Epoch 00001: acc improved from -inf to 0.78158, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 2/100


Epoch 00002: acc improved from 0.78158 to 0.87030, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 3/100

Epoch 00003: acc improved from 0.87030 to 0.88927, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 4/100

Epoch 00004: acc improved from 0.88927 to 0.90417, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 5/100

Epoch 00005: acc improved from 0.90417 to 0.90586, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 6/100

Epoch 00006: acc improved from 0.90586 to 0.92414, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 7/100

Epoch 00007: acc improved from 0.92414 to 0.92787, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 8/


Epoch 00050: acc improved from 0.97765 to 0.98118, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 51/100

Epoch 00051: acc did not improve
Epoch 52/100

Epoch 00052: acc did not improve
Epoch 53/100

Epoch 00053: acc improved from 0.98118 to 0.98152, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 54/100

Epoch 00054: acc did not improve
Epoch 55/100

Epoch 00055: acc did not improve
Epoch 56/100

Epoch 00056: acc did not improve
Epoch 57/100

Epoch 00057: acc did not improve
Epoch 58/100

Epoch 00058: acc did not improve
Epoch 59/100

Epoch 00059: acc did not improve
Epoch 60/100

Epoch 00060: acc did not improve
Epoch 61/100

Epoch 00061: acc did not improve
Epoch 62/100

Epoch 00062: acc improved from 0.98152 to 0.98488, saving model to ../data/aia-picture-classification1/model/model_weights_v5.h5
Epoch 63/100

Epoch 00063: acc improved from 0.98488 to 0.98703, saving model to ../data/aia-picture-classificatio

In [21]:
# Restore the best fine-tuning checkpoint (saved by ModelCheckpoint to
# `final_weights_path`) rather than using the weights of the last epoch.
model.load_weights(final_weights_path)

In [22]:
# Run the model over the test generator. The generator was created with
# shuffle=False, so the output rows follow `test_generator.filenames`.
X_l_vec = model.predict_generator(generator=test_generator)

In [23]:
# Pick, for every test image, the position of the highest score along the
# last axis of the model output.
predict_index = X_l_vec.argmax(axis=-1)

In [24]:
# Derive the submission ids from the test file paths: keep only the file name
# and drop its extension. `os.path` handles any directory depth, the
# platform's path separator and any image extension, whereas the previous
# `split('/')[1].replace('.jpg', '')` assumed exactly "<dir>/<name>.jpg" and
# would also strip a ".jpg" occurring in the middle of a name.
fnames = [os.path.splitext(os.path.basename(_f))[0] for _f in test_generator.filenames]

In [37]:
# Peek at the predicted class indices (argmax positions of the model output).
predict_index

array([ 1,  4,  4, ..., 10, 11, 13])

In [26]:
# Spot-check the first derived submission id.
fnames[0]

'001d4c8d70ebf7f025fccf256324d3d5ad3560faee1cdf8c7115f5eb033bc3d2'

In [27]:
# Invert the training generator's {class name: index} mapping into a list
# that can be indexed directly by a predicted class index.
_name_to_index = train_generator.class_indices
class_list = [None] * len(_name_to_index)
for _class_name, _class_index in _name_to_index.items():
    class_list[_class_index] = _class_name

In [28]:
# Show the class-name -> index mapping assigned by the training generator;
# it differs from the target indices in `mtype_dict`.
train_generator.class_indices

{'CALsuburb': 0,
 'PARoffice': 1,
 'bedroom': 2,
 'coast': 3,
 'forest': 4,
 'highway': 5,
 'industrial': 6,
 'insidecity': 7,
 'kitchen': 8,
 'livingroom': 9,
 'mountain': 10,
 'opencountry': 11,
 'store': 12,
 'street': 13,
 'tallbuilding': 14}

In [29]:
# Sanity check: there should be one prediction per test image.
predict_index.shape

(1500,)

In [30]:
# Show the class-name -> target-index mapping loaded from
# target_to_number.txt.
mtype_dict

{'CALsuburb': 9,
 'PARoffice': 7,
 'bedroom': 12,
 'coast': 10,
 'forest': 4,
 'highway': 14,
 'industrial': 2,
 'insidecity': 3,
 'kitchen': 0,
 'livingroom': 5,
 'mountain': 8,
 'opencountry': 6,
 'store': 11,
 'street': 1,
 'tallbuilding': 13}

In [31]:
# Translate each prediction in two hops: generator class index -> class name
# (via `class_list`) -> target index expected in the submission (via
# `mtype_dict`).
cateidxs = list(map(lambda _p: mtype_dict[class_list[_p]], predict_index))

In [32]:
# Assemble the submission table: one row per test image, pairing its id
# with the predicted target index.
_submission_columns = {'id': fnames, 'class': cateidxs}
percentile_list = pd.DataFrame(_submission_columns)

In [33]:
# Write the submission file: columns in the order `id`, `class`, without the
# DataFrame index.
percentile_list.to_csv(
    'out_5.csv',
    columns=["id", "class"],
    index=False,
    encoding='utf-8',
)