### Load Key Libraries

In [1]:
import numpy as np
import tensorflow as tf
import keras
import cv2
import pandas as pd
import random

Using TensorFlow backend.


### Load Data

In [10]:
## Load Labels
import csv
labels = [];
with open('data/label.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        labels.append(row[0])

In [12]:
labels = [int(label) for label in labels]

In [13]:
from collections import Counter
c = Counter(labels)
print(c)

Counter({0: 7331, 7: 4295, 2: 3957, 12: 2033, 10: 1974, 8: 1568, 21: 852, 5: 765, 4: 631, 18: 598, 9: 473, 16: 401, 6: 259, 3: 172, 11: 149, 14: 132, 13: 107, 17: 104, 15: 72, 1: 39, 20: 20, 19: 1})


In [15]:
### Put Files Into Correct Folders, Only do it once
'''
import os
import shutil

count = 0;
for i in range(1, 26201):
    path1 = 'img/img' + '%05d' %i + '.jpg'
    if (os.path.isfile(path1)):
        if (i%10 < 8):
            path2 = 'train_img/' + '%d' %labels[count] + '/%05d' %i + '.jpg'
        else:
            path2 = 'valid_img/' + '%d' %labels[count] + '/%05d' %i + '.jpg'
        
        shutil.move(path1, path2)
        count += 1
'''

### Preprocessing

In [2]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

def randombrightness(img):
    return img * np.random.uniform(0.95, 1.05)

datagen = ImageDataGenerator(
    width_shift_range = 0.15,
    height_shift_range = 0.15,
    horizontal_flip = True,
    vertical_flip = True,
    rotation_range = 90,
    shear_range=0.1,
    rescale = 1./255,
    zoom_range = [0.95, 1.05],
    channel_shift_range = 15,
    preprocessing_function = randombrightness,
    fill_mode='nearest'
)

### Build The Model From Scratch

In [9]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

model = Sequential([
    Conv2D(filters = 32, kernel_size = 3, padding='valid', activation='relu', input_shape=(440,440,3)),
    MaxPooling2D(2,2,'valid'),
    Conv2D(filters = 64, kernel_size = 3, padding='valid', activation='relu'),
    MaxPooling2D(2,2,'valid'),
    Conv2D(filters = 128, kernel_size = 3, padding='same', activation='relu'),
    MaxPooling2D(2,2,'same'),
    Conv2D(filters = 256, kernel_size = 3, padding='same', activation='relu'),
    MaxPooling2D(2,2,'same'),
    Conv2D(filters = 512, kernel_size = 3, padding='same', activation='relu'),
    MaxPooling2D(2,2,'same'),
    Conv2D(filters = 1024, kernel_size = 3, padding='same', activation='relu'),
    MaxPooling2D(2,2,'same'),
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(22, activation='softmax')
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 438, 438, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 219, 219, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 217, 217, 64)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 108, 108, 64)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 108, 108, 128)     73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 54, 54, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 54, 54, 256)       295168    
__________

In [10]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
## random crop
def random_crop(img, random_crop_size):
    dx, dy = random_crop_size
    height, width = img.shape[0], img.shape[1]
    x = np.random.randint(50, width-dx+1-50)
    y = np.random.randint(50, height-dy+1-50)
    return img[y:(y+dy), x:(x+dx)]

def fix_crop(img, random_crop_size):
    dx, dy = random_crop_size
    x = 108
    y = 108
    return img[y:(y+dy), x:(x+dx)]

def crop_generator(batches, crop_length):
    while True:
        batch_x, batch_y = next(batches)
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = random_crop(batch_x[i], (crop_length, crop_length))
        yield (batch_crops, batch_y)
        
def crop_generator2(batches, crop_length):
    while True:
        batch_x, batch_y = next(batches)
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = fix_crop(batch_x[i], (crop_length, crop_length))
        yield (batch_crops, batch_y)

In [3]:
train_batches = datagen.flow_from_directory(
    'train_img',
    target_size=(440,440),
    batch_size=32
)
#train_crops = crop_generator(train_batches, 224)

Found 20738 images belonging to 22 classes.


In [4]:
datagen_v = ImageDataGenerator(rescale=1./255)

In [5]:
valid_batches = datagen_v.flow_from_directory(
    'valid_img',
    target_size=(440, 440),
    batch_size=32
)
#valid_crops = crop_generator2(valid_batches, 224)

Found 5195 images belonging to 22 classes.


In [15]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='mymodel/weights.best.fromScratch.hdf5',
                              verbose=1, save_best_only=True)
model.fit_generator(
    train_batches,
    steps_per_epoch = int(20738 / 32),
    epochs=20,
    validation_data = valid_batches,
    validation_steps = int(5195 / 32),
    callbacks = [checkpointer],
    verbose=1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fb4a8fa9940>

### Continue Training

In [None]:
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='mymodel/weights.best.fromScratch.hdf5',
                              verbose=1, save_best_only=True)
model = load_model('mymodel/weights.best.fromScratch.hdf5')
model.fit_generator(
    train_batches,
    steps_per_epoch = int(20738 / 32),
    epochs=50,
    validation_data = valid_batches,
    validation_steps = int(5195 / 32),
    callbacks = [checkpointer],
    verbose=1
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50


Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50

## Conclusion: Build From Scratch has Best Result 88%

### Try ResNet 50 Fine Tuning

In [29]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

datagen = ImageDataGenerator(
    width_shift_range = 0.15,
    height_shift_range = 0.15,
    horizontal_flip = True,
    rotation_range = 50,
    shear_range=0.1,
    fill_mode='nearest'
)
train_generator = datagen.flow_from_directory(
    'train_img',
    target_size=(224,224),
    batch_size=1
)

Found 20738 images belonging to 22 classes.


In [30]:
datagen_v = ImageDataGenerator()
valid_generator = datagen_v.flow_from_directory(
    'valid_img',
    target_size=(224,224),
    batch_size=1
)

Found 5195 images belonging to 22 classes.


In [31]:
from keras.applications import resnet50
model2 = resnet50.ResNet50(include_top=False, weights='imagenet')

In [32]:
bottleneck_features_train = model2.predict_generator(
    train_generator,
    20738,
)
np.save('bottleneck_features_train.npy', bottleneck_features_train)

In [33]:
len(bottleneck_features_train)

20738

In [34]:
bottleneck_features_valid = model2.predict_generator(
    valid_generator,
    5195,
)
np.save('bottleneck_features_valid.npy', bottleneck_features_valid)

In [35]:
len(bottleneck_features_valid)

5195

In [36]:
from keras.utils.np_utils import to_categorical
train_data = np.load('bottleneck_features_train.npy')
train_labels = train_generator.classes
train_labels = to_categorical(train_labels[0:len(bottleneck_features_train)], num_classes=22)

In [37]:
valid_data = np.load('bottleneck_features_valid.npy')
valid_labels = valid_generator.classes
valid_labels = to_categorical(valid_labels[0:len(bottleneck_features_valid)], num_classes=22)

In [38]:
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense

model2 = Sequential([
    Flatten(input_shape = train_data.shape[1:]),
    Dense(512, activation = 'relu'),
    Dropout(0.5),
    Dense(22, activation='softmax')
])
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [40]:
model2.fit(
    train_data,
    train_labels,
    epochs = 20,
    batch_size = 32,
    validation_data = (valid_data, valid_labels),
    verbose=1
)

Train on 20738 samples, validate on 5195 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f5718c63be0>

## Fine Tune ResNet 50 Completely Fails

### Retrain ResNet 50

In [70]:
from keras.applications import resnet50
from keras.layers import Input
model2 = resnet50.ResNet50(include_top=True, weights=None, input_tensor=Input(shape=(224,224,3)), classes=22)

In [63]:
#model2.summary()

In [65]:
#If you want to use your own top
'''from keras.models import Model
x = model2.output
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(256)(x)
x = Dropout(0.5)(x)
x = Dense(22)(x)
model2 = Model(model2.input, x)
'''

In [71]:
model2.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_8 (InputLayer)             (None, 224, 224, 3)   0                                            
____________________________________________________________________________________________________
zero_padding2d_8 (ZeroPadding2D) (None, 230, 230, 3)   0                                            
____________________________________________________________________________________________________
conv1 (Conv2D)                   (None, 112, 112, 64)  9472                                         
____________________________________________________________________________________________________
bn_conv1 (BatchNormalization)    (None, 112, 112, 64)  256                                          
___________________________________________________________________________________________

In [72]:
datagen = ImageDataGenerator(
    width_shift_range = 0.15,
    height_shift_range = 0.15,
    horizontal_flip = True,
    rotation_range = 50,
    shear_range=0.1,
    rescale = 1./255,
    fill_mode='nearest'
)
train_generator = datagen.flow_from_directory(
    'train_img',
    target_size=(224,224),
    batch_size=32
)
datagen_v = ImageDataGenerator(
    rescale = 1./255
)
valid_generator = datagen_v.flow_from_directory(
    'valid_img',
    target_size=(224,224),
    batch_size=32
)

Found 20738 images belonging to 22 classes.
Found 5195 images belonging to 22 classes.


In [73]:
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [77]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='mymodel/weights.best.ResNet50.hdf5',
                              verbose=1, save_best_only=True)
model2.fit_generator(
    train_generator,
    steps_per_epoch = int(20738 / 32),
    epochs=20,
    validation_data = valid_generator,
    validation_steps = int(5195 / 32),
    callbacks = [checkpointer],
    verbose=1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f56e2ac85c0>

In [None]:
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='mymodel/weights.best.ResNet50.hdf5',
                              verbose=1, save_best_only=True)
model2 = load_model('mymodel/weights.best.ResNet50.hdf5')
model2.fit_generator(
    train_generator,
    steps_per_epoch = int(20738 / 32),
    epochs=20,
    validation_data = valid_generator,
    validation_steps = int(5195 / 32),
    callbacks = [checkpointer],
    verbose=1
)

Epoch 1/20
Epoch 2/20