In [1]:
from keras.utils import np_utils
import numpy as np
from matplotlib import pyplot
import seaborn as sns
%matplotlib inline

# Display progress logs on stdout
#import logging
#logging.basicConfig(level=logging.DEBUG,
#                    format='%(asctime)s %(levelname)s %(message)s')

from sklearn.datasets import load_files
import pandas as pd
pd.set_option("display.max_colwidth", 75)

import tensorflow as tf
from keras import backend as K

# Ask TensorFlow to grow its GPU memory allocation on demand rather than
# reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config = config)
# Register the configured session with Keras. Without this call nothing in the
# notebook ever uses `sess`: Keras would create its own default session.
K.set_session(sess)

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
basedir="/home/tutysara/src/myprojects/dog-project/dogImages"
train_idx_path = basedir+ "/train_list.txt"
valid_idx_path = basedir+ "/valid_list.txt"
test_idx_path = basedir+ "/test_list.txt"


col_names = ["X", "y"]


def load_split_index(idx_path):
    """Read one split's index file into a DataFrame.

    The file is parsed as header-less, space-separated text with two fields
    named ``X`` and ``y``. One is subtracted from ``y`` so the labels can be
    fed to one-hot encoding (assumes the labels in the file start at 1).

    Args:
        idx_path: Path of the index file to read.

    Returns:
        A new DataFrame with columns ``X`` and ``y`` (``y`` shifted down by 1).
    """
    index_df = pd.read_csv(idx_path, sep=" ", header=None, names=col_names)
    return index_df.assign(y=index_df.y - 1)


train_data_df = load_split_index(train_idx_path)
valid_data_df = load_split_index(valid_idx_path)
test_data_df = load_split_index(test_idx_path)

In [3]:
# Show the first five rows of the test index to sanity-check the parsed columns.
test_data_df.head(n=5)

Unnamed: 0,X,y
0,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05148.jpg,74
1,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05143.jpg,74
2,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05164.jpg,74
3,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05129.jpg,74
4,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05149.jpg,74


In [4]:
# (rows, columns) of each split, one per line: train, validation, test.
print(train_data_df.shape, valid_data_df.shape, test_data_df.shape, sep="\n")

# Total number of indexed rows across the three splits.
print(sum(split.shape[0] for split in (train_data_df, valid_data_df, test_data_df)))

# Keep the first five test rows as a small sample frame and display it.
sample_df = test_data_df[:5]
sample_df

(6680, 2)
(835, 2)
(836, 2)
8351


Unnamed: 0,X,y
0,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05148.jpg,74
1,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05143.jpg,74
2,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05164.jpg,74
3,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05129.jpg,74
4,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05149.jpg,74


In [5]:
# convert and load images
from keras.preprocessing import image                  
from tqdm import tqdm
from PIL import ImageFile                            
# Module-level Pillow switch, set once before any image is opened below.
# NOTE(review): going by its name, this makes Pillow accept truncated image
# files instead of failing on them — confirm against the Pillow docs.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def path_to_tensor(img_path, target_size=(224, 224)):
    """Load one image file as a single-item batch.

    Args:
        img_path: Path of the image file to load.
        target_size: Size passed to ``image.load_img`` for resizing on load.
            Defaults to ``(224, 224)``, the size used throughout this notebook.

    Returns:
        The array produced by ``image.img_to_array`` with a new leading axis
        of length 1, e.g. shape ``(1, 224, 224, 3)`` for the default size.
    """
    # Load the image as a PIL image, resized to the requested size.
    img = image.load_img(img_path, target_size=target_size)
    # Convert the PIL image to a 3D array.
    x = image.img_to_array(img)
    # Prepend the batch axis so single images can be stacked into a batch later.
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` and stack them into one batch array.

    Each path is converted with ``path_to_tensor``; the per-image results are
    then stacked along the first axis in the order the paths were given.
    """
    batch_parts = []
    for single_path in img_paths:
        batch_parts.append(path_to_tensor(single_path))
    return np.vstack(batch_parts)

In [10]:
#definition of data generator
def df_data_generator(df, batch_size=32, num_classes=3, shuffle=False, basedir=".", transformer=None):
    """Endlessly yield ``(inputs, one_hot_labels)`` batches from an index frame.

    Args:
        df: DataFrame with an ``X`` column (paths relative to ``basedir``) and
            a ``y`` column (integer class ids).
        batch_size: Maximum number of rows per batch.
        num_classes: Width of the one-hot label vectors.
        shuffle: If true, the rows are reshuffled before every pass.
        basedir: Directory prefixed (with ``/``) to every entry of ``X``.
        transformer: Optional callable applied to each batch of full paths;
            its result is cast to ``float32``. When omitted, the batch of
            paths itself is yielded.

    Yields:
        ``(inputs, labels)`` where ``labels`` is the matching slice of the
        one-hot encoded ``y`` column. Every row is served once per pass,
        including a final batch shorter than ``batch_size``.
    """
    # infinitely serve batches
    while True:
        if shuffle:
            df = df.sample(frac=1)

        X_file_name = df.X.apply(lambda x: basedir+"/"+x)
        y = np_utils.to_categorical(df.y, num_classes)
        # Round up so the trailing partial batch is served as well; flooring
        # here silently dropped the last `len(df) % batch_size` rows.
        # max(1, ...) keeps the previous behaviour of yielding one (empty)
        # batch for an empty frame.
        n_batches = max(1, (df.shape[0] + batch_size - 1) // batch_size)
        for i in range(n_batches):
            rows = slice(i * batch_size, (i + 1) * batch_size)
            # .iloc makes the positional intent explicit: after shuffling the
            # index labels no longer match row positions.
            batch_paths = X_file_name.iloc[rows]
            batch_labels = y[rows]
            if transformer:
                yield transformer(batch_paths).astype('float32'), batch_labels
            else:
                yield batch_paths, batch_labels
                

from keras.applications.mobilenet import MobileNet
import keras.applications.mobilenet as mobilenet
# MobileNet without its classification top, loaded with ImageNet weights, for
# 224x224 RGB inputs.
mobilenet_feature_ext = MobileNet(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
# NOTE(review): private Keras API; presumably called up front so predict() can
# be used from the generator's worker thread — confirm.
mobilenet_feature_ext._make_predict_function()

def transformer(x):
    """Turn a collection of image paths into MobileNet-preprocessed input.

    The images are loaded with ``paths_to_tensor`` and the resulting batch is
    passed through ``mobilenet.preprocess_input``.
    """
    image_batch = paths_to_tensor(x)
    return mobilenet.preprocess_input(image_batch)


In [12]:
# load train, test, and validation datasets
batch_size=4096
# Image generators: each batch is a stack of preprocessed 224x224 RGB images.
train_gen = df_data_generator(train_data_df, batch_size=batch_size, transformer=transformer, basedir=basedir, num_classes=133)
valid_gen = df_data_generator(valid_data_df, batch_size=batch_size, transformer=transformer, basedir=basedir, num_classes=133)
test_gen = df_data_generator(test_data_df, batch_size=batch_size, transformer=transformer, basedir=basedir, num_classes=133)


def feature_transformer(x):
    """Load the images at paths ``x`` and return the headless MobileNet's output for them."""
    return mobilenet_feature_ext.predict(transformer(x))


# Feature-extraction ("fex") generators: same batches as above, but every image
# batch is first run through mobilenet_feature_ext. Later cells reference
# train_gen_fex / valid_gen_fex, which were otherwise never defined.
train_gen_fex = df_data_generator(train_data_df, batch_size=batch_size, transformer=feature_transformer, basedir=basedir, num_classes=133)
valid_gen_fex = df_data_generator(valid_data_df, batch_size=batch_size, transformer=feature_transformer, basedir=basedir, num_classes=133)
test_gen_fex = df_data_generator(test_data_df, batch_size=batch_size, transformer=feature_transformer, basedir=basedir, num_classes=133)


In [8]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import regularizers

# Classification head: global average pooling over a (7, 7, 1024) input, then a
# 512-unit ReLU layer and a 133-way softmax, with 20% dropout before each dense
# layer.
top_model = Sequential([
    GlobalAveragePooling2D(input_shape=(7, 7, 1024)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(133, activation='softmax'),
])

top_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

top_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 133)               68229     
Total params: 593,029
Trainable params: 593,029
Non-trainable params: 0
_________________________________________________________________


In [13]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
#checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.topmodel.mobilenet.hdf5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)


def _with_bottleneck_features(batches):
    """Yield ``(features, labels)``, running each image batch through mobilenet_feature_ext."""
    for images, labels in batches:
        yield mobilenet_feature_ext.predict(images), labels


# top_model is declared with input_shape=(7, 7, 1024), while train_gen and
# valid_gen yield preprocessed 224x224x3 image batches. The batches are
# therefore mapped through the headless MobileNet before reaching top_model;
# feeding the raw generators fails Keras' input-shape check on a fresh run.
# Steps are rounded up so one epoch covers every row without the extra step
# that `1 + n // batch_size` adds when n is an exact multiple of batch_size.
top_model.fit_generator(_with_bottleneck_features(train_gen),
          steps_per_epoch=(train_data_df.shape[0] + batch_size - 1) // batch_size,
          epochs=25,
          validation_data=_with_bottleneck_features(valid_gen),
          validation_steps=(valid_data_df.shape[0] + batch_size - 1) // batch_size,
          callbacks=[early_stopping])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 00005: early stopping


<keras.callbacks.History at 0x7f4456f43518>

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Quick fit of the head on small in-memory slices (250 training rows, 150
# validation rows), stopping once val_loss has not improved for 4 epochs.
# NOTE(review): train_data, train_labels, valid_data and valid_labels are not
# defined in the visible cells — confirm where they come from before running.
early_stopping = EarlyStopping(patience=4, monitor='val_loss', verbose=1)
top_model.fit(
    train_data[:250],
    train_labels[:250],
    validation_data=(valid_data[:150], valid_labels[:150]),
    batch_size=64,
    epochs=25,
    callbacks=[early_stopping],
)

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os

checkpoint_path = 'saved_models/weights.best.topmodel.mobilenet.hdf5'
# Create the checkpoint directory up front so saving the first checkpoint does
# not fail on a missing directory.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
# NOTE(review): make sure train_gen_fex / valid_gen_fex exist before running
# this cell.
# Steps are rounded up: with floor division a split smaller than batch_size
# (the 835-row validation split vs. batch_size=4096) gives 0 steps, and a
# larger one loses its final partial batch.
top_model.fit_generator(train_gen_fex,
          steps_per_epoch=(train_data_df.shape[0] + batch_size - 1) // batch_size,
          epochs=25,
          validation_data=valid_gen_fex,
          validation_steps=(valid_data_df.shape[0] + batch_size - 1) // batch_size,
          callbacks=[early_stopping, checkpointer])

In [None]:
# fine tune on full model
from keras import optimizers
from keras.models import Model

# MobileNet(...) returns a functional Model, which has no .add(). The head is
# attached by calling top_model on the base network's output instead.
base_model = MobileNet(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
# Small learning rate with momentum for fine-tuning the combined network.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os

checkpoint_path = 'saved_models/weights.best.fullmodel.mobilenet.hdf5'
# Create the checkpoint directory up front so saving the first checkpoint does
# not fail on a missing directory.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
# Steps are rounded up: with floor division a split smaller than batch_size
# (the 835-row validation split vs. batch_size=4096) gives 0 steps, and a
# larger one loses its final partial batch.
model.fit_generator(train_gen,
          steps_per_epoch=(train_data_df.shape[0] + batch_size - 1) // batch_size,
          epochs=25,
          validation_data=valid_gen,
          validation_steps=(valid_data_df.shape[0] + batch_size - 1) // batch_size,
          callbacks=[early_stopping, checkpointer])