In [1]:
from keras.utils import np_utils
import numpy as np
from matplotlib import pyplot
import seaborn as sns
%matplotlib inline

# Display progress logs on stdout
#import logging
#logging.basicConfig(level=logging.DEBUG,
#                    format='%(asctime)s %(levelname)s %(message)s')

from sklearn.datasets import load_files
import pandas as pd
pd.set_option("display.max_colwidth", 75)

import tensorflow as tf
from keras import backend as K

# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config = config)
# Register the session with Keras. A bare tf.Session is not the default
# session, so without this call Keras creates its own session and the
# allow_growth option above is never applied to the models below.
K.set_session(sess)

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Read the train / validation / test index files into DataFrames.
# NOTE(review): basedir is an absolute, machine-specific path; it must be
# edited before the notebook can run anywhere else.
basedir="/home/tutysara/src/myprojects/t4sa/data"
train_idx_path = basedir+ "/b-t4sa_train.txt"
valid_idx_path = basedir+ "/b-t4sa_val.txt"
test_idx_path = basedir+ "/b-t4sa_test.txt"

# Each index file is space separated with no header row: a path column (X)
# followed by a label column (y).
col_names = ["X", "y"]
# Fraction of every split to keep (1 keeps all rows); lower it for quick runs.
percent = 1

train_data_df, valid_data_df, test_data_df = (
    full_df.iloc[:int(full_df.shape[0]*percent)]
    for full_df in (
        pd.read_csv(idx_path, sep=" ", header=None, names=col_names)
        for idx_path in (train_idx_path, valid_idx_path, test_idx_path)
    )
)

In [3]:
# Preview the first rows of the test index (image path in X, integer label in y).
test_data_df.head()

Unnamed: 0,X,y
0,data/76878/768781748033335296-1.jpg,2
1,data/80174/801747152955326465-1.jpg,0
2,data/76899/768993807845163008-3.jpg,2
3,data/80162/801624637314629634-1.jpg,0
4,data/78083/780835980597194753-1.jpg,2


In [4]:
# (rows, columns) of each split, one per line.
print(train_data_df.shape, valid_data_df.shape, test_data_df.shape, sep="\n")

# Total number of rows across the three splits.
print(sum(split_df.shape[0] for split_df in (train_data_df, valid_data_df, test_data_df)))
# Keep a five-row slice of the test index for quick experiments and display it.
sample_df = test_data_df.iloc[:5]
sample_df

(368586, 2)
(51000, 2)
(51000, 2)
470586


Unnamed: 0,X,y
0,data/76878/768781748033335296-1.jpg,2
1,data/80174/801747152955326465-1.jpg,0
2,data/76899/768993807845163008-3.jpg,2
3,data/80162/801624637314629634-1.jpg,0
4,data/78083/780835980597194753-1.jpg,2


In [10]:
# convert and load images
from keras.preprocessing import image                  
# NOTE(review): tqdm is imported but not used by any cell visible in this notebook.
from tqdm import tqdm
from PIL import ImageFile                            
# Module-wide Pillow switch; presumably set so truncated/partially written
# image files are still loaded instead of raising - confirm against Pillow docs.
ImageFile.LOAD_TRUNCATED_IMAGES = True

def path_to_tensor(img_path):
    """Load one image file and return it as a single-item batch array.

    The image is resized to 224x224 on load, converted to a NumPy array and
    given a new leading axis (expected shape (1, 224, 224, 3) for RGB input).
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis so the per-image arrays can be stacked later.
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert every path in ``img_paths`` with ``path_to_tensor`` and stack the results along axis 0."""
    return np.vstack(list(map(path_to_tensor, img_paths)))

In [12]:
#definition of data generator
def df_data_generator(df, batch_size=32, num_classes=3, shuffle=False, basedir=".", transformer=None):
    """Endlessly yield ``(inputs, one_hot_labels)`` batches built from an index DataFrame.

    Args:
        df: DataFrame with an ``X`` column of relative file paths and a ``y``
            column of integer class labels.
        batch_size: number of rows per batch.
        num_classes: width of the one-hot label vectors.
        shuffle: if true, rows are shuffled once before batching starts (the
            same order is then repeated on every pass).
        basedir: prefix joined to each path in ``X`` with "/".
        transformer: optional callable applied to each batch of paths; its
            result is cast to float32. When omitted, the paths themselves are
            yielded.

    Only ``len(df) // batch_size`` batches are served per pass (at least one),
    so a trailing partial batch is skipped whenever at least one full batch exists.
    """
    if shuffle:
        df = df.sample(frac=1)

    file_names = df.X.apply(lambda rel_path: basedir+"/"+rel_path)
    one_hot = np_utils.to_categorical(df.y, num_classes)

    # Loop forever: Keras pulls as many batches as its `steps` argument asks for.
    while True:
        n_batches = max(1, df.shape[0]//batch_size)
        for start in range(0, n_batches*batch_size, batch_size):
            stop = start + batch_size
            inputs, labels = file_names[start:stop], one_hot[start:stop]
            if transformer:
                inputs = transformer(inputs).astype('float32')
            yield inputs, labels
                

import keras.applications.mobilenet as mobilenet

def transformer(x):
    """Load the images at paths ``x`` and apply MobileNet's input preprocessing to the batch."""
    image_batch = paths_to_tensor(x)
    return mobilenet.preprocess_input(image_batch)


In [6]:
# Build one image-batch generator per split (train, validation, test).
# NOTE(review): num_classes=133 makes 133-wide one-hot labels although the
# sample rows only show labels 0 and 2 - confirm the intended class count.
batch_size=64
train_gen, valid_gen, test_gen = (
    df_data_generator(split_df, batch_size=batch_size, transformer=transformer, basedir=basedir, num_classes=133)
    for split_df in (train_data_df, valid_data_df, test_data_df)
)


In [7]:
# File names (relative to the working directory) under which the MobileNet
# bottleneck features of each split are saved and later reloaded.
bnf_train_name = 'bottleneck_features_mobilenet_train.npy'
bnf_valid_name = 'bottleneck_features_mobilenet_valid.npy'
bnf_test_name = 'bottleneck_features_mobilenet_test.npy' 

In [None]:
from keras.applications.mobilenet import MobileNet
import time

# ImageNet-pretrained MobileNet without its classification head, used here
# purely as a feature extractor.
mobilenet_feature_ext = MobileNet(include_top=False, weights='imagenet', input_shape=(224, 224, 3))


def _extract_and_save(gen, n_samples, out_path):
    """Run ``gen`` through the feature extractor, save the result to ``out_path`` and return it.

    Only ``n_samples // batch_size`` batches are consumed (``batch_size`` is
    the notebook-level value the generators were built with), so rows beyond
    the last full batch get no features. Prints the elapsed wall-clock time.
    """
    started = time.time()
    features = mobilenet_feature_ext.predict_generator(gen, steps=(n_samples//batch_size))
    # The context manager closes the file even if np.save raises.
    with open(out_path, 'wb') as out_file:
        np.save(out_file, features)
    print("Took {} seconds".format(time.time()-started))
    return features


# Each variable now holds the features of the split it is named after.
bottleneck_features_validation = _extract_and_save(valid_gen, valid_data_df.shape[0], bnf_valid_name)
bottleneck_features_test = _extract_and_save(test_gen, test_data_df.shape[0], bnf_test_name)
bottleneck_features_train = _extract_and_save(train_gen, train_data_df.shape[0], bnf_train_name)

In [None]:
# load converted data back
# Pass the path directly so np.load opens and closes the file itself,
# leaving no open file handles behind.
train_data = np.load(bnf_train_name)
valid_data = np.load(bnf_valid_name)
test_data = np.load(bnf_test_name)


In [None]:
# Shapes of the reloaded bottleneck feature arrays, one per line.
print(train_data.shape, valid_data.shape, test_data.shape, sep="\n")

In [None]:
## build labels for batch classifier
# Define num_classes here so this cell runs on a fresh kernel; it was
# otherwise first assigned in a later cell.
# NOTE(review): 133 matches the generators and the Dense(133) output layer,
# but the sample rows only show labels 0 and 2 - confirm the class count.
num_classes = 133
# The feature arrays may hold fewer rows than the index frames, so only the
# labels of rows that actually have features are encoded.
train_labels = np_utils.to_categorical(train_data_df.y[:train_data.shape[0]], num_classes)
valid_labels = np_utils.to_categorical(valid_data_df.y[:valid_data.shape[0]], num_classes)
test_labels = np_utils.to_categorical(test_data_df.y[:test_data.shape[0]], num_classes)

In [None]:
print(train_labels.shape)
print(valid_labels.shape)
print(test_labels.shape)

In [None]:
def fex_data_generator(fex_data, df, batch_size=32, num_classes=3):
    """Endlessly yield ``(features, one_hot_labels)`` batches from precomputed features.

    Args:
        fex_data: array of extracted features, indexed by row along axis 0.
        df: index DataFrame whose ``y`` column holds the integer labels, in
            the same row order as ``fex_data``.
        batch_size: number of rows per batch.
        num_classes: width of the one-hot label vectors.

    ``fex_data.shape[0] // batch_size`` batches are served per pass (at least
    one); a trailing partial batch is skipped when a full batch exists.
    """
    n_rows = fex_data.shape[0]
    # fex_data may have fewer rows than df, so labels are cut to the rows that
    # have features. This keeps X and y the same length even when the only
    # batch served is shorter than batch_size.
    y = np_utils.to_categorical(np.asarray(df.y)[:n_rows], num_classes)
    # infinitely serve batches
    while True:
        max_range = max(1, n_rows//batch_size)
        for i in range(max_range):
            yield fex_data[i*batch_size : (i+1)*batch_size], y[i*batch_size : (i+1)*batch_size]
                


In [None]:
## make a generator of loaded data
# One generator per split, pairing the saved features with that split's index.
num_classes = 133
train_gen_fex, valid_gen_fex, test_gen_fex = (
    fex_data_generator(features, index_df, num_classes=num_classes, batch_size=256)
    for features, index_df in (
        (train_data, train_data_df),
        (valid_data, valid_data_df),
        (test_data, test_data_df),
    )
)

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import regularizers

# Classifier head trained on the saved bottleneck features: global average
# pooling over the 7x7x1024 feature maps, then a 512-unit ReLU layer and a
# 133-way softmax, with dropout before each dense layer.
top_model = Sequential([
    GlobalAveragePooling2D(input_shape=(7, 7, 1024)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(133, activation='softmax'),
])

top_model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

top_model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Quick in-memory fit on a small slice (250 training / 150 validation rows)
# before training on the full feature set; stops early once val_loss has not
# improved for 4 epochs.
early_stopping = EarlyStopping(verbose=1, patience=4, monitor='val_loss')
top_model.fit(
    x=train_data[:250],
    y=train_labels[:250],
    validation_data=(valid_data[:150], valid_labels[:150]),
    batch_size=64,
    epochs=25,
    callbacks=[early_stopping],
)

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os

# The checkpoint file cannot be written if its directory is missing.
os.makedirs('saved_models', exist_ok=True)
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.topmodel.mobilenet.hdf5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)

# The *_gen_fex generators serve 256-row batches of the saved features, so
# steps must be counted in those units. Counting with the image batch_size
# (64) made every "epoch" roughly four passes over the data.
fex_batch_size = 256  # must match the batch_size given to fex_data_generator
top_model.fit_generator(train_gen_fex,
          steps_per_epoch=max(1, train_data.shape[0] // fex_batch_size),
          epochs=25,
          validation_data=valid_gen_fex,
          validation_steps=max(1, valid_data.shape[0] // fex_batch_size),
          callbacks=[early_stopping, checkpointer])

In [None]:
# fine tune on full model
from keras import optimizers
from keras.models import Model

# MobileNet(...) returns a functional Model, which has no .add(); attach the
# trained head by calling it on the base network's output tensor.
base_model = MobileNet(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
# Small learning rate with momentum for fine-tuning the combined network.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os

# The checkpoint file cannot be written if its directory is missing.
os.makedirs('saved_models', exist_ok=True)
checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.fullmodel.mobilenet.hdf5', verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
# train_gen / valid_gen were built with batch_size, so these step counts
# cover each split once per epoch (minus any trailing partial batch).
model.fit_generator(train_gen,
          steps_per_epoch= (train_data_df.shape[0]// batch_size),
          epochs=25,
          validation_data=valid_gen,
          validation_steps= (valid_data_df.shape[0] // batch_size),
          callbacks=[early_stopping, checkpointer])