In [29]:
from keras.utils import np_utils
import numpy as np
from matplotlib import pyplot
import seaborn as sns
%matplotlib inline

# Display progress logs on stdout
#import logging
#logging.basicConfig(level=logging.DEBUG,
#                    format='%(asctime)s %(levelname)s %(message)s')

from sklearn.datasets import load_files
import pandas as pd
# Show up to 75 characters of each column value when a DataFrame is displayed
pd.set_option("display.max_colwidth", 75)

# Batch size used when building the image generators and computing prediction steps
batch_size=32

In [None]:
# Build the index-file paths under the t4sa data directory and read the
# train/validation/test indexes into DataFrames with columns X and y
# (space-separated files, no header row).
# NOTE(review): this cell has no execution count, and the following cell reassigns
# basedir, the *_idx_path variables and all three DataFrames -- it looks like a
# leftover from another experiment; confirm whether it is still needed.
basedir="/home/tutysara/src/myprojects/t4sa/data"
train_idx_path = basedir+ "/b-t4sa_train.txt"
valid_idx_path = basedir+ "/b-t4sa_val.txt"
test_idx_path = basedir+ "/b-t4sa_test.txt"


col_names = ["X", "y"]
train_data_df = pd.read_csv(train_idx_path, sep=" ", header=None, names=col_names)
valid_data_df = pd.read_csv(valid_idx_path, sep=" ", header=None, names=col_names)
test_data_df = pd.read_csv(test_idx_path, sep=" ", header=None, names=col_names)

In [2]:
# Index files of the dog image dataset; each row is "<relative image path> <label>".
# NOTE(review): basedir is an absolute path on the author's machine -- adjust it
# before running this notebook anywhere else.
basedir="/home/tutysara/src/myprojects/dog-project/dogImages"
train_idx_path = basedir+ "/train_list.txt"
valid_idx_path = basedir+ "/valid_list.txt"
test_idx_path = basedir+ "/test_list.txt"


col_names = ["X", "y"]


def _read_index(idx_path):
    """Read one index file into a DataFrame with columns X and y, with y reduced by 1.

    Parameters
    ----------
    idx_path : str
        Path of a space-separated, header-less file holding an image path and a label per row.

    Returns
    -------
    pandas.DataFrame
        Frame with the image path in ``X`` and the label minus one in ``y``.
    """
    index_df = pd.read_csv(idx_path, sep=" ", header=None, names=col_names)
    # Shift every label down by one (e.g. folder 075 becomes class 74) so the
    # labels can be used directly as class indices when one-hot encoding.
    index_df.y = index_df.y-1
    return index_df


# The same read-and-shift step is applied to all three splits.
train_data_df = _read_index(train_idx_path)
valid_data_df = _read_index(valid_idx_path)
test_data_df = _read_index(test_idx_path)

In [3]:
# Preview the first five rows of the test index
test_data_df.head(n=5)

Unnamed: 0,X,y
0,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05148.jpg,74
1,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05143.jpg,74
2,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05164.jpg,74
3,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05129.jpg,74
4,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05149.jpg,74


In [6]:
# Print the (rows, columns) shape of each split, then the total number of samples
all_splits = (train_data_df, valid_data_df, test_data_df)
for split_df in all_splits:
    print(split_df.shape)

print(sum(split_df.shape[0] for split_df in all_splits))
# Keep the first five test rows as a small sample and display them
sample_df = test_data_df.iloc[:5]
sample_df

(6680, 2)
(835, 2)
(836, 2)
8351


Unnamed: 0,X,y
0,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05148.jpg,74
1,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05143.jpg,74
2,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05164.jpg,74
3,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05129.jpg,74
4,test/075.Glen_of_imaal_terrier/Glen_of_imaal_terrier_05149.jpg,74


In [7]:
# convert and load images
# NOTE(review): tqdm is imported here but is not used in the cells visible in this notebook.
from keras.preprocessing import image                  
from tqdm import tqdm
from PIL import ImageFile                            
# Tell PIL to load image files that are truncated instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True

def path_to_tensor(img_path):
    """Load one image file and return it as a 4D array of shape (1, 224, 224, 3).

    The image is loaded at a target size of 224x224, converted to a 3D array,
    and given a new leading axis of length 1 so that several results can be
    stacked along that axis.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis: (224, 224, 3) -> (1, 224, 224, 3)
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Load every image in ``img_paths`` and stack the results along the first axis.

    Each path is converted with ``path_to_tensor``; the per-image arrays are then
    joined into a single array with one row per image.
    """
    per_image = list(map(path_to_tensor, img_paths))
    stacked = np.vstack(per_image)
    return stacked

In [27]:
#definition of data generator
def df_data_generator(df, batch_size=32, num_classes=3, shuffle=False, basedir=".", transformer=None):
    """Endlessly yield (inputs, one-hot labels) batches built from an index DataFrame.

    Parameters
    ----------
    df : DataFrame with a column ``X`` of relative file paths and a column ``y`` of class indices.
    batch_size : number of rows per batch.
    num_classes : width of the one-hot label encoding.
    shuffle : when true, the rows are shuffled once before the first batch is served.
    basedir : directory prefixed (with a "/") to every path in ``X``.
    transformer : optional callable applied to each batch of file paths; its result
        is cast to float32. Without it the batch of file paths itself is yielded.

    Only ``len(df) // batch_size`` batches (at least one) are served per pass, so rows
    beyond the last full batch are never yielded.
    """
    frame = df.sample(frac=1) if shuffle else df

    file_paths = frame.X.apply(lambda rel_path: basedir+"/"+rel_path)
    one_hot = np_utils.to_categorical(frame.y, num_classes)

    # Cycle over the same batches forever; the consumer decides how many to take
    while True:
        n_batches = max(1, frame.shape[0]//batch_size)
        for batch_no in range(n_batches):
            start = batch_no*batch_size
            stop = (batch_no+1)*batch_size
            batch_inputs = file_paths[start : stop]
            batch_labels = one_hot[start : stop]
            if transformer:
                batch_inputs = transformer(batch_inputs).astype('float32')
            yield batch_inputs, batch_labels
                

import keras.applications.mobilenet as mobilenet

def transformer(x):
    """Load the images at the paths in ``x`` and apply MobileNet's input preprocessing."""
    image_batch = paths_to_tensor(x)
    return mobilenet.preprocess_input(image_batch)


In [37]:
# Create one image-batch generator per split; all three share the same settings
image_gen_options = dict(batch_size=batch_size, transformer=transformer, basedir=basedir, num_classes=133)
train_gen = df_data_generator(train_data_df, **image_gen_options)
valid_gen = df_data_generator(valid_data_df, **image_gen_options)
test_gen = df_data_generator(test_data_df, **image_gen_options)


In [38]:
# File names under which the extracted MobileNet features of each split are stored
bnf_name_template = 'bottleneck_features_mobilenet_{}.npy'
bnf_train_name = bnf_name_template.format('train')
bnf_valid_name = bnf_name_template.format('valid')
bnf_test_name = bnf_name_template.format('test')

In [39]:
from keras.applications.mobilenet import MobileNet

# MobileNet with ImageNet weights and without its top layers, used as a feature extractor
mobilenet_feature_ext = MobileNet(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Each split is predicted for (rows // batch_size) steps, so the rows after the
# last full batch are not part of the saved features.
# np.save is given the file name directly so numpy opens and closes the file itself.
bottleneck_features_train = mobilenet_feature_ext.predict_generator(train_gen, steps=(train_data_df.shape[0]//batch_size))
np.save(bnf_train_name, bottleneck_features_train)

# Validation features come from valid_gen and go to the validation file
bottleneck_features_validation = mobilenet_feature_ext.predict_generator(valid_gen, steps=(valid_data_df.shape[0]//batch_size))
np.save(bnf_valid_name, bottleneck_features_validation)

# Test features come from test_gen and go to the test file
bottleneck_features_test = mobilenet_feature_ext.predict_generator(test_gen, steps=(test_data_df.shape[0]//batch_size))
np.save(bnf_test_name, bottleneck_features_test)

In [40]:
# load converted data back
# Passing the file name lets numpy open and close each file itself,
# instead of leaving a handle from open() unclosed.
train_data = np.load(bnf_train_name)
valid_data = np.load(bnf_valid_name)
test_data = np.load(bnf_test_name)


In [41]:
# Print the shape of the reloaded feature array of each split
for feature_array in (train_data, valid_data, test_data):
    print(feature_array.shape)

(6656, 7, 7, 1024)
(832, 7, 7, 1024)
(832, 7, 7, 1024)


In [42]:
## build labels for batch classifier
# Number of classes, assigned here so that this cell does not rely on a later
# cell having been executed first.
num_classes = 133
# The features were extracted in index order (the image generators were created
# without shuffling) and stop at the last full batch, so the first N labels of
# each index line up with the N feature rows of that split.
train_labels = np_utils.to_categorical(train_data_df.y.iloc[:train_data.shape[0]], num_classes)
valid_labels = np_utils.to_categorical(valid_data_df.y.iloc[:valid_data.shape[0]], num_classes)
test_labels = np_utils.to_categorical(test_data_df.y.iloc[:test_data.shape[0]], num_classes)

In [43]:
# Print the shape of the one-hot label array of each split
for label_array in (train_labels, valid_labels, test_labels):
    print(label_array.shape)

(6656, 133)
(832, 133)
(832, 133)


In [32]:
def fex_data_generator(fex_data, df, batch_size=32, num_classes=3):
    """Endlessly yield (features, one-hot labels) batches from pre-extracted features.

    Parameters
    ----------
    fex_data : array of extracted features, one row per sample.
    df : DataFrame whose ``y`` column holds the class index of each sample, in the
        same order as the rows of ``fex_data``; it may contain extra trailing rows.
    batch_size : number of rows per batch.
    num_classes : width of the one-hot label encoding.

    Only ``len(fex_data) // batch_size`` batches (at least one) are served per pass,
    so rows beyond the last full batch are never yielded.
    """
    n_samples = fex_data.shape[0]
    # Encode labels only for rows that have features. Without this, a feature
    # array shorter than batch_size would be paired with a longer label slice
    # taken from the extra rows of df.
    y = np_utils.to_categorical(df.y.iloc[:n_samples], num_classes)
    # infinitely serve batches
    while True:
        max_range = max(1, n_samples//batch_size)
        for i in range(max_range):
            batch = slice(i*batch_size, (i+1)*batch_size)
            yield fex_data[batch], y[batch]
                


In [52]:
## wrap the reloaded features of each split in a batch generator
num_classes = 133
fex_gen_options = dict(num_classes=num_classes, batch_size=256)
train_gen_fex = fex_data_generator(train_data, train_data_df, **fex_gen_options)
valid_gen_fex = fex_data_generator(valid_data, valid_data_df, **fex_gen_options)
test_gen_fex = fex_data_generator(test_data, test_data_df, **fex_gen_options)

In [45]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import regularizers

# Classifier trained on the extracted features:
# global average pooling -> dropout -> 512-unit ReLU layer -> dropout -> 133-way softmax
# (alternative first layer that was tried: Flatten(input_shape=train_data.shape[1:]))
model = Sequential([
    GlobalAveragePooling2D(input_shape=(7, 7, 1024)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(133, activation='softmax'),
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_2 ( (None, 1024)              0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 133)               68229     
Total params: 593,029
Trainable params: 593,029
Non-trainable params: 0
_________________________________________________________________


In [46]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Quick run on a small subset: first 250 training rows, first 150 validation rows.
# Training stops early once val_loss has not improved for 4 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
subset_train = (train_data[:250], train_labels[:250])
subset_valid = (valid_data[:150], valid_labels[:150])
model.fit(*subset_train,
          epochs=25,
          batch_size=64,
          validation_data=subset_valid,
          callbacks=[early_stopping])

Train on 250 samples, validate on 150 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 00005: early stopping


<keras.callbacks.History at 0x7f865485f240>

In [48]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Train on the complete in-memory feature arrays.
# Training stops early once val_loss has not improved for 4 epochs.
# NOTE: `model` is not re-created in this cell, so training continues from
# whatever weights it already has.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
full_valid = (valid_data, valid_labels)
model.fit(train_data, train_labels,
          validation_data=full_valid,
          batch_size=64,
          epochs=25,
          callbacks=[early_stopping])

Train on 6656 samples, validate on 832 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 00020: early stopping


<keras.callbacks.History at 0x7f86548d9ef0>

In [53]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Train from the feature generators instead of the in-memory arrays.
# Training stops early once val_loss has not improved for 4 epochs.
# NOTE: `model` is not re-created in this cell, so training continues from
# whatever weights it already has.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
# Must equal the batch_size that train_gen_fex / valid_gen_fex were created with
fex_batch_size = 256
# Step counts are derived from the actual feature arrays rather than typed-in
# sample counts, so they stay correct if the dataset size changes.
model.fit_generator(train_gen_fex,
          steps_per_epoch=(train_data.shape[0] // fex_batch_size),
          epochs=25,
          validation_data=valid_gen_fex,
          validation_steps=(valid_data.shape[0] // fex_batch_size),
          callbacks=[early_stopping])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 00007: early stopping


<keras.callbacks.History at 0x7f86a08c22e8>