## Parameter settings and library imports

In [20]:
from __future__ import print_function
#
import os
import glob
import re
import argparse
import pandas as pd
import numpy as np
import subprocess
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import matplotlib.pyplot as plt

import time
from time import sleep
from tqdm import tqdm # if use notebook
#from tqdm import tqdm_notebook as tqdm

from threading import Thread, Event, Timer
import queue

from PIL import Image
import cv2
import imgaug as ia
from imgaug import augmenters as iaa

import random

def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy. This converter accepts the usual
    spellings instead and rejects everything else.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('1', 'true', 't', 'yes', 'y'):
        return True
    if token in ('0', 'false', 'f', 'no', 'n'):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got %r" % (value,))


# Run-time configuration. Defaults are what the notebook uses, because
# parse_args([]) below ignores the real command line.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu_id', default=6)
parser.add_argument('--image_dir', default="/home/seanyu/datasets/cat_dog/dataset/") # Path to where you put your data
parser.add_argument('--save_dir', default='./result')
parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--do_augment', default=True, type=_str2bool)
parser.add_argument('--epochs', default=50, type=int)
parser.add_argument('--lr', default=0.0001, type=float)
# Three integers; nargs=3 keeps a CLI value the same shape as the default
# (previously a CLI value would have been parsed as a single int).
parser.add_argument('--image_size', default=(256, 256, 3), type=int, nargs=3)
parser.add_argument('--n_classes', default=2, type=int)
parser.add_argument('--n_batch', default=100, type=int)
parser.add_argument('--train_ratio', default=0.99, type=float)
parser.add_argument('--use_model_ckpt', default=None, type=str)
parser.add_argument('--model_file_name', default='tmp_nb')
parser.add_argument('--n_threads', default=4, type=int)
parser.add_argument('--dq_size', default=6, type=int)
parser.add_argument('--use_pretrain', default=True, type=_str2bool)

# Empty argv: inside a notebook sys.argv belongs to the kernel, not to us.
FLAGS = parser.parse_args([])
# argparse yields a list for nargs=3; keep image_size a tuple either way.
FLAGS.image_size = tuple(FLAGS.image_size)
print(FLAGS)

Namespace(batch_size=64, do_augment=True, dq_size=6, epochs=50, gpu_id=6, image_dir='/home/seanyu/datasets/cat_dog/dataset/', image_size=(256, 256, 3), lr=0.0001, model_file_name='tmp_nb', n_batch=100, n_classes=2, n_threads=4, save_dir='./result', train_ratio=0.99, use_model_ckpt=None, use_pretrain=True)


## Check path and load data

In [2]:
# The GPU selection is written to the environment before TensorFlow / Keras
# are imported on the following lines; keep this ordering.
os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)
import tensorflow as tf
import keras
from keras.utils.np_utils import to_categorical

# Output locations. model_dir is only recorded as a path here; the result
# root and the graphs folder are created when missing.
model_dir = FLAGS.save_dir + '/model'
graphs_dir = FLAGS.save_dir + '/graphs'
for out_dir in (FLAGS.save_dir, graphs_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

# ---- Get data ----
d_train = FLAGS.image_dir + '/train/'
d_test = FLAGS.image_dir + '/test1/'

image_train_list = glob.glob(d_train + '*.jpg')
image_test_list = glob.glob(d_test + '*.jpg')

df_train = pd.DataFrame({'img_path': image_train_list})
df_test = pd.DataFrame({'img_path': image_test_list})

# The label is the part of the file name before the first dot,
# mapped to an integer: dog -> 0, cat -> 1.
df_train['cate'] = [os.path.basename(p).split(".")[0] for p in df_train.img_path]
df_train.cate = df_train.cate.replace({'dog': 0, 'cat': 1})

nb_epoch = FLAGS.epochs

# Split each class separately (class 0 first, then class 1) so both classes
# contribute the same fraction to the validation set.
df_train_0, df_val_0 = train_test_split(
    df_train.loc[df_train['cate'] == 0],
    test_size=1 - FLAGS.train_ratio,
)
df_train_1, df_val_1 = train_test_split(
    df_train.loc[df_train['cate'] == 1],
    test_size=1 - FLAGS.train_ratio,
)

# One validation frame with a fresh 0..n-1 index; the per-class pieces are
# no longer needed afterwards.
df_val = pd.concat((df_val_0, df_val_1), ignore_index=True)

del df_val_0, df_val_1

  from ._conv import register_converters as _register_converters


In [13]:
def cv_load_and_resize(x, image_size, is_training = True, do_augment = False, seq = None):
    """Read an image file with OpenCV, resize it and optionally augment it.

    Args:
        x: path of the image file, passed to ``cv2.imread``.
        image_size: 3-element sequence unpacked as (width, height, channels);
            only the first two values are used, as the ``cv2.resize`` target.
        is_training: augmentation is applied only when this is true.
        do_augment: augmentation is applied only when this is true.
        seq: object exposing ``augment_image(image)``; required when both
            ``do_augment`` and ``is_training`` are true.

    Returns:
        The resized (and possibly augmented) image.

    Raises:
        IOError: if the file cannot be read as an image.
        ValueError: if augmentation is requested without ``seq``.
    """
    im_w, im_h, _ = image_size
    im = cv2.imread(x)
    if im is None:
        # cv2.imread does not raise on a missing/corrupt file, it returns
        # None; fail here with the offending path instead of inside resize.
        raise IOError("Unable to read image file: %s" % (x,))
    im = cv2.resize(im, (im_w, im_h))
    if do_augment and is_training:
        if seq is None:
            raise ValueError("do_augment=True requires an augmenter to be passed as `seq`")
        im = seq.augment_image(im)
    return im

# Load every training / validation image into memory as one array each,
# and one-hot encode the integer labels.
train_frame = pd.concat((df_train_0, df_train_1))
x_train = np.array([
    cv_load_and_resize(path, image_size=FLAGS.image_size)
    for path in train_frame.img_path
])
y_train = to_categorical(train_frame.cate.values)
del train_frame

x_val = np.array([
    cv_load_and_resize(path, image_size=FLAGS.image_size)
    for path in df_val.img_path
])
y_val = to_categorical(df_val.cate.values)

print("Training set size: ", x_train.shape)
print("Validation set size: ", x_val.shape)

Training set size:  (24748, 256, 256, 3)
Validation set size:  (252, 256, 256, 3)


## Callbacks

In [7]:
class EarlyStopping():
    """Raise a stop flag once the validation loss stops improving.

    An epoch counts as an improvement when its loss is lower than the best
    loss seen so far by more than ``min_delta``. After ``patience``
    consecutive epochs without improvement, ``stop`` becomes True (and is
    never reset by this class).

    NOTE: a later cell imports ``EarlyStopping`` from ``keras.callbacks``,
    which rebinds this name once that cell has run.
    """

    def __init__(self, patience, min_delta = 0.0001):
        self.patience = patience
        # required margin below the best loss for an epoch to count
        self.min_delta = min_delta
        self.min_loss = None      # best loss so far, None before first epoch
        self.epoch_count = 0      # consecutive epochs without improvement
        self.stop = False

    def on_epoch_end(self, val_loss, *args, **kwargs):
        """Update the counters with this epoch's validation loss."""
        improved = (self.min_loss is None
                    or val_loss < self.min_loss - self.min_delta)
        if improved:
            self.min_loss, self.epoch_count = val_loss, 0
        else:
            self.epoch_count += 1

        # patience exhausted -> signal the training loop to stop
        if self.epoch_count >= self.patience:
            self.stop = True
        
class Model_checkpoint():
    """Save the session through ``saver`` at the end of an epoch.

    The best-so-far model (lowest validation loss) is always written to
    ``<model_name>.ckpt``. When ``save_best_only`` is False an additional
    per-epoch checkpoint is written on every call.
    """

    def __init__(self, model_name, save_best_only = True):
        self.model_name = model_name
        self.save_best_only = save_best_only
        self.min_loss = None  # lowest validation loss seen, None at start

    def on_epoch_end(self, val_loss, nth_epoch, saver, sess, *args, **kwargs):
        """Checkpoint on improvement and, optionally, on every epoch.

        Args:
            val_loss: validation loss of the epoch that just finished.
            nth_epoch: epoch index, used in the per-epoch file name and as
                the ``global_step`` of the per-epoch save.
            saver: object whose ``save(sess, path, ...)`` writes a checkpoint
                and returns the written path.
            sess: session handed through to ``saver.save``.
        """
        is_best = self.min_loss is None or val_loss < self.min_loss
        if is_best:
            print("== Validation loss has an improvement, save model ==")
            self.min_loss = val_loss
            best_path = self.model_name + '.ckpt'
            save_path = saver.save(sess, best_path)
            print("Model saved in path: %s" % save_path)

        if self.save_best_only:
            return

        epoch_path = self.model_name + '_' + str(nth_epoch) + '.ckpt'
        saver.save(sess, epoch_path, global_step=nth_epoch)
        
class ReduceLROnPlateau():
    """Shrink the learning rate when the validation loss plateaus.

    After ``patience`` consecutive epochs without a new best validation
    loss, ``lr`` is multiplied by ``factor`` (never going below ``min_lr``)
    and the plateau counter starts over. The current value is exposed as
    the ``lr`` attribute; this class does not apply it to any optimizer.

    NOTE: a later cell imports ``ReduceLROnPlateau`` from
    ``keras.callbacks``, which rebinds this name once that cell has run.

    Args:
        lr: initial learning rate.
        factor: multiplier applied to ``lr`` on each reduction.
        patience: number of consecutive non-improving epochs that triggers
            a reduction.
        min_lr: lower bound for ``lr``.
    """

    def __init__(self, lr, factor, patience, min_lr = 1e-10):
        self.lr = lr
        self.factor = factor
        self.patience = patience
        self.min_lr = min_lr
        self.min_loss = None   # best validation loss so far
        self.epoch_count = 0   # consecutive epochs without improvement

    def on_epoch_end(self, val_loss, *args, **kwargs):
        """Record this epoch's validation loss and adjust ``lr`` if needed."""
        if self.min_loss is None or val_loss < self.min_loss:
            # Reset the *instance* counter. The previous code assigned a
            # local variable here, so earlier bad epochs were never
            # forgotten and the rate was reduced too early.
            self.epoch_count = 0
            self.min_loss = val_loss
        else:
            self.epoch_count += 1

        if self.epoch_count == self.patience:
            self.lr *= self.factor
            self.epoch_count = 0

            # clamp to the configured floor
            if self.lr <= self.min_lr:
                self.lr = self.min_lr
                
class Run_collected_functions():
    """Hold callback collections per training hook and dispatch to them.

    ``callback_dicts`` must contain all six hook keys (a missing key raises
    KeyError). Each value is stored on the instance under the same name;
    only the ``on_epoch_end`` collection is actually dispatched here.
    """

    def __init__(self, callback_dicts):
        hook_names = (
            'on_session_begin',
            'on_session_end',
            'on_batch_begin',
            'on_batch_end',
            'on_epoch_begin',
            'on_epoch_end',
        )
        for hook in hook_names:
            setattr(self, hook, callback_dicts[hook])

    def run_on_epoch_end(self, val_loss, nth_epoch = None, sess = None, saver = None):
        """Call ``on_epoch_end`` on every registered epoch-end callback, in order."""
        payload = dict(val_loss=val_loss, nth_epoch=nth_epoch, sess=sess, saver=saver)
        for callback in self.on_epoch_end:
            callback.on_epoch_end(**payload)

    def run_on_session_end(self, *args, **kwargs):
        """Placeholder: accepts anything and does nothing."""
        return None

## Build model

In [35]:
import keras.backend as K
from keras.layers import (
    Conv2D, 
    Input, 
    Dense, 
    GlobalAveragePooling2D, 
    Activation
)
from keras.models import (
    Model, 
    load_model, 
    save_model)
from keras.optimizers import (
    SGD, 
    Adam, 
    TFOptimizer)
from keras.callbacks import (
    Callback, 
    ReduceLROnPlateau, 
    ModelCheckpoint, 
    EarlyStopping)

from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.utils.generic_utils import get_custom_objects

# Drop any graph/session state left over from a previous run of this cell.
K.clear_session()

# Pretrained: start from ImageNet weights and train for a few epochs.
# Otherwise: train for longer from random initialisation.
# NOTE(review): the original left `model` undefined when use_pretrain was
# False (NameError at compile). Using the same ResNet50 architecture with
# weights=None is an assumption about the intended behaviour -- confirm.
if FLAGS.use_pretrain:
    epoch_to_run = 10
    backbone_weights = 'imagenet'
else:
    epoch_to_run = 50
    backbone_weights = None

res_model = ResNet50(include_top=False,
                     weights=backbone_weights,
                     input_shape=FLAGS.image_size)

# Classification head: global average pooling followed by a softmax layer
# with one unit per class.
x = GlobalAveragePooling2D()(res_model.output)
out = Dense(units=FLAGS.n_classes, activation="softmax")(x)

model = Model(inputs=[res_model.input],
              outputs=[out])

optim = Adam(lr = FLAGS.lr)
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer=optim)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        input_1[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 128, 128, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 128, 128, 64) 0           bn_conv1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [None]:
# Train on the in-memory arrays, reshuffling each epoch and evaluating on
# the held-out validation arrays.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=FLAGS.batch_size,
    epochs=epoch_to_run,
    verbose=1,
    shuffle=True,
    validation_data=(x_val, y_val),
)


Train on 24748 samples, validate on 252 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100