In [None]:
import os
import shutil
import time
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# zipfile processing
# NOTE(review): data_zip_dir is an absolute path while data_dir is relative to
# the working directory -- confirm that this is intended.
data_zip_dir = '/datasets/dogs-vs-cats/dogs-vs-cats.zip'
data_dir = './datasets/dogs-vs-cats'

# Unpack the outer archive only once: an existing data_dir is treated as
# "already extracted".
if not os.path.isdir(data_dir):
    with zipfile.ZipFile(data_zip_dir, 'r') as z:
        z.extractall(data_dir)

# Unpack the nested training archive, again only once so that re-running this
# cell does not re-extract every image.
# Assumes train.zip unpacks into data_dir/train, which is where the later
# cells read the images from -- TODO confirm against the archive layout.
train_zip_dir = os.path.join(data_dir, 'train.zip')
train_dir = os.path.join(data_dir, 'train')
if not os.path.isdir(train_dir):
    with zipfile.ZipFile(train_zip_dir, 'r') as z:
        z.extractall(data_dir)

# Extraction of the second archive (test1.zip) is currently disabled:
# test_zip_dir = os.path.join(data_dir, 'test1.zip')
# test_dir = os.path.join(data_dir, 'test1')
# with zipfile.ZipFile(test_zip_dir, 'r') as z:
#     z.extractall(data_dir)

In [None]:
# dataset directory making func
def make_dataset_dir(new_data_dir = './datasets/dogs-vs-cats_preparared'):
    """Create the train/valid/test directory tree with dog/ and cat/ subfolders.

    Existing directories are left untouched (``exist_ok=True``).

    Args:
        new_data_dir: Root directory under which the three split folders
            (``train_set``, ``valid_set``, ``test_set``) are created.

    Returns:
        A 9-tuple of paths, ordered split by split as
        ``(set_dir, dog_dir, cat_dir)`` for train, then valid, then test.
    """
    os.makedirs(new_data_dir, exist_ok=True)

    collected = []
    for split_name in ('train_set', 'valid_set', 'test_set'):
        split_dir = os.path.join(new_data_dir, split_name)
        class_dirs = [os.path.join(split_dir, label) for label in ('dog', 'cat')]
        for class_dir in class_dirs:
            # makedirs also creates split_dir itself as an intermediate folder.
            os.makedirs(class_dir, exist_ok=True)
        collected.append(split_dir)
        collected.extend(class_dirs)
    return tuple(collected)

In [None]:
# Build a small, class-balanced train/valid/test subset of the extracted images.
new_data_dir = './datasets/dogs-vs-cats_preparared'
data_dir = './datasets/dogs-vs-cats'
train_dir = os.path.join(data_dir, 'train')

# init: always start from an empty prepared directory
if os.path.isdir(new_data_dir):
    shutil.rmtree(new_data_dir)
# Pass new_data_dir explicitly so the tree that was just removed is the same
# one that gets recreated, even if the path above is edited.
(train_set_dir, train_dog_dir, train_cat_dir,
 valid_set_dir, valid_dog_dir, valid_cat_dir,
 test_set_dir, test_dog_dir, test_cat_dir) = make_dataset_dir(new_data_dir)

# image file names list
dog_files = [f'dog.{i}.jpg' for i in range(12500)]
cat_files = [f'cat.{i}.jpg' for i in range(12500)]

# Total number of images per split; half of each comes from each class.
train = 2000
valid = 1000
test = 1000

n_train, n_valid, n_test = train // 2, valid // 2, test // 2

# Each class must be able to supply its half of every split; otherwise the
# slices below would silently come up short.
if n_train + n_valid + n_test > min(len(dog_files), len(cat_files)):
    raise ValueError('out of datasets')


def _copy_images(file_names, src_dir, dst_dir):
    """Copy every file named in ``file_names`` from ``src_dir`` into ``dst_dir``."""
    for file_name in file_names:
        shutil.copy(os.path.join(src_dir, file_name),
                    os.path.join(dst_dir, file_name))


# move images to new_data dir: consecutive, non-overlapping slices per class
valid_end = n_train + n_valid
test_end = valid_end + n_test
for class_files, (train_dst, valid_dst, test_dst) in (
        (dog_files, (train_dog_dir, valid_dog_dir, test_dog_dir)),
        (cat_files, (train_cat_dir, valid_cat_dir, test_cat_dir))):
    _copy_images(class_files[:n_train], train_dir, train_dst)
    _copy_images(class_files[n_train:valid_end], train_dir, valid_dst)
    _copy_images(class_files[valid_end:test_end], train_dir, test_dst)

In [None]:
# Report how many images (dogs + cats) ended up in each split directory.
n_train_images = len(os.listdir(train_dog_dir)) + len(os.listdir(train_cat_dir))
n_valid_images = len(os.listdir(valid_dog_dir)) + len(os.listdir(valid_cat_dir))
n_test_images = len(os.listdir(test_dog_dir)) + len(os.listdir(test_cat_dir))

print(f'# of train set : {n_train_images}')
print(f'# of valid set : {n_valid_images}')
print(f'# of test set : {n_test_images}')

In [None]:
# set image generators
train_dir = train_set_dir
validation_dir = valid_set_dir
test_dir = test_set_dir

# Random augmentation is applied to the training images only.
augmentation_options = dict(
    rotation_range=20,
    shear_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
# Validation and test images are only rescaled.
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Every directory iterator shares the same image size, batch size and label mode.
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

In [None]:
# model definition
input_shape = [150, 150, 3]  # image shape fed to the network (matches the 150x150 generator target size)
def build_model():
    """Build and compile a binary classifier on top of a frozen InceptionV3 base.

    The ImageNet-pretrained convolutional base (without its top) is marked
    non-trainable and followed by Flatten -> Dense(256, relu) ->
    Dense(1, sigmoid). The model is compiled with RMSprop (learning rate 1e-4),
    binary cross-entropy loss and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Alternative base that was tried:
    # backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = False

    classifier = models.Sequential([
        backbone,
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    classifier.compile(
        optimizer=optimizers.RMSprop(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# main loop without cross-validation (stage 1: frozen convolutional base)
starttime = time.time()
num_epochs = 30
model = build_model()
# Model.fit accepts generators directly; fit_generator is deprecated.
# steps_per_epoch=100 and validation_steps=50 at batch size 20 correspond to
# the 2000 training and 1000 validation images prepared earlier.
history = model.fit(train_generator,
                    epochs=num_epochs, steps_per_epoch=100,
                    validation_data=validation_generator, validation_steps=50)

# saving the model (this checkpoint is reloaded by the fine-tuning stage)
model.save('cats_and_dogs_small_pretrained.h5')

# evaluation: use Model.evaluate for both generators instead of mixing it with
# the deprecated evaluate_generator. Note that train_generator applies random
# augmentation, so the training score is measured on augmented images.
train_loss, train_acc = model.evaluate(train_generator)
test_loss, test_acc = model.evaluate(test_generator)
print('train_acc:', train_acc)
print('test_acc:', test_acc)
print("elapsed time (in sec): ", time.time()-starttime)

In [None]:
# visualization
def plot_acc_loss(h):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        h: Object whose ``history`` mapping holds per-epoch sequences under the
            keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
            ``'val_loss'`` (e.g. the value returned by ``model.fit``).
    """
    plt.figure(figsize=(15.6, 4.8), dpi=100)

    # (training key, validation key, label used for both title and y-axis)
    panels = (
        ('accuracy', 'val_accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Loss'),
    )
    for position, (train_key, valid_key, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(h.history[train_key])
        plt.plot(h.history[valid_key])
        plt.title(label)
        plt.ylabel(label)
        plt.xlabel('Epoch')
        plt.legend(['Training', 'Validation'], loc=0)

    plt.show()

# Print the model's layer summary, then plot the accuracy/loss curves stored
# in `history` by the preceding training run.
model.summary()
plot_acc_loss(history)

In [None]:
# Unfreezing func
def unfreeze_model(model, fine_tune_at=249):
    """Unfreeze the top of the convolutional base and recompile for fine-tuning.

    The model built in this notebook is a Sequential wrapper whose first layer
    is the whole pretrained base, so the layer index has to be applied to the
    layers *inside* that base. Slicing the wrapper's own short layer list at
    249 selects nothing and would leave every layer frozen.

    Args:
        model: Model to modify in place. If its first layer exposes a
            ``layers`` attribute it is treated as the nested base; otherwise
            ``model`` itself is treated as the base (flat model).
        fine_tune_at: Index of the first base layer to make trainable.
            Defaults to 249, the value this notebook used originally.

    Returns:
        None. ``model`` is modified and recompiled in place with RMSprop
        (learning rate 1e-5), binary cross-entropy loss and accuracy.
    """
    first_layer = model.layers[0] if model.layers else None
    if hasattr(first_layer, 'layers'):
        # Nested base: lift the container-level freeze first, otherwise the
        # per-layer flags set below have no effect, then re-freeze the bottom.
        # NOTE(review): assumes only nested models expose `.layers` -- confirm
        # for the Keras version in use.
        base = first_layer
        base.trainable = True
        for layer in base.layers[:fine_tune_at]:
            layer.trainable = False
    else:
        # Flat model: only unfreeze from fine_tune_at onwards.
        base = model

    for layer in base.layers[fine_tune_at:]:
        layer.trainable = True

    # Recompile so the changed trainable flags take effect, with a lower
    # learning rate than the one used for the first training stage.
    model.compile(optimizer=optimizers.RMSprop(learning_rate=1e-5),
                    loss='binary_crossentropy', metrics=['accuracy'])
    
# Apply the unfreeze + recompile step to the model currently held in memory.
# NOTE(review): the following cell rebinds `model` via models.load_model(...),
# so confirm this call still affects the model that is actually fine-tuned.
unfreeze_model(model)

In [None]:
# main loop without cross-validation (stage 2: fine-tuning)
starttime = time.time()
num_epochs = 50
# Start from the checkpoint written at the end of the first training stage.
model = models.load_model('./cats_and_dogs_small_pretrained.h5')
# load_model returns a new object, so unfreezing done on the previous `model`
# does not carry over. Apply it to the loaded model, otherwise this stage
# would train with the checkpoint's original trainable flags and optimizer.
unfreeze_model(model)
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    epochs=num_epochs, steps_per_epoch=100,
                    validation_data=validation_generator, validation_steps=50)

# saving the model
model.save('cats_and_dogs_small_finetuned.h5')

# evaluation: use Model.evaluate for both generators instead of mixing it with
# the deprecated evaluate_generator.
train_loss, train_acc = model.evaluate(train_generator)
test_loss, test_acc = model.evaluate(test_generator)
print('train_acc:', train_acc)
print('test_acc:', test_acc)
print("elapsed time (in sec): ", time.time()-starttime)

In [None]:
# Print the model's layer summary, then plot the accuracy/loss curves stored
# in `history` by the preceding fine-tuning run.
model.summary()
plot_acc_loss(history)