<a href="https://colab.research.google.com/github/yiyichanmyae/tensorflow/blob/master/2_3_transferlearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import urllib.request
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.optimizers import RMSprop
from shutil import copyfile
import shutil
import math

def getDataFromURL( url, filename):
  '''
    Download the resource at `url` and store it locally as `filename`.

    url      : address of the resource to fetch
    filename : local path the downloaded bytes are written to

    Nothing is returned; the result is the file on disk.
  '''
  urllib.request.urlretrieve(url=url, filename=filename)


def extractZipFile( zipfile_, destinationdir):
  '''
    Extract every member of a zip archive into a directory.

    zipfile_       : path of the zip archive to read
    destinationdir : directory the archive members are extracted into

    The archive is opened in a `with` block so the file handle is closed
    even when extraction raises (e.g. corrupt archive, disk full).
  '''
  with zipfile.ZipFile(zipfile_, 'r') as zip_ref:
    zip_ref.extractall(destinationdir)

def checkNumOfSamples(dir):
  '''
    Count the entries directly inside a directory.

    dir : path of the directory to inspect

    Returns the number of names reported by os.listdir, so files and
    sub-directories are both counted and nothing is counted recursively.
  '''
  entries = os.listdir(dir)
  return len(entries)


def make_train_val_dir(root_dir):
  '''
    Create the directory tree used for the train/validation split:

        root_dir/training/cat      root_dir/training/dog
        root_dir/validation/cat    root_dir/validation/dog

    root_dir : directory to create; os.mkdir raises FileExistsError when
               it is already present.
  '''
  os.mkdir(root_dir)

  # Build the four leaf paths first, then create them in one pass.
  leaf_dirs = [
      os.path.join(os.path.join(root_dir, split_name), class_name)
      for split_name in ("training", "validation")
      for class_name in ("cat", "dog")
  ]
  for leaf in leaf_dirs:
    os.makedirs(leaf)

def check_directories(root_dir):
  '''
    Print the path of every sub-directory found below root_dir.

    root_dir : directory whose tree is walked with os.walk

    Each path is printed followed by an extra blank line.
  '''
  for current_dir, child_dirs, _ in os.walk(root_dir):
    for child in child_dirs:
      found = os.path.join(current_dir, child)
      print(f"{found} \n")

def split_train_val(source_dir, train_dir, val_dir, train_percent):
  '''
    Copy the non-empty files of source_dir into train_dir and val_dir.

    source_dir    : directory holding the original files
    train_dir     : existing directory that receives the training share
    val_dir       : existing directory that receives the validation share
    train_percent : fraction (0.0 - 1.0) of the entries destined for training

    The training quota is ceil(number of entries in source_dir * train_percent).
    Zero-byte files are reported and skipped; they still count toward the
    total the quota is computed from, but never toward the quota itself.
    Entries are processed in sorted name order so repeated runs produce the
    same split.
  '''
  # List the directory once; os.listdir order is arbitrary, so sort it.
  file_names = sorted(os.listdir(source_dir))
  train_quota = math.ceil(len(file_names) * train_percent)

  copied_to_train = 0
  for name in file_names:
    src_path = os.path.join(source_dir, name)

    if os.path.getsize(src_path) == 0:
      print(f'"{name}" file size is zero. so ignore the file.\n' )
      continue

    # Strict '<' so exactly train_quota files land in training
    # ('<=' would copy one file too many).
    if copied_to_train < train_quota:
      copyfile(src_path, os.path.join(train_dir, name))
      copied_to_train += 1
    else:
      copyfile(src_path, os.path.join(val_dir, name))

# Image Data Generator
def createTrainValGenerator(train_dir, val_dir):
  '''
    Build the Keras directory iterators for training and validation.

    train_dir : directory passed to flow_from_directory for training
    val_dir   : directory passed to flow_from_directory for validation

    Returns (train_generator, val_generator). Both resize images to
    300x300, use batches of 100 and binary labels. Only the training
    generator applies augmentation; both multiply pixel values by 1/255.
  '''
  # Settings shared by the two flow_from_directory calls.
  shared_flow_kwargs = {
      'target_size': (300, 300),
      'batch_size': 100,
      'class_mode': 'binary',
  }

  # train generator: augmentation + rescaling
  # NOTE(review): brightness_range=[0.1, 0.2] looks very dark compared
  # with the un-augmented validation images -- confirm this is intended.
  augmenting_datagen = ImageDataGenerator(
      rotation_range=30,
      width_shift_range=0.1,
      height_shift_range=0.1,
      brightness_range= [0.1, 0.2],
      shear_range=0.1,
      zoom_range=0.1,
      channel_shift_range=0.1,
      fill_mode='nearest',
      cval=0.0,
      horizontal_flip=True,
      rescale=1./255,
  )
  train_generator = augmenting_datagen.flow_from_directory(
      directory=train_dir, **shared_flow_kwargs)

  # valid generator: rescaling only, no augmentation
  plain_datagen = ImageDataGenerator(rescale=1./255)
  val_generator = plain_datagen.flow_from_directory(
      directory=val_dir, **shared_flow_kwargs)

  return train_generator, val_generator

# create model
def build_model(input_, output_):
  '''
    Attach a binary-classification head to an existing graph and compile it.

    input_  : input tensor of the base network (becomes the model input)
    output_ : tensor of the base network the new head is stacked on

    Returns the compiled Model (binary cross-entropy loss, "adam"
    optimizer, accuracy metric). The model summary is printed as a side
    effect.
  '''
  # Head: Flatten -> Dense(1024) -> Dropout(0.2) -> Dense(512) -> sigmoid unit
  head_layers = (
      layers.Flatten(),
      layers.Dense(1024, activation="relu"),
      layers.Dropout(0.2),
      layers.Dense(512, activation="relu"),
      layers.Dense(1, activation="sigmoid"),
  )

  x = output_
  for head_layer in head_layers:
    x = head_layer(x)

  model = Model(input_, x)
  model.compile(
      optimizer="adam",
      loss="binary_crossentropy",
      metrics=["accuracy"],
  )

  model.summary()
  return model



# download data: fetch the cats-vs-dogs archive and unpack it under data_directory
data_url = "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip"
zipfilename = "catsdogs.zip"
data_directory = "img/"

getDataFromURL(data_url, zipfilename)
extractZipFile(zipfilename, data_directory)

# After extraction there should be a "PetImages" directory under
# data_directory, with two sub-directories named "Cat" and "Dog".

# check number of samples in each class directory
source_cat_dir = os.path.join(data_directory, "PetImages/Cat")
source_dog_dir = os.path.join(data_directory, "PetImages/Dog")
print(" Cat --> {}".format(checkNumOfSamples(source_cat_dir)))
print(" Dog --> {}".format(checkNumOfSamples(source_dog_dir)))




# The extracted images have to be split into training and validation sets.

# make root directory
root_dir = "img/cats_dogs"

# Remove any previous tree first so that re-running this cell does not
# fail with FileExistsError.
if os.path.exists(root_dir):
  shutil.rmtree(root_dir)
try:
  make_train_val_dir(root_dir)
except FileExistsError:
  print( "directories already existed")

# Check the created directories
check_directories(root_dir)

train_dir = os.path.join(root_dir, "training")
val_dir = os.path.join(root_dir, "validation")
train_cat_dir, train_dog_dir = (
    os.path.join(train_dir, class_name) for class_name in ("cat", "dog"))
valid_cat_dir, valid_dog_dir = (
    os.path.join(val_dir, class_name) for class_name in ("cat", "dog"))

# Train Validation Split
# copy from source to the subdirectories of Train and Validation
train_percent = 0.8
for class_source, class_train, class_valid in (
    (source_cat_dir, train_cat_dir, valid_cat_dir),
    (source_dog_dir, train_dog_dir, valid_dog_dir),
):
  # Skip a class whose source directory is empty.
  if os.listdir(class_source):
    split_train_val(class_source, class_train, class_valid, train_percent)

# train validation generator
train_generator, val_generator = createTrainValGenerator(train_dir, val_dir)

# get pretrained model: download the InceptionV3 weights file (no top classifier)
pretrained_url = "https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"
pretrained_file = "inception_v3.h5"
getDataFromURL(pretrained_url, pretrained_file)

# weights=None builds the architecture only; the weights are loaded
# from the downloaded file right after.
pretrained_model = InceptionV3(input_shape=(300, 300, 3),
                                include_top=False,
                                weights=None)

pretrained_model.load_weights(pretrained_file)

# Freeze the backbone so that only the new head is trained.
for layer in pretrained_model.layers:
    layer.trainable = False

# Use the output of the intermediate 'mixed7' layer as the feature map
# the new head is attached to.
last_layer = pretrained_model.get_layer('mixed7')
last_output = last_layer.output
# Read the shape from the output tensor: `layer.output_shape` is not
# available on Keras 3 layers, while `output.shape` works on both.
print('last layer output shape: ', last_output.shape)

# build model
model = build_model(pretrained_model.input, last_output)

# train model
history = model.fit(
            train_generator,
            validation_data=val_generator,
            epochs=20,
            steps_per_epoch=20, # batches drawn per epoch (fixed, not num_samples / batch_size)
            verbose=1)

# save the model
model.save("catsdogs.h5")

 Cat --> 12501
 Dog --> 12501
img/cats_dogs/validation 

img/cats_dogs/training 

img/cats_dogs/validation/dog 

img/cats_dogs/validation/cat 

img/cats_dogs/training/dog 

img/cats_dogs/training/cat 

"666.jpg" file size is zero. so ignore the file.

"11702.jpg" file size is zero. so ignore the file.

Found 20002 images belonging to 2 classes.
Found 4996 images belonging to 2 classes.
last layer output shape:  (None, 17, 17, 768)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 300, 300, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 149, 149, 32  86



Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
