TransferLearning

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.applications.efficientnet import EfficientNetB6, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, metrics, layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import plot_model
from IPython.display import Image
import matplotlib.pyplot as plt

In [2]:
#initiate the number of menus you want the model to classify
num_class = 89

In [3]:
#Download the baseline model: EfficientNet
#Options: EfficientNetB0, EfficientNetB1, ... , EfficientNetB7
#The higher the number, the more complex the model is
#Top layer is not included since we want to perform transfer learning

#B6 is used in this case
conv_base = EfficientNetB6(input_shape=(224,224,3), include_top=False)

In [4]:
#Perform augmentation on training data to increase variation and avoid overfitting
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# For validation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [5]:
#Initiate batch_size
batch_size = 16

#Generate training data:
train_generator = train_datagen.flow_from_directory(
    # This is the target directory
    "dev_data/train_" + str(num_class) + "classes",
    # All images will be resized to target height and width.
    target_size=(224, 224),
    batch_size=batch_size,
    # Since we use categorical_crossentropy loss, we need categorical labels
    class_mode="categorical",
)

Found 25303 images belonging to 89 classes.


In [6]:
#Generate validation data:
validation_generator = test_datagen.flow_from_directory(
    # This is the target directory
    "dev_data/validation_" + str(num_class) + "classes",
    # All images will be resized to target height and width.
    target_size=(224, 224),
    batch_size=batch_size,
    # Since we use categorical_crossentropy loss, we need categorical labels
    class_mode="categorical",
)

Found 2960 images belonging to 89 classes.


In [7]:
#Freezing based model didn't work well in my case so I decided to unfreeze all layers
conv_base.trainable=True

In [8]:
#Define dropout_rate
dropout_rate = 0.5

#Create a model
model = models.Sequential()
#Let's start with the base model you downloaded earlier
model.add(conv_base)

#add dropout layer
if dropout_rate > 0:
    model.add(layers.Dropout(dropout_rate, name="dropout_out4"))

#Add an activation layer with regularization (L1 norm)
model.add(layers.Dense(512, activation="relu", name="relu1", kernel_regularizer='l1'))
if dropout_rate > 0:
    model.add(layers.Dropout(dropout_rate, name="dropout_out1"))

#Add a pooling layer
model.add(layers.GlobalMaxPooling2D(name="gmp")) 

#Add the fully-connected layer
model.add(layers.Dense(num_class, activation="softmax", name="fc_out"))

In [9]:
#This will show the summary of the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb6 (Functional)  (None, 7, 7, 2304)        40960143  
_________________________________________________________________
dropout_out4 (Dropout)       (None, 7, 7, 2304)        0         
_________________________________________________________________
relu1 (Dense)                (None, 7, 7, 512)         1180160   
_________________________________________________________________
dropout_out1 (Dropout)       (None, 7, 7, 512)         0         
_________________________________________________________________
gmp (GlobalMaxPooling2D)     (None, 512)               0         
_________________________________________________________________
fc_out (Dense)               (None, 89)                45657     
Total params: 42,185,960
Trainable params: 41,961,521
Non-trainable params: 224,439
______________________________________

In [10]:
#To show the whole architecture of the base model
#conv_base.summary()

In [11]:
#Define optimizer, loss function, and metric
model.compile(optimizer=optimizers.Adam(lr = 0.0001),
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy']) 

In [12]:
"""The following lines can be modified to set a callbacks function:
Here we have Earlystopping, Modelcheckpoint, and CSVLogger which allow the training process to stop when the model performance stops improving 
save the respective weights to the predefined path and take log of every process. In this case, we want to minimize the cost function of the validation dataset.
"""
model_path = 'dev_model/' + 'B6_' + str(num_class) + 'classes' + '-{epoch:02d}-{val_loss:.2f}.h5'
#naming convention: (name of the process)_(number of classes)-(number of epoch)-(evaluation metrics: val_loss).h5
keras_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=2, min_lr=0.000001, verbose=1),
    ModelCheckpoint(filepath=model_path, monitor='val_loss', verbose = 1, save_best_only=True, mode='min'),
    CSVLogger("dev_model/B6-Final_log.csv")]

In [None]:
#Let's fit the model!
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator.classes) // batch_size,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=len(validation_generator.classes)// batch_size,
    verbose=1,
    use_multiprocessing=False,
    callbacks=keras_callbacks
)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/100
Epoch 00001: val_loss improved from inf to 6.90537, saving model to dev_model/B6_89classes-01-6.91.h5
Epoch 2/100
Epoch 00002: val_loss improved from 6.90537 to 4.70724, saving model to dev_model/B6_89classes-02-4.71.h5
Epoch 3/100
  40/1581 [..............................] - ETA: 17:51 - loss: 4.1728 - categorical_accuracy: 0.4453

In [None]:
#Let's plot the training result
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.grid(True)
plt.gca().set_ylim(0,10) #"ไว้มาเปลี่ยนเป็น max loss, min loss"
plt.savefig('dev_model/B6_89classes_plot.png')
plt.show()

In [None]:
#Let's plot the accuracy 
plt.plot(history.history['categorical_accuracy'])
plt.plot(history.history['val_categorical_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.savefig('dev_model/B6_89classes_acc.png')
plt.show()

In [None]:
#Let's plot the loss 
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.savefig('dev_model/B6_89classes_loss.png')
plt.show()