Name: Zhuoyou Shen

ID: 2035073656

GitHub Username: zhuoyous

In [1]:
import pandas as pd
import os
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,accuracy_score,precision_score,recall_score
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import VGG16
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.metrics import classification_report
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

### Tensorflow Version : 2.9.1

In [2]:
#function to report Precision, Recall, and F1 score
def metric_score(true_label,pred_label):
    print("The accuracy_score is: {:.3f}".format(accuracy_score(true_label,pred_label)))
    print("The precision_score is: {:.3f}".format(precision_score(true_label,pred_label,average="macro")))
    print("The recall_score is: {:.3f}".format(recall_score(true_label,pred_label,average="macro")))
    print("The f1_score is: {:.3f}".format(f1_score(true_label,pred_label,average="macro")))

### Load data for EfficientNetB0

In [3]:
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=(0.3, 1),
    horizontal_flip=True,
    vertical_flip=True)
train_generator = train_datagen.flow_from_directory(
    '../data/images',
    target_size=(224, 224),
    batch_size=1,
    shuffle=True,
    class_mode='categorical')

Found 1176 images belonging to 20 classes.


In [4]:
all_data = np.zeros((len(train_generator),224,224,3))
all_label = np.zeros((len(train_generator),20))
for index in range(len(train_generator)):
    all_data[index,:,:,:] = train_generator[index][0]
    all_label[index,:] = train_generator[index][1]
all_data.shape,all_label.shape

((1176, 224, 224, 3), (1176, 20))

In [5]:
X_train, X_test, y_train, y_test = train_test_split(all_data,all_label,test_size=0.3,random_state=0)
X_test, X_valid, y_test, y_valid = train_test_split(X_test,y_test,test_size=0.5,random_state=0,shuffle = False)
print("The proportion of train",X_train.shape[0] / len(train_generator))
print("The proportion of test",X_test.shape[0] / len(train_generator))
print("The proportion of valid",X_valid.shape[0] / len(train_generator))

The proportion of train 0.6998299319727891
The proportion of test 0.14965986394557823
The proportion of valid 0.15051020408163265


#### EfficientNetB0

In [6]:
# Download pre training model
model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224,224,3))

# Freeze all layers of the pre training model
for layer in model.layers:
    layer.trainable = False

# Add a new classifier
x = model.output
x = keras.layers.GlobalMaxPooling2D()(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
predictions = keras.layers.Dense(20, activation='softmax')(x)

# Build a complete model
model = keras.Model(inputs=model.input, outputs=predictions)
# Compile model
optimizer = Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [7]:
callbacks_list = [EarlyStopping(monitor='val_loss', patience=50),
                  ModelCheckpoint(filepath='EfficientNetB0_model.h5', monitor='val_loss', save_best_only=True) ]

history = model.fit(X_train, y_train, batch_size=5,
                    epochs=100, validation_data=(X_valid, y_valid), callbacks=callbacks_list)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


In [8]:
pred_prob = model.predict(X_test)
pred_value = pred_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)



### Model Accuracy

In [9]:
print(classification_report(y_true, pred_value))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      0.88      0.93         8
           2       0.60      1.00      0.75         6
           3       0.83      0.42      0.56        12
           4       0.64      0.64      0.64        11
           5       0.88      0.54      0.67        13
           6       0.86      1.00      0.92         6
           7       0.42      0.83      0.56         6
           8       0.86      0.86      0.86         7
           9       1.00      0.80      0.89        15
          10       0.73      1.00      0.85        11
          11       0.75      1.00      0.86        12
          12       0.50      0.50      0.50         4
          13       0.75      0.38      0.50         8
          14       0.54      0.88      0.67         8
          15       0.80      0.50      0.62         8
          16       1.00      0.89      0.94         9
          17       0.70    

In [10]:
metric_score(y_true,pred_value)

The accuracy_score is: 0.756
The precision_score is: 0.782
The recall_score is: 0.769
The f1_score is: 0.749


### Load data for VGG16

In [11]:
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=(0.3, 1),
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input)
train_generator = train_datagen.flow_from_directory(
    '../data/images',
    target_size=(224, 224),
    batch_size=1,
    shuffle=True,
    class_mode='categorical')

Found 1176 images belonging to 20 classes.


In [12]:
all_data = np.zeros((len(train_generator),224,224,3))
all_label = np.zeros((len(train_generator),20))
for index in range(len(train_generator)):
    all_data[index,:,:,:] = train_generator[index][0]
    all_label[index,:] = train_generator[index][1]
all_data.shape,all_label.shape

((1176, 224, 224, 3), (1176, 20))

In [13]:
X_train, X_test, y_train, y_test = train_test_split(all_data,all_label,test_size=0.3,random_state=0)
X_test, X_valid, y_test, y_valid = train_test_split(X_test,y_test,test_size=0.5,random_state=0,shuffle = False)
print("The proportion of train",X_train.shape[0] / len(train_generator))
print("The proportion of test",X_test.shape[0] / len(train_generator))
print("The proportion of valid",X_valid.shape[0] / len(train_generator))

The proportion of train 0.6998299319727891
The proportion of test 0.14965986394557823
The proportion of valid 0.15051020408163265


#### VGG16

In [14]:
# Download pre training model
model = VGG16(weights='imagenet', include_top=False, input_shape=(224,224,3))

# Freeze all layers of the pre training model
for layer in model.layers:
    layer.trainable = False

# Add a new classifier
x = model.output
x = keras.layers.GlobalMaxPooling2D()(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
predictions = keras.layers.Dense(20, activation='softmax')(x)

# Build a complete model
model = keras.Model(inputs=model.input, outputs=predictions)
# Compile model
optimizer = Adam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [15]:
# Training model
callbacks_list = [EarlyStopping(monitor='val_loss', patience=50),
                  ModelCheckpoint(filepath='VGG16_model.h5', monitor='val_loss', save_best_only=True) ]

history = model.fit(X_train, y_train, batch_size=5,
                    epochs=100, validation_data=(X_valid, y_valid),callbacks=callbacks_list)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


In [16]:
pred_prob = model.predict(X_test)
pred_value = pred_prob.argmax(axis=1)
y_true = y_test.argmax(axis=1)



### Model Accuracy

In [17]:
print(classification_report(y_true, pred_value))

              precision    recall  f1-score   support

           0       1.00      0.62      0.77         8
           1       0.50      0.56      0.53         9
           2       0.82      0.69      0.75        13
           3       0.83      0.45      0.59        11
           4       0.25      0.25      0.25         8
           5       0.43      0.67      0.52         9
           6       0.56      0.62      0.59         8
           7       0.33      0.50      0.40         4
           8       0.86      0.60      0.71        10
           9       0.88      0.78      0.82         9
          10       1.00      0.42      0.59        12
          11       0.88      0.88      0.88         8
          12       0.00      0.00      0.00         3
          13       0.33      0.44      0.38         9
          14       0.35      0.67      0.46         9
          15       0.57      0.50      0.53         8
          16       0.79      0.85      0.81        13
          17       0.67    

In [18]:
metric_score(y_true,pred_value)

The accuracy_score is: 0.597
The precision_score is: 0.622
The recall_score is: 0.571
The f1_score is: 0.575
