In [1]:
# Clone the project repository. Only needed on the first run: comment this
# line out afterwards, because git refuses to clone into an existing folder.
!git clone https://github.com/trung8134/NCKH_xAI.git

Cloning into 'NCKH-FR-2023'...
remote: Enumerating objects: 102, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 102 (delta 19), reused 29 (delta 12), pack-reused 66[K
Receiving objects: 100% (102/102), 41.42 KiB | 5.92 MiB/s, done.
Resolving deltas: 100% (58/58), done.


In [None]:
pip install gdown

In [None]:
import os
import zipfile

import gdown

# Enter the cloned repository so that the project modules imported later
# resolve from the working directory. The guard keeps the cell re-runnable:
# on a second run we are already inside the repository.
if os.path.basename(os.getcwd()) != 'NCKH_xAI':
    os.chdir('NCKH_xAI')

# Repository root; kept as a module-level name for later cells.
current_directory = os.getcwd()

# Create the 'Datasets' folder inside the repository (no-op if it exists).
datasets_dir = os.path.join(current_directory, 'Datasets')
os.makedirs(datasets_dir, exist_ok=True)

# Google Drive URL of the zipped training data.
file_url = 'https://drive.google.com/uc?id=1iMr55JfOhJRnrSzu9i70v7wISrHQGVoE'

# Download straight to the target path instead of changing directory, so the
# working directory stays at the repository root even if the download fails.
archive_path = os.path.join(datasets_dir, 'training.zip')
downloaded = gdown.download(file_url, output=archive_path)
if not downloaded:
    # NOTE(review): older gdown versions return None on failure instead of raising.
    raise RuntimeError(f'Could not download dataset from {file_url}')

# Extract with the standard library: portable (no external `unzip` binary)
# and raises on a corrupt archive instead of silently returning an exit code.
with zipfile.ZipFile(archive_path) as archive:
    archive.extractall(datasets_dir)

### Import lib

You can choose one of our models: EfficientNetB0_model, MobileNetV1_model, InceptionV3_model, ResNet50_model, VGG16_model

If you want to customize the classifier, go to model_transfer -> classifier

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from model_transfer.models import EfficientNetB0_model
from dataset import split_data, create_gens
from callbacks import MyCallback
from plot_history_of_training import plot_training
from plot_image import show_images

# Start Reading Dataset

Load datasets

In [None]:
# Dataset location, relative to the repository root (the working directory
# after the download cell has run).
# NOTE(review): assumes training.zip unpacks into a 'training' folder — confirm.
data_dir = 'Datasets/training'

try:
    # Get splitted data: train/valid/test = 80/10/10
    train_df, valid_df, test_df = split_data(data_dir)

    # Get Generators
    batch_size = 40
    train_gen, valid_gen, test_gen = create_gens(train_df, valid_df, test_df, batch_size)
except Exception as err:
    # Report the cause and re-raise: swallowing the error would leave
    # train_gen/valid_gen/test_gen undefined and make later cells fail
    # with an unrelated NameError.
    print(f'Invalid Input: {err}')
    raise

Visualize sample images

In [None]:
# Display sample images taken from the training generator
show_images(train_gen)

# Build the model


In [None]:
# Build the transfer-learning model

# Settings passed on to the model factory
dropout_rate = 0.5
hidden_layer = 256

# Input geometry: (height, width, channels)
channels = 3
img_size = (224, 224)
img_shape = (*img_size, channels)

# Number of classes known to the training generator (size of the output layer)
class_count = len(train_gen.class_indices)

model = EfficientNetB0_model(img_shape, class_count, hidden_layer, dropout_rate)
model.summary()

In [None]:
# Print the summary of the model's layer at index 1.
# NOTE(review): presumably this is the classification sub-model — confirm against model_transfer.
model.layers[1].summary()

Set Callback Parameters

In [7]:
# Callback configuration

# Training schedule
epochs = 40        # total number of epochs to train for
batch_size = 40    # batch size used for training
ask_epoch = 5      # epochs to run before asking whether to halt training

# Learning-rate adjustment and early stopping
patience = 1        # epochs without improvement before the lr is adjusted
stop_patience = 3   # epochs without improvement before training is stopped
threshold = 0.9     # below this train accuracy monitor accuracy, otherwise validation loss
factor = 0.5        # multiplier applied to the lr when it is reduced

# Training batches per epoch: sample count over batch size, rounded up
sample_count = len(train_gen.labels)
batches = int(np.ceil(sample_count / batch_size))

callback_kwargs = dict(
    model=model,
    patience=patience,
    stop_patience=stop_patience,
    threshold=threshold,
    factor=factor,
    batches=batches,
    epochs=epochs,
    ask_epoch=ask_epoch,
)
callbacks = [MyCallback(**callback_kwargs)]

Train model

In [8]:
# Run training; Keras' built-in progress output is switched off (verbose=0)
history = model.fit(
    x=train_gen,
    validation_data=valid_gen,
    validation_steps=None,
    epochs=epochs,
    callbacks=callbacks,
    shuffle=False,
    verbose=0,
)

Do you want model asks you to halt the training [y/n] ?
 Epoch     Loss   Accuracy  V_loss    V_acc     LR     Next LR  Monitor  % Improv  Duration


Display model performance

In [None]:
# Hand the History object returned by model.fit to the project's plotting helper
plot_training(history)

# Evaluate model

In [None]:
# Largest batch size (at most 80) that divides the test set evenly, and the
# resulting number of batches. Kept as notebook-level names for reference.
ts_length = len(test_df)
test_batch_size = max(
    ts_length // n
    for n in range(1, ts_length + 1)
    if ts_length % n == 0 and ts_length / n <= 80
)
test_steps = ts_length // test_batch_size

# Evaluate every split over its whole generator. The step count derived from
# the test set must not be reused for the train/validation generators: it
# would score only the first `test_steps` batches of those (larger or smaller)
# splits. Without `steps`, Keras iterates each generator for its full length.
train_score = model.evaluate(train_gen, verbose=1)
valid_score = model.evaluate(valid_gen, verbose=1)
test_score = model.evaluate(test_gen, verbose=1)

# Each score is indexed as [loss, accuracy] below.
print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

Confusion Matrix and Classification Report

In [None]:
# Predict per-class scores for the test generator. Model.predict accepts
# generators directly; predict_generator is deprecated and no longer exists
# in recent Keras releases.
# NOTE(review): the comparison with test_gen.classes in later cells assumes
# test_gen is not shuffled — confirm in create_gens.
preds = model.predict(test_gen)

# Index of the highest-scoring class for every sample
y_pred = np.argmax(preds, axis=1)
print(y_pred)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Mapping class name -> integer label used by the generator
g_dict = test_gen.class_indices
classes = list(g_dict.keys())
# Integer labels in the same order as `classes` (dicts preserve insertion order)
class_ids = list(g_dict.values())

# Classification report. Passing `labels` explicitly keeps `target_names`
# aligned even when a class is absent from both the test labels and the
# predictions; otherwise the two lengths differ.
print(classification_report(test_gen.classes, y_pred, labels=class_ids, target_names=classes))

In [None]:
# Confusion matrix. `labels` fixes the matrix to one row/column per known
# class so it always matches the tick labels, even if a class never occurs
# in the test labels or predictions.
cm = confusion_matrix(test_gen.classes, y_pred, labels=list(range(len(classes))))

# Plot the confusion matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()