In [None]:
import numpy as np
import pandas as pd
import os
import random
import matplotlib.pyplot as plt
import seaborn as sns
import string
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import glob
import tensorflow as tf
from tensorflow.keras.utils import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import models
from tensorflow.keras import callbacks
from tensorflow.keras.utils import plot_model

# random seed
SEED = 99
# Seed Python's `random`, NumPy and TensorFlow together so that image sampling,
# weight initialisation and shuffling are repeatable between runs.
tf.keras.utils.set_random_seed(SEED)
# NOTE(review): this is set after TensorFlow has already been imported; to
# silence import-time log lines it would have to precede the import -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Raw string: "\F", "\C" and "\i" are not valid escape sequences, so the
# non-raw literal triggers an invalid-escape warning. The path value is unchanged.
print(os.listdir(r"D:\Final Year Project\Classification\input"))

In [None]:
def get_labels_images(path):
    """Index every image stored under ``path/<directory>/<label>/<image>``.

    Parameters
    ----------
    path : str
        Root folder of the dataset.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``directories`` (first-level
        folder name), ``labels`` (second-level folder name) and ``images``
        (path of the image relative to ``path``, joined with ``'/'``).

    Notes
    -----
    Entries that are not folders at the first two levels (and entries that are
    not regular files at the image level) are skipped instead of crashing the
    walk, and every level is visited in sorted order so the resulting row
    order does not depend on the filesystem's listing order.
    """
    records = []
    for directory in sorted(os.listdir(path)):
        directory_path = os.path.join(path, directory)
        if not os.path.isdir(directory_path):
            continue  # stray file next to the data folders
        for label in sorted(os.listdir(directory_path)):
            label_path = os.path.join(directory_path, label)
            if not os.path.isdir(label_path):
                continue  # stray file next to the label folders
            for image in sorted(os.listdir(label_path)):
                if not os.path.isfile(os.path.join(label_path, image)):
                    continue
                # Relative path is always '/'-joined, exactly as before.
                records.append((directory, label, directory + '/' + label + '/' + image))
    return pd.DataFrame(records, columns=['directories', 'labels', 'images'])

In [None]:
# Root folder of the dataset. The `images` column built by get_labels_images
# holds paths relative to this folder.
dataset_dir = os.path.abspath('D:/Final Year Project/Classification/input/dataset5/')
# One row per image: first-level folder, label folder, relative image path.
df = get_labels_images(dataset_dir)

In [None]:
# (rows, columns) of the image index: one row per image found on disk.
df.shape

In [None]:
# Preview the first rows of the image index.
df.head()

In [None]:
# Distinct labels found on disk, listed in sorted order (the frame is sorted
# by label before the unique values are taken).
temp_df = df.sort_values(by=['labels'])
print_labels = np.array(temp_df['labels'].unique())

# Lower-case alphabet used as the reference set; whatever is in it but not in
# the dataset is reported as missing.
alphabet_labels = np.array([*string.ascii_lowercase])
set_diff = np.setdiff1d(alphabet_labels, print_labels)

print(print_labels)
print("Missing Letters:",set_diff)

In [None]:
def random_sample_plot(X):
    """Show a 4x4 grid of randomly drawn images (with replacement) from ``X``.

    Parameters
    ----------
    X : pandas.DataFrame
        Image index with an ``images`` column (path relative to
        ``dataset_dir``) and a ``labels`` column.

    Raises
    ------
    ValueError
        If ``X`` has no rows.
    """
    if len(X) == 0:
        raise ValueError("random_sample_plot needs a DataFrame with at least one row")

    plt.figure(figsize=(15, 18))
    for i in range(16):
        plt.subplot(4, 4, i+1)
        # Draw a row by position: this works whatever index X carries and gives
        # the label directly, without scanning the whole frame for the path.
        row = X.iloc[random.randrange(len(X))]
        sample = row['images']
        image = load_img(dataset_dir+'/'+sample,target_size=(64,64))
        plt.imshow(image)
        plt.title("label:{},\nimage:{}".format(row['labels'], sample))
    plt.show()

In [None]:
# Visual spot-check: 16 random images drawn from the full index.
random_sample_plot(df)

In [None]:
# Split the index by file name: rows whose relative path contains 'color'
# go to color_df, every other row goes to deep_df. Both get a fresh 0..n-1 index.
color_df = df[df['images'].map(lambda name: 'color' in name)].reset_index(drop=True)
deep_df = df[df['images'].map(lambda name: 'color' not in name)].reset_index(drop=True)

In [None]:
# Sanity check: the two subsets together must account for every row of df.
df.shape[0]==(color_df.shape[0]+deep_df.shape[0])

In [None]:
# Spot-check 16 random images from the 'color' subset.
random_sample_plot(color_df)

In [None]:
# Spot-check 16 random images from the non-'color' subset.
random_sample_plot(deep_df)

In [None]:
def plot_data(data):
    """Draw the label distribution of ``data`` as a bar chart and a pie chart.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``labels`` column; the two charts are shown side by side.
    """
    label_column = data['labels']

    fig, axes = plt.subplots(1,2, figsize=(20,10))
    bar_axis, pie_axis = axes
    bar_axis.set_title('Bar Graph Count of Letter Signs')
    pie_axis.set_title('Pie Chart Graph Count of Letter Signs')

    # Left panel: number of rows per label.
    sns.countplot(x=label_column, ax=bar_axis)

    # Right panel: the same counts as percentage shares.
    label_counts = label_column.value_counts()
    label_counts.plot.pie(
        autopct='%1.1f%%',
        shadow=False,
        textprops={'fontsize': 15},
        ax=pie_axis,
    )
    plt.show()
    
# Label distribution over the full image index.
plot_data(df)

In [None]:
# --- Image / model hyper-parameters -----------------------------------------
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
# Keras `target_size` is (height, width); both are 64 here so the value is
# unchanged, but the order now stays right if the two ever differ.
IMAGE_SIZE=(IMAGE_HEIGHT, IMAGE_WIDTH)
IMAGE_CHANNELS= 3
BATCH_SIZE = 128
# Width of the softmax output layer.
# NOTE(review): presumably the number of distinct labels in the dataset -- confirm.
LABEL_CLASS = 24
LEARNINGRATE = 1e-5
# Local VGG19 "no top" weights file loaded by build_model.
WEIGHTS = 'D:/Final Year Project/Classification/input/VGG19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Only preprocessing step: scale pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255)

# Keyword arguments shared by every flow_from_dataframe call.
data_gen_args = dict(directory=dataset_dir, 
                     x_col='images',
                     y_col='labels',
                     target_size=IMAGE_SIZE,
                     class_mode='categorical',
                     batch_size=BATCH_SIZE,
                     seed = SEED)

# The callback classes are reached through `tf.keras.callbacks` rather than the
# imported `callbacks` module: the list below is bound to the name `callbacks`
# (the training cells pass `callbacks = callbacks`), which replaces the module
# and would make a second run of this cell fail.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience = 5,
    verbose = 1, 
    restore_best_weights=True
    )

learningrate_reduction = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', 
    patience= 2, 
    verbose= 1, 
    )

callbacks = [early_stop,learningrate_reduction]

EPOCHS = 25

In [None]:
def split_data(data):
    """Split ``data`` into train, validation and test frames.

    10% of the rows are held out as the test set; 15% of the remaining rows
    become the validation set. Both splits use ``SEED`` as random state, and
    each returned frame has its index reset.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to split.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, val_df, test_df)``.
    """
    remaining_df, held_out_df = train_test_split(data, test_size=0.10, random_state=SEED)
    fit_df, check_df = train_test_split(remaining_df, test_size=0.15, random_state=SEED)

    train_df, val_df, test_df = (
        part.reset_index(drop=True) for part in (fit_df, check_df, held_out_df)
    )

    separator = '----------------------------------------------------------------'
    print(separator)
    print("The Number of Samples per Split")
    print(separator)
    print('Number of   training samples : {}'.format(train_df.shape[0]))
    print('Number of validation samples : {}'.format(val_df.shape[0]))
    print('Number of       test samples : {}'.format(test_df.shape[0]))
    print(separator)

    return train_df, val_df, test_df

In [None]:
def define_image_generators(train_df, val_df, test_df):
    """Create the train / validation / test image generators.

    All three generators are built from the shared ``datagen`` and
    ``data_gen_args``. The class list is computed once from the labels of the
    three frames together and passed to every generator, so a given label maps
    to the same output index in each of them even if one split happens to
    contain no sample of some label.

    Parameters
    ----------
    train_df, val_df, test_df : pandas.DataFrame
        Frames holding the columns named by ``data_gen_args['x_col']`` and
        ``data_gen_args['y_col']``.

    Returns
    -------
    tuple
        ``(train_generator, val_generator, test_generator)``; the test
        generator is not shuffled so predictions line up with its
        ``classes`` / ``filenames``.
    """
    label_col = data_gen_args['y_col']
    # Sorted order matches what Keras infers by itself when every label is present.
    class_names = sorted(
        set(train_df[label_col]) | set(val_df[label_col]) | set(test_df[label_col])
    )

    train_generator = datagen.flow_from_dataframe(train_df, classes=class_names, **data_gen_args)
    val_generator = datagen.flow_from_dataframe(val_df, classes=class_names, **data_gen_args)
    test_generator = datagen.flow_from_dataframe(test_df, classes=class_names, **data_gen_args, shuffle = False)
    return train_generator, val_generator, test_generator


In [None]:
def build_model():
    """Build and compile a VGG19-based classifier.

    The network is the VGG19 convolutional base (no top, weights read from
    ``WEIGHTS``) followed by Flatten, two ``Dense(512, relu) -> Dropout ->
    BatchNormalization`` stages (dropout rates 0.4 then 0.3) and a softmax layer
    with ``LABEL_CLASS`` units. No layer of the base is frozen here.

    Returns
    -------
    keras Model
        Compiled with RMSprop at ``LEARNINGRATE``, categorical cross-entropy
        loss and the accuracy metric.
    """
    input_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
    backbone = VGG19(include_top = False, weights = WEIGHTS, input_shape = input_shape)

    features = layers.Flatten()(backbone.output)
    # Two identical dense stages that differ only in their dropout rate.
    for dropout_rate in (0.4, 0.3):
        features = layers.Dense(512, activation = 'relu')(features)
        features = layers.Dropout(dropout_rate)(features)
        features = layers.BatchNormalization()(features)

    # output
    preds = layers.Dense(LABEL_CLASS, activation = 'softmax')(features)

    model = models.Model(inputs = backbone.input, outputs = preds)
    model.compile(
        optimizer = optimizers.RMSprop(learning_rate = LEARNINGRATE),
        loss = 'categorical_crossentropy',
        metrics = ['accuracy'],
    )
    return model

In [None]:
def plot_accuracy_loss(history):
    """Plot training vs. validation accuracy and loss per epoch.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute is a mapping with the keys
        ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``
        (one value per epoch), as returned by ``model.fit``.
    """
    records = history.history
    # Read every series up front, in the order accuracy -> loss.
    panels = [
        (title, records[key], records['val_' + key])
        for key, title in (('accuracy', 'Accuracy'), ('loss', 'Loss'))
    ]

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(panels[0][1]) + 1)

    plt.figure(figsize = (10, 7))
    for position, (title, train_values, val_values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_values, c = 'b', label = 'Training ' + title)
        plt.plot(epochs, val_values, c = 'g', label = 'Validation ' + title)
        plt.title('Training vs. Validation ' + title)
        plt.xlabel('Epoch')
        plt.ylabel(title)
        plt.legend()

    plt.show()

In [None]:
def show_accuracy_loss(model, test_generator):
    """Evaluate ``model`` on ``test_generator`` and print loss and accuracy.

    Parameters
    ----------
    model : object
        Must expose ``evaluate(test_generator)`` returning ``(loss, accuracy)``.
    test_generator : object
        Passed through unchanged to ``model.evaluate``.

    The accuracy is printed as a percentage (value multiplied by 100).
    """
    header = '---------------Evaluation Against Test Data---------------------'
    footer = '----------------------------------------------------------------'

    print(header)
    test_loss, test_accuracy = model.evaluate(test_generator)
    summary = 'Evaluation Loss: {:.4f}, Evaluation Accuracy: {:.2f}'.format(
        test_loss, test_accuracy * 100
    )
    print(summary)
    print(footer)

In [None]:
def show_predictions(model, test_generator, directory):
    """Predict on the test generator and report the results.

    Prints the overall accuracy and a classification report, draws a
    normalised confusion-matrix heatmap and shows up to 20 test images titled
    with their actual and predicted label.

    Parameters
    ----------
    model : keras Model
        Trained model; ``model.predict(test_generator)`` must return one row of
        class scores per test image.
    test_generator : keras iterator
        Un-shuffled generator exposing ``classes``, ``filenames`` and
        ``class_indices``.
    directory : str
        Folder that the generator's ``filenames`` are relative to.
    """
    preds = model.predict(test_generator)
    y_test = test_generator.classes
    y_pred_binary = preds.argmax(axis=1)

    # Index -> label mapping taken from the generator that produced y_test,
    # rather than from an unrelated global, so names always match the indices.
    index_to_label = {index: label for label, index in test_generator.class_indices.items()}
    class_ids = sorted(index_to_label)
    class_names = [str(index_to_label[index]) for index in class_ids]

    print('---------------Predictions against Test Data---------------------')
    print("The Accuracy of the model with the given test sample is : ", accuracy_score(y_test, y_pred_binary)*100, "%")
    print('----------------------------------------------------------------')
    print('')

    print('-----------------Classification Report--------------------------')
    # Explicit labels keep rows aligned with class_names even when a class is
    # absent from this particular test split.
    print(classification_report(y_test, y_pred_binary, labels=class_ids, target_names=class_names))
    print('----------------------------------------------------------------')
    print('')

    print('-----------------Confusion Matrix-------------------------------')
    cm = confusion_matrix(y_test, y_pred_binary, labels=class_ids)
    plt.subplots(figsize=(18, 6))
    # Each cell is shown as a share of all test samples.
    sns.heatmap(cm/np.sum(cm), annot= True, fmt='.2%', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.show()
    print('')

    print('----------------Actual vs Predicted Figures---------------------')
    plt.figure(figsize = (25,20))
    # Never index past the end of a test set that has fewer than 20 images.
    shown = min(20, len(test_generator.filenames))
    for i in range(shown):
        plt.subplot(4,5,i+1)
        image = load_img(directory+'/'+test_generator.filenames[i],target_size=(64,64))
        plt.imshow(image)
        actual = index_to_label.get(int(y_test[i]), y_test[i])
        predicted = index_to_label.get(int(y_pred_binary[i]), y_pred_binary[i])
        plt.title('Actual: {} - Predicted: {}'.format(actual, predicted))
    plt.show()
    

In [None]:
# Three independent, identically built networks: one for the full dataset,
# one for the 'color' subset and one for the remaining (non-'color') subset.
model = build_model()
model_colour = build_model()
model_deep = build_model()
# Layer-by-layer summary (the three models share the same architecture).
model.summary()

In [None]:
# Render the architecture as a graph diagram.
plot_model(model)

In [None]:
# Label distribution of the 'color' subset.
plot_data(color_df)

In [None]:
# Train / validation / test frames for the 'color' subset.
train_df_colour, val_df_colour, test_df_colour = split_data(color_df)

In [None]:
# Batch generators reading the 'color' images listed in the three frames.
train_generator_colour, val_generator_colour, test_generator_colour = define_image_generators(train_df_colour, val_df_colour, test_df_colour)

In [None]:
# Train the 'color' model for at most EPOCHS epochs; `callbacks` is the list
# holding the early-stopping and learning-rate-reduction callbacks.
history_colour = model_colour.fit(train_generator_colour,
                    epochs = EPOCHS,
                    validation_data = val_generator_colour,
                    callbacks = callbacks)

In [None]:
# Evaluate the 'color' model: training curves, test loss/accuracy, then the
# prediction report. The three helpers are called directly; wrapping the calls
# in a scikit-learn Pipeline ran them while the step list was being built and
# stored their None return values as the "steps", so the Pipeline did nothing.
plot_accuracy_loss(history_colour)
show_accuracy_loss(model_colour, test_generator_colour)
show_predictions(model_colour, test_generator_colour, dataset_dir)

In [None]:
# Experiment on the non-'color' subset: distribution, split, generators, training.
plot_data(deep_df)
train_df_deep, val_df_deep, test_df_deep = split_data(deep_df)
train_generator_deep, val_generator_deep, test_generator_deep = define_image_generators(train_df_deep, val_df_deep, test_df_deep)
history_deep = model_deep.fit(train_generator_deep,
                    epochs = EPOCHS,
                    validation_data = val_generator_deep,
                    callbacks = callbacks)

# Evaluation helpers are called directly; wrapping the calls in a scikit-learn
# Pipeline ran them while the step list was being built and stored their None
# return values as the "steps", so the Pipeline did nothing.
plot_accuracy_loss(history_deep)
show_accuracy_loss(model_deep, test_generator_deep)
show_predictions(model_deep, test_generator_deep, dataset_dir)

In [None]:
# Experiment on the full dataset: distribution, split, generators, training.
plot_data(df)
train_df, val_df, test_df = split_data(df)
train_generator, val_generator, test_generator = define_image_generators(train_df, val_df, test_df)
history = model.fit(train_generator,
                    epochs = EPOCHS,
                    validation_data = val_generator,
                    callbacks = callbacks)

# Evaluation helpers are called directly; wrapping the calls in a scikit-learn
# Pipeline ran them while the step list was being built and stored their None
# return values as the "steps", so the Pipeline did nothing.
plot_accuracy_loss(history)
show_accuracy_loss(model, test_generator)
show_predictions(model, test_generator, dataset_dir)