In [1]:
# Mount Google Drive into the Colab runtime so the project folder is reachable.
from google.colab import drive
drive.mount('/content/drive')

# Add the project's code directory to the import path so that the later
# `from functions import *` can be resolved (presumably functions.py lives there).
import sys
sys.path.append('/content/drive/My Drive/ai_image_classification_cifar/code')

Mounted at /content/drive


In [None]:
# List the project's code directory on the mounted Drive (the same directory that was added to sys.path).
!ls /content/drive/My\ Drive/ai_image_classification_cifar/code

# Import packages

In [2]:
import numpy as np
import pandas as pd

from fastcore.all import *
from fastai.vision.all import *
from time import sleep
from pathlib import Path

from fastdownload import download_url
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.optimizers import Adam, SGD
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

import matplotlib.pyplot as plt
import random
from PIL import Image

import os
import shutil
import pickle

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import seaborn as sns

from functions import *

# Create & import YAML files

In [3]:
# Change into the code directory first: the relative paths used later in the
# notebook ('../input/...', '../output/...') are resolved from here.
%cd /content/drive/My\ Drive/ai_image_classification_cifar/code
# Run the project script that (per the section title) creates the YAML configuration files.
!python create_yaml_files.py

/content/drive/My Drive/ai_image_classification_cifar/code


In [4]:
# Read the base model configuration into a plain dict; safe_load is used so the
# YAML cannot construct arbitrary Python objects.
with open('../input/base_dict.yaml') as config_file:
    base_dict = yaml.safe_load(config_file)

# Source images

In [9]:
# This class was run once to source the images from Kaggle. Do not run it again when the images are already sourced.
class Images:
    '''
    Source a sample of the CIFAKE images and carve out a validation split.

    Parameters
    ----------
    num_images : int
        Number of images requested per category for each of train and test
        (forwarded to `source_images`).
    orig_dir : str, optional
        Root of the original dataset; expected to contain
        `<train|test>/<FAKE|REAL>` sub-directories.
    dest_dir : str, optional
        Root of the working copy. Train, test and validation images are all
        placed below this directory.
    num_validation : int, optional
        Number of images per category moved from train to validation.
    seed : int, optional
        Seed used for both the sourcing step and the validation sample.
    '''
    def __init__(self, num_images,
                 orig_dir='/kaggle/input/cifake-real-and-ai-generated-synthetic-images',
                 dest_dir='/kaggle/working/cifake',
                 num_validation=100,
                 seed=23):
        self.num_images = num_images
        self.orig_dir = orig_dir
        self.dest_dir = dest_dir
        self.num_validation = num_validation
        self.seed = seed

    def copy_images(self):
        '''
        Copy the train/test samples, then move a random subset of the copied
        training images into `<dest_dir>/validation/<category>`.

        Raises
        ------
        ValueError
            If a training category holds fewer than `num_validation` images
            (raised by `random.sample`).
        '''
        categories = ['FAKE', 'REAL']
        dataset_type = ['train', 'test']

        # Copy train & test images
        for split in dataset_type:
            for category in categories:
                source_images(orig_dir=os.path.join(self.orig_dir, split, category),
                              dest_dir=os.path.join(self.dest_dir, split, category),
                              num_images=self.num_images,
                              seed=self.seed)

        # Move validation images out of the training set
        for category in categories:
            train_dir = os.path.join(self.dest_dir, 'train', category)
            validation_dir = os.path.join(self.dest_dir, 'validation', category)

            # Create the category directory itself; creating a directory at the
            # *file* path would nest every image inside a folder of the same name.
            os.makedirs(validation_dir, exist_ok=True)

            # os.listdir order is arbitrary, so sort before sampling to make the
            # seeded selection reproducible.
            all_files = sorted(os.listdir(train_dir))
            random.seed(self.seed)
            selected_files = random.sample(all_files, self.num_validation)

            for file_name in selected_files:
                shutil.move(os.path.join(train_dir, file_name),
                            os.path.join(validation_dir, file_name))

# Preprocess images

In [5]:
class Preprocess:
    '''
    Build the Keras directory iterators for the train, validation and test sets.

    The keyword arguments are the model configuration; this class reads
    `mdict['generators']` (augmentation and rescale settings) and
    `mdict['info']` (`train_dir`, `validation_dir`, `test_dir`, `classes`).
    '''
    # Image size and batch size shared by all three generators.
    TARGET_SIZE = (224, 224)
    BATCH_SIZE = 32

    def __init__(self, **mdict):
        self.mdict = mdict

    def create_generators(self):
        '''
        Returns
        -------
        tuple
            `(train_generator, validation_generator, test_generator)`.
            Only the training generator applies augmentation; only the test
            generator disables shuffling so predictions stay aligned with
            the file order.
        '''
        gen_cfg = self.mdict['generators']
        info = self.mdict['info']

        train_datagen = ImageDataGenerator(
            rescale = gen_cfg['rescale'],
            rotation_range = gen_cfg['rotation_range'],
            width_shift_range = gen_cfg['width_shift_range'],
            height_shift_range = gen_cfg['height_shift_range'],
            shear_range = gen_cfg['shear_range'],
            zoom_range = gen_cfg['zoom_range'],
            fill_mode = gen_cfg['fill_mode'])

        train_generator = train_datagen.flow_from_directory(
            info['train_dir'],
            target_size = self.TARGET_SIZE,
            batch_size = self.BATCH_SIZE,
            classes = info['classes'])

        # Validation and test images get no augmentation, but they must be
        # rescaled exactly like the training images; otherwise the model is
        # evaluated on a different pixel range than it was trained on.
        validation_generator = ImageDataGenerator(rescale = gen_cfg['rescale']).flow_from_directory(
            info['validation_dir'],
            target_size = self.TARGET_SIZE,
            batch_size = self.BATCH_SIZE,
            classes = info['classes'])

        test_generator = ImageDataGenerator(rescale = gen_cfg['rescale']).flow_from_directory(
            info['test_dir'],
            target_size = self.TARGET_SIZE,
            batch_size = self.BATCH_SIZE,
            classes = info['classes'],
            shuffle = False)

        return train_generator, validation_generator, test_generator

In [6]:
# Build the three directory iterators from the base configuration. The resulting
# notebook-level names (train_generator, validation_generator, test_generator)
# are used by the training and evaluation cells further down.
generator = Preprocess(**base_dict)
train_generator, validation_generator, test_generator = generator.create_generators()

Found 800 images belonging to 2 classes.
Found 735 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


# Train and save CNN models

In [7]:
class CNN_model:
    '''
    Build, compile, train and save a sequential CNN described by a config dict.

    Parameters
    ----------
    train_generator, validation_generator
        Data iterators passed to `model.fit`.
    **mdict
        Model configuration. Keys read by this class: `conv_layers`,
        `maxpool_layers`, `dense_layers`, `output_layer`, `model` and `info`.
    '''
    def __init__(self, train_generator, validation_generator, **mdict):
        self.train_generator = train_generator
        self.validation_generator = validation_generator
        self.mdict = mdict

    def define_architecture(self):
        '''Return an uncompiled Sequential model: conv/max-pool pairs, flatten, dropout, dense, output.'''
        model = Sequential()

        #Convolutional layers
        for i, conv in enumerate(self.mdict['conv_layers']):
            conv_kwargs = {
                'filters': conv['filters'],
                'kernel_size': conv['kernel_size'],
                'activation': conv['activation'],
            }
            # Only the first layer declares the input shape.
            if i == 0:
                conv_kwargs['input_shape'] = conv['input_shape']

            model.add(Conv2D(**conv_kwargs))
            model.add(MaxPooling2D(self.mdict['maxpool_layers']['pool_size']))

        #Flatten
        model.add(Flatten())

        #Dropout
        model.add(Dropout(self.mdict['model']['dropout']))

        #Dense layers
        model.add(Dense(
            units = self.mdict['dense_layers']['units'],
            activation = self.mdict['dense_layers']['activation']))

        #Output
        model.add(Dense(
            units = self.mdict['output_layer']['units'],
            activation = self.mdict['output_layer']['activation']))

        return model

    def compile_model(self):
        '''Return the architecture compiled with the configured loss, optimizer and metrics.'''
        model = self.define_architecture()
        model.compile(
            loss= self.mdict['model']['loss'],
            optimizer= self.mdict['model']['optimizer'],
            metrics=self.mdict['model']['metrics'])
        return model

    def fit_model (self):
        '''
        Compile and train the model, logging to TensorBoard under
        `../output/logs/<model_name>`.

        Returns
        -------
        tuple
            `(history, model)` where `history` is the value returned by `model.fit`.
        '''
        model = self.compile_model()

        log_dir = "../output/logs/" + self.mdict['info']['model_name']
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

        # Train on the generator given to this instance, not on whatever
        # `train_generator` happens to exist at notebook level.
        history = model.fit(self.train_generator,
                            steps_per_epoch = self.mdict['model']['steps_per_epoch'],
                            epochs = self.mdict['model']['epochs'],
                            validation_data = self.validation_generator,
                            validation_steps = self.mdict['model']['validation_steps'],
                            callbacks=[tensorboard_callback])

        return history, model

    def save_model(self):
        '''
        Train the model, then write it to `info.model_filepath` and pickle the
        training history dict to `info.history_filepath`.

        Note: every call trains a fresh model from scratch.
        '''
        history, model = self.fit_model()
        model.save(self.mdict['info']['model_filepath'])

        with open(self.mdict['info']['history_filepath'], 'wb') as file:
            pickle.dump(history.history, file)

In [None]:
# Train the base model on the notebook-level generators and persist it.
# save_model() runs the whole chain (define -> compile -> fit) before writing
# the model file and the pickled training history.
base = CNN_model(train_generator, validation_generator, **base_dict)
base.save_model()

Epoch 1/20



# Evaluate models

In [8]:
class Evaluate_model:
    '''
    Collect test-set metrics and training histories for one or more models and
    plot them with the project's helper functions.

    Attributes (set in __init__):
        test_generator : iterator the models are evaluated on.
        test_table     : DataFrame with one metrics row per evaluated model.
        histories / history_names : parallel lists filled by `add_history`.
    '''
    def __init__(self, test_generator):
        self.test_generator = test_generator
        self.test_table = pd.DataFrame()
        self.histories = []
        self.history_names = []
        # name -> (model, metrics tuple); lets the plot methods reuse an
        # evaluation instead of re-predicting and appending a duplicate row.
        self._results = {}

    def add_history(self, history, name):
        '''Register a training history under `name` for later plotting.'''
        self.histories.append(history)
        self.history_names.append(name)

    def visualize_training(self, metrics):
        '''Plot the given metrics for every registered history via `calc_histories`.'''
        calc_histories(metrics, self.histories, self.history_names)

    def calc_metrics(self, model, name):
        '''
        Evaluate `model` on the test generator and record its row in `test_table`.

        The evaluation is cached per `(name, model object)`: calling this again
        with the same model and name returns the stored result without
        re-running the evaluation or adding another row.

        Returns
        -------
        tuple
            `(accuracy, precision, recall, f1, y_true, y_pred_classes, y_pred)`
            as produced by `calc_eval_metrics`.
        '''
        cached = self._results.get(name)
        if cached is not None and cached[0] is model:
            return cached[1]

        row, accuracy, precision, recall, f1, y_true, y_pred_classes, y_pred = calc_eval_metrics(model, name, self.test_generator)
        self.test_table = pd.concat([self.test_table, row], ignore_index = True)

        metrics = (accuracy, precision, recall, f1, y_true, y_pred_classes, y_pred)
        self._results[name] = (model, metrics)
        return metrics

    def plot_confusion_matrix(self, model, name):
        '''Plot the confusion matrix of `model` on the test set.'''
        accuracy, precision, recall, f1, y_true, y_pred_classes, y_pred = self.calc_metrics(model, name)
        calc_confusion_matrix(y_true, y_pred_classes, name)

    def plot_roc_curve(self, model, name):
        '''Plot the ROC curve(s) of `model` on the test set.'''
        accuracy, precision, recall, f1, y_true, y_pred_classes, y_pred = self.calc_metrics(model, name)
        n_classes = self.test_generator.num_classes

        calc_roc_curve(n_classes, y_true, y_pred, name)

# Run evaluation

In [None]:
def train_CNN_model(**mdict):
    '''
    Train and save one CNN from a configuration dict.

    Fresh generators are created from `mdict`, a `CNN_model` is built on the
    train/validation generators, and `save_model()` trains and persists it.
    The test generator is created but not used here.
    '''
    train_gen, validation_gen, _test_gen = Preprocess(**mdict).create_generators()
    CNN_model(train_gen, validation_gen, **mdict).save_model()

# Train and save the base configuration end-to-end. NOTE(review): the earlier
# `base.save_model()` cell trains the same configuration, so running both
# cells trains the base model twice.
train_CNN_model(**base_dict)

Found 800 images belonging to 2 classes.
Found 735 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Epoch 1/20



In [10]:
def run_eval(test_generator, **mdict):
    '''
    Load a saved model and its training history, then compute test metrics and
    draw the confusion matrix and ROC curve.

    Parameters
    ----------
    test_generator
        Iterator over the test set, passed to `Evaluate_model`.
    **mdict
        Model configuration; reads `info.model_filepath`,
        `info.history_filepath` and `info.model_name`.

    Returns
    -------
    Evaluate_model
        The evaluator, so the caller can inspect `test_table` or call
        `visualize_training` on the registered history.
    '''
    info = mdict['info']
    model = load_model(info['model_filepath'])

    # The history file is a pickle; only load files written by this project,
    # since unpickling untrusted data can execute arbitrary code.
    with open(info['history_filepath'], 'rb') as file:
        history = pickle.load(file)

    model_eval = Evaluate_model(test_generator)
    model_eval.calc_metrics(model, info['model_name'])
    model_eval.add_history(history, info['model_name'])
    model_eval.plot_confusion_matrix(model, info['model_name'])
    model_eval.plot_roc_curve(model, info['model_name'])

    return model_eval

# run_eval takes the configuration as keyword arguments, so the dict must be
# unpacked with ** (passing it positionally raises TypeError). This cell also
# needs `test_generator` from the generator-creation cell to exist in the kernel.
run_eval(test_generator, **base_dict)

NameError: name 'test_generator' is not defined