# Google Colab-related code

In [None]:
# Mount Google Drive inside the Colab runtime so the project folder is reachable.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

# Make the project's code directory importable, then switch the working
# directory to it: later cells use paths relative to this folder
# (e.g. `install-dependencies.sh`, `../input/base_tf_dict.yaml`).
import sys
sys.path.append('/content/drive/My Drive/ai_image_classification_cifar/code')
%cd /content/drive/My\ Drive/ai_image_classification_cifar/code

Mounted at /content/drive
/content/drive/My Drive/ai_image_classification_cifar/code


# Set up environment

In [None]:
#Install necessary dependencies (runs the project's shell script from the current working directory)
!bash install-dependencies.sh

#Import packages
# NOTE(review): this wildcard import is presumably what brings `os`, `random`,
# `shutil`, `yaml`, `tf`, `ImageDataGenerator` and `functions` into scope for the
# cells below -- confirm against required_packages.py; explicit imports would
# make those dependencies visible to the reader.
from required_packages import *

Collecting tensorflow==2.17.0 (from -r requirements.txt (line 6))
  Downloading tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting keras==3.4.1 (from -r requirements.txt (line 7))
  Downloading keras-3.4.1-py3-none-any.whl.metadata (5.8 kB)
Collecting Pillow==9.4.0 (from -r requirements.txt (line 13))
  Downloading Pillow-9.4.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.3 kB)
Collecting ml-dtypes<0.5.0,>=0.3.1 (from tensorflow==2.17.0->-r requirements.txt (line 6))
  Downloading ml_dtypes-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting tensorboard<2.18,>=2.17 (from tensorflow==2.17.0->-r requirements.txt (line 6))
  Downloading tensorboard-2.17.1-py3-none-any.whl.metadata (1.6 kB)
Collecting namex (from keras==3.4.1->-r requirements.txt (line 7))
  Downloading namex-0.0.8-py3-none-any.whl.metadata (246 bytes)
Collecting optree (from keras==3.4.1->-r requirements.txt (line 7))
  Do

# Source images

In [None]:
#Run this code when sourcing images from Kaggle account. However, do not run this when images are already sourced.
class Images:
    '''
    A class to source images.

    Copies a subset of the CIFAKE dataset from the Kaggle input folder into a
    working folder laid out as ``<dest_dir>/<split>/<category>/<image>``, then
    carves a validation split out of the copied training images.

    Parameters
    ----------
    num_images : int
        Number of images requested per category and per split; passed through
        to ``functions.source_images``.
    '''
    def __init__(self, num_images):
        self.num_images = num_images
        self.orig_dir = '/kaggle/input/cifake-real-and-ai-generated-synthetic-images'
        self.dest_dir = '/kaggle/working/cifake'

    def copy_images(self):
        '''
        Populate the train, test and validation folders under ``self.dest_dir``.

        For every category, 100 images are moved (not duplicated) from the
        training folder into ``<dest_dir>/validation/<category>``, so the
        training and validation sets never share a file.

        Raises
        ------
        ValueError
            If a training category folder holds fewer than 100 images.
        '''
        categories = ['FAKE', 'REAL']
        dataset_type = ['train', 'test']
        num_validation = 100  # images moved to validation per category
        seed = 23

        #Copy train & test images
        for split in dataset_type:
            for category in categories:
                orig_dir = os.path.join(self.orig_dir, split, category)
                dest_dir = os.path.join(self.dest_dir, split, category)
                functions.source_images(orig_dir = orig_dir, dest_dir = dest_dir, num_images = self.num_images, seed = seed)

        #Move validation images out of the training folders
        # Derived from self.dest_dir so the split follows the instance's
        # destination; with the default dest_dir this is the same path as before.
        validation_root = os.path.join(self.dest_dir, 'validation')
        for category in categories:
            train_dir = os.path.join(self.dest_dir, 'train', category)
            validation_dir = os.path.join(validation_root, category)

            # Create the category folder once. Creating a directory at the
            # *file* path would nest every image inside a folder named after it.
            os.makedirs(validation_dir, exist_ok=True)

            # os.listdir returns entries in arbitrary order; sorting makes the
            # seeded sample reproducible across machines and runs.
            all_files = sorted(os.listdir(train_dir))
            if len(all_files) < num_validation:
                raise ValueError(
                    'Cannot draw %d validation images from %s: only %d files available'
                    % (num_validation, train_dir, len(all_files)))

            random.seed(seed)
            selected_files = random.sample(all_files, num_validation)

            for file in selected_files:
                shutil.move(os.path.join(train_dir, file),
                            os.path.join(validation_dir, file))

# Preprocess images

In [None]:
class Preprocess:
    '''
    Build Keras directory-backed image generators from a configuration dictionary.

    Expected keyword arguments (normally the parsed YAML configuration):

    - ``generators``: ``rescale``, ``rotation_range``, ``width_shift_range``,
      ``height_shift_range``, ``shear_range``, ``zoom_range``, ``fill_mode``.
    - ``info``: ``train_dir``, ``validation_dir``, ``test_dir``, ``classes`` and,
      optionally, ``class_mode`` (defaults to ``'categorical'``).
    - ``preprocess`` (optional): ``resize`` as a two-element sequence used as the
      generators' target size; defaults to ``(224, 224)`` when absent.
    '''
    def __init__(self, **mdict):
        self.mdict = mdict

    def create_generators(self):
        '''
        Create the training, validation and test generators.

        Only the training generator applies random augmentation. All three
        apply the same ``rescale`` factor so that the model sees pixel values
        on the same scale during training and evaluation.

        Returns
        -------
        tuple
            ``(train_generator, validation_generator, test_generator)``; the
            test generator is not shuffled so predictions line up with files.
        '''
        gen_cfg = self.mdict['generators']
        info = self.mdict['info']

        # Use the configured image size when available so the generators agree
        # with the model's input shape; fall back to the previous fixed size.
        resize = (self.mdict.get('preprocess') or {}).get('resize')
        target_size = tuple(resize) if isinstance(resize, (list, tuple)) else (224, 224)

        # NOTE(review): the classifier built later in this notebook is a single
        # sigmoid unit trained with binary cross-entropy, which needs
        # class_mode='binary'. The default stays 'categorical' (one-hot labels)
        # to preserve existing behaviour -- set info.class_mode in the YAML.
        flow_kwargs = dict(
            target_size = target_size,
            batch_size = 32,
            classes = info['classes'],
            class_mode = info.get('class_mode', 'categorical'))

        train_datagen = ImageDataGenerator(
            rescale = gen_cfg['rescale'],
            rotation_range = gen_cfg['rotation_range'],
            width_shift_range = gen_cfg['width_shift_range'],
            height_shift_range = gen_cfg['height_shift_range'],
            shear_range = gen_cfg['shear_range'],
            zoom_range = gen_cfg['zoom_range'],
            fill_mode = gen_cfg['fill_mode'])

        train_generator = train_datagen.flow_from_directory(
            info['train_dir'], **flow_kwargs)

        # Evaluation data gets no augmentation but must share the training
        # rescale factor; otherwise it is fed to the model on a different scale.
        validation_generator = ImageDataGenerator(
            rescale = gen_cfg['rescale']).flow_from_directory(
            info['validation_dir'], **flow_kwargs)

        test_generator = ImageDataGenerator(
            rescale = gen_cfg['rescale']).flow_from_directory(
            info['test_dir'], shuffle = False, **flow_kwargs)

        return train_generator, validation_generator, test_generator

# Load YAML file and create training, validation, and test datasets

In [None]:
# Location of the run configuration, relative to the current working directory
yaml_file = '../input/base_tf_dict.yaml'

# Parse the YAML into a plain dictionary that drives the rest of the notebook
with open(yaml_file, mode='r') as file:
    df_dict = yaml.safe_load(file)

# Build the three directory-backed generators from that configuration
generator = Preprocess(**df_dict)
(train_generator,
 validation_generator,
 test_generator) = generator.create_generators()

Found 800 images belonging to 2 classes.
Found 735 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


# Create base model from pre-trained MobileNet V2 Model (Google)

In [None]:
# Create the base model from the pre-trained model MobileNet V2
  # Load pre-trained model without the top classification layer (dense layers)
  # Load pretrained weights from imagenet
# Requires `df_dict` and `train_generator` from the configuration cell above,
# so that cell must be run first.

# yaml.safe_load returns sequences as lists; convert to a tuple so the channel
# axis can be appended.
IMG_SIZE = tuple(df_dict['preprocess']['resize'])
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')
# Later cells refer to the convolutional base as `base_model`; keep the old
# name as an alias so anything still using `tf_model` continues to work.
tf_model = base_model

# Feature extraction: freeze convolutional base and add classifier
base_model.trainable = False

#Identify feature batch
image_batch, label_batch = next(iter(train_generator))
feature_batch = base_model(image_batch)
print(feature_batch.shape) #7 x 7 x 1280 block of features per image for 224 x 224 inputs

NameError: name 'df_dict' is not defined

In [None]:
# Add classification head; use GlobalAveragePooling2D to convert block of features to a single 1280 element vector per image
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape) #(32, 1280)

# Apply Dense layer to convert features into a single prediction per image.
# Because the layer uses a sigmoid activation, each output is a probability in (0, 1)
# rather than a raw logit: values above the 0.5 threshold used by the accuracy metric
# predict class 1, values below it predict class 0.
prediction_layer = tf.keras.layers.Dense(1, activation='sigmoid')
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape) #(32, 1)

(32, 1280)
(32, 1)


In [None]:
# Assemble the end-to-end model: augmentation -> MobileNetV2 preprocessing ->
# frozen convolutional base -> pooling -> dropout -> sigmoid classifier.
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# In-model augmentation (only active during training). It was previously
# referenced here without ever being defined.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
])

# The input needs the channel axis as well as height and width; the configured
# size is converted to a tuple because YAML sequences load as lists.
inputs = tf.keras.Input(shape=tuple(df_dict['preprocess']['resize']) + (3,))
x = data_augmentation(inputs)
# NOTE(review): preprocess_input expects pixel values in [0, 255]. If the
# generators already rescale (e.g. by 1/255) the images are scaled twice --
# confirm against the `generators.rescale` value in the YAML.
x = preprocess_input(x)
# training=False keeps the base's BatchNormalization layers in inference mode
x = base_model(x, training=False)
x = global_average_layer(x) #Adds GlobalAveragePooling2D layer
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x) #Adds Dense layer: sigmoid
model = tf.keras.Model(inputs, outputs)

In [None]:
# Compile for the feature-extraction phase: Adam at the configured learning
# rate, binary cross-entropy on the sigmoid output, accuracy at a 0.5 cut-off.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=df_dict['transfer_learning']['learning_rate'],
    ),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(threshold=0.5, name='accuracy'),
    ],
)

In [None]:
# Train only the classification head for the configured number of epochs,
# scoring against the validation generator after each epoch.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=df_dict['transfer_learning']['initial_epochs'],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Fine tuning

In [None]:
# Fine-tuning adapts the pre-trained weights from generic feature maps to features specific to this dataset

In [None]:
#Unfreeze the base_model so its weights can be updated during fine-tuning
# (the following cell re-freezes the layers below `fine_tune_at`)
base_model.trainable = True

In [None]:
# Report the depth of the base model before choosing where fine-tuning starts
print("Number of layers in the base model: ", len(base_model.layers))

# Index of the first layer that remains trainable
fine_tune_at = 100

# Every layer below that index keeps its pre-trained weights fixed
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

Number of layers in the base model:  154


In [None]:
# Recompile so the change in trainable layers takes effect. Fine-tuning uses a
# learning rate ten times smaller than the one used to train the head.
# `base_learning_rate` was previously referenced without being defined; it is
# the learning rate configured for the initial training phase.
base_learning_rate = float(df_dict['transfer_learning']['learning_rate'])

model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.5, name='accuracy')])

In [None]:
# Print the layer-by-layer summary of the recompiled model
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential_4 (Sequential)   (None, None, None, None   0         
                             )                                   
                                                                 
 tf.math.truediv (TFOpLambd  (None, 224, 224, 3)       0         
 a)                                                              
                                                                 
 tf.math.subtract (TFOpLamb  (None, 224, 224, 3)       0         
 da)                                                             
                                                                 
 mobilenetv2_1.00_224 (Func  (None, 7, 7, 1280)        2257984   
 tional)                                                   

In [None]:
# Continue training from where the feature-extraction phase stopped.
# `initial_epochs` was previously referenced without being defined; it is the
# epoch count configured for the initial training phase.
initial_epochs = df_dict['transfer_learning']['initial_epochs']
fine_tune_epochs = 10
total_epochs =  initial_epochs + fine_tune_epochs

# `epochs` is the index of the final epoch, not a count of additional epochs,
# so training runs from `initial_epoch` up to `total_epochs`.
history_fine = model.fit(train_generator,
                         epochs=total_epochs,
                         initial_epoch=len(history.epoch),
                         validation_data=validation_generator)

Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Archived Code

In [None]:
# #Display images
# #Use data augmentation
# data_augmentation = keras.Sequential(
#     [tf.keras.layers.RandomFlip("horizontal"),
#      tf.keras.layers.RandomRotation(0.1),
#     ]
# )

# #Get batch of images from train generator
# image_batch, _ = next(train_generator)
# first_image = image_batch[1]

# # Plotting augmented images
# plt.figure(figsize=(10, 10))
# for i in range(9):
#     ax = plt.subplot(3, 3, i + 1)

#     # Apply data augmentation
#     augmented_image = data_augmentation(tf.expand_dims(first_image, 0))

#     # Remove the batch dimension and convert the augmented image to a valid range for display
#     augmented_image = augmented_image[0].numpy()

#     # Rescale augmented_image to the range [0, 1] for display
#     augmented_image = (augmented_image + 1) / 2.0

#     # Display the augmented image
#     plt.imshow(augmented_image)
#     plt.axis('off')