# 3. Vary weight initialization component

In [None]:
import os
import pickle
import contextlib
import gc
import tensorflow                   as tf
import numpy                        as np
import matplotlib.pyplot            as plt
from tensorflow                     import keras
from tensorflow.keras               import layers, models
from tensorflow.keras.regularizers  import l2

In [3]:
# Fetch MNIST; the second split returned by load_data() is treated as validation data
(train_images, train_labels), (val_images, val_labels) = tf.keras.datasets.mnist.load_data()

# Append a trailing channel axis to every image
train_images  = train_images[..., np.newaxis]
val_images    = val_images[..., np.newaxis]

# Cast to float32 and divide by 255 so pixel values fall in [0, 1]
train_images  = train_images.astype('float32') / 255.0
val_images    = val_images.astype('float32') / 255.0

# Resize to 32x32 first, then turn the single grayscale channel into RGB
train_images  = tf.image.resize(train_images, (32, 32))
train_images  = tf.image.grayscale_to_rgb(train_images)
val_images    = tf.image.resize(val_images, (32, 32))
val_images    = tf.image.grayscale_to_rgb(val_images)

# Everything from index 8000 onward becomes the test set; the first 8000
# samples stay as validation data (images and labels are split identically)
test_images, val_images = val_images[8000:], val_images[:8000]
test_labels, val_labels = val_labels[8000:], val_labels[:8000]

# One-hot encode all three label arrays over 10 classes
train_labels, val_labels, test_labels = (
    tf.keras.utils.to_categorical(label_array, 10)
    for label_array in (train_labels, val_labels, test_labels)
)

# Mini-batch size used by the dataset pipelines below
batch_size    = 64

# Function to create a batched tf.data pipeline
def data_generator(images, labels, batch_size = batch_size, shuffle = True, shuffle_buffer = 20000):
    """
    Build a cached, batched and prefetched tf.data.Dataset from in-memory data.

    Parameters:
    - images: Image array/tensor, sliced along its first axis.
    - labels: Labels aligned with `images` along the first axis.
    - batch_size: Number of samples per batch (defaults to the notebook-level
                  `batch_size` at the time this function is defined).
    - shuffle: If True (default, the original behaviour) samples are shuffled
               before batching. Pass False for evaluation-only data, where
               shuffling costs time and memory without affecting the metrics.
    - shuffle_buffer: Size of the shuffle buffer (default 20000, the value that
                      was previously hard-coded). A buffer smaller than the
                      dataset only shuffles approximately.

    Returns:
    - dataset: tf.data.Dataset yielding (images, labels) batches.
    """
    dataset   = tf.data.Dataset.from_tensor_slices((images, labels))
    # Cache before shuffling so the cached elements are reshuffled on each pass
    dataset   = dataset.cache()
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer)
    dataset   = dataset.batch(batch_size)
    dataset   = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Create datasets for training, validation, and testing.
# Only the training data needs shuffling; validation/test are evaluated as-is.
train_dataset = data_generator(train_images, train_labels)
val_dataset   = data_generator(val_images, val_labels, shuffle = False)
test_dataset  = data_generator(test_images, test_labels, shuffle = False)

2024-10-17 14:32:57.675674: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [20]:
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.backend import epsilon
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Lambda, Dropout
from tensorflow.keras.regularizers import l2

class F1Score(tf.keras.metrics.Metric):
    """F1 Score Metric for TensorFlow Keras models.
    This class implements the F1 Score metric, which is the harmonic mean of precision and recall.
    F1 = 2 * (precision * recall) / (precision + recall)
    A small epsilon is added to the denominator so the result is defined
    when both precision and recall are zero.
    Attributes:
        precision (Precision): Instance of tf.keras.metrics.Precision (default arguments)
        recall (Recall): Instance of tf.keras.metrics.Recall (default arguments)
    Methods:
        update_state(y_true, y_pred, sample_weight=None): 
            Updates the internal states of precision and recall metrics.
        result(): 
            Computes and returns the F1 score based on current precision and recall values.
        reset_state(): 
            Resets the internal states of both precision and recall metrics.
        reset_states(): 
            Backward-compatible alias for reset_state().
    Args:
        name (str, optional): Name of the metric. Defaults to "f1_score".
        **kwargs: Additional keyword arguments to be passed to the parent class.
    Example:
        ```python
        model.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=[F1Score()])
        ```
    """
    def __init__(self, name="f1_score", **kwargs):
        super().__init__(name=name, **kwargs)
        # The F1 score is derived from these two wrapped metrics
        self.precision = Precision()
        self.recall = Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Forward the batch to both wrapped metrics."""
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        """Return 2 * p * r / (p + r + epsilon) from the current precision and recall."""
        p = self.precision.result()
        r = self.recall.result()
        return 2 * ((p * r) / (p + r + tf.keras.backend.epsilon()))

    def reset_state(self):
        """Reset both wrapped metrics.

        `reset_state` (no trailing "s") is the current Keras hook name; the
        older `reset_states` spelling is deprecated in tf.keras.
        """
        self.precision.reset_state()
        self.recall.reset_state()

    def reset_states(self):
        """Deprecated spelling kept so existing callers continue to work."""
        self.reset_state()


In [13]:
import random

def set_random_seeds(seed_value):
    """
    Seed NumPy, Python's `random` module and TensorFlow with one shared value.

    Parameters:
    - seed_value: The seed passed to each of the three seeding functions.
    """
    # Same order as before: NumPy, then `random`, then TensorFlow
    for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
        apply_seed(seed_value)

In [15]:
# Function to create the model: a CNN feature extractor trained with the SGD optimizer.
# The architecture remains the same across all 10 seeds.

def create_model():
    """
    Build and compile the fixed CNN classifier.

    Three Conv2D -> BatchNormalization -> MaxPooling2D stages (32, 64 and 128
    filters, 3x3 kernels, ReLU) feed a classifier head made of global average
    pooling, dropout (0.3), a 128-unit ReLU Dense layer, dropout (0.3) and a
    10-unit softmax Dense layer. Both Dense layers use l2(0.001) kernel
    regularization.

    Returns:
    - model: tf.keras.Sequential model compiled with the SGD optimizer,
             categorical cross-entropy loss and accuracy / precision /
             recall / AUC / F1 metrics.
    """
    # Input layer for 32x32 RGB images
    stack = [tf.keras.Input(shape = (32, 32, 3))]

    # Convolutional blocks: one (conv, batch-norm, max-pool) triple per filter count
    for n_filters in (32, 64, 128):
        stack.append(layers.Conv2D(n_filters, kernel_size = (3, 3), activation = "relu"))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D(pool_size = (2, 2)))

    # Classifier head
    stack.append(layers.GlobalAveragePooling2D())
    stack.append(layers.Dropout(0.3))
    stack.append(layers.Dense(128, activation = "relu", kernel_regularizer = l2(0.001)))
    stack.append(layers.Dropout(0.3))
    stack.append(layers.Dense(10, activation = "softmax", kernel_regularizer = l2(0.001)))

    model = tf.keras.Sequential(stack)

    # Metrics tracked during training and evaluation, in reporting order
    tracked_metrics = [
        'accuracy',
        Precision(name = 'precision'),
        Recall(name = 'recall'),
        AUC(name = 'auc'),
        F1Score(name = 'f1_score'),
    ]

    # Compile the model using the SGD optimizer
    model.compile(optimizer = 'SGD',
                  loss      = 'categorical_crossentropy',
                  metrics   = tracked_metrics)
    return model

In [23]:
num_trials = 10
results    = []
save_dir   = "models/diff_seeds"

# exist_ok=True replaces the separate exists() check and is a no-op when the
# directory is already there
os.makedirs(save_dir, exist_ok = True)

for i in range(num_trials):
    print(f"Trial {i+1}")

    # Each trial builds a brand-new model; drop the Keras global state left by
    # the previous one so memory does not grow over the 10 trials
    tf.keras.backend.clear_session()
    gc.collect()

    seed     = i  # Different seed for each trial
    set_random_seeds(seed)

    # NOTE(review): train_dataset is built once, before any seed is set, so its
    # shuffle order is presumably not controlled by the per-trial seed — confirm
    # whether data order is meant to vary between trials.

    # Create a new model for each trial with a fresh initialization
    model      = create_model()

    history    = model.fit(train_dataset,
                           validation_data = val_dataset,
                           epochs          = 15,
                           verbose         = 2)

    # Metrics recorded at the final epoch
    train_loss = history.history["loss"][-1]
    train_acc  = history.history["accuracy"][-1]
    val_loss   = history.history["val_loss"][-1]
    val_acc    = history.history["val_accuracy"][-1]
    print(f"Validation Accuracy for trial {i+1}: {val_acc}")

    # Look metrics up by name instead of unpacking six positional values, so
    # adding or reordering compiled metrics cannot silently break this line
    test_metrics = model.evaluate(test_dataset, return_dict = True)
    test_loss    = test_metrics["loss"]
    test_acc     = test_metrics["accuracy"]
    print(f"Test Accuracy for trial {i+1}: {test_acc}")

    results.append({
        'trial'      : i+1,
        'seed'       : seed,
        'train_loss' : train_loss,
        'train_acc'  : train_acc,
        'val_loss'   : val_loss,
        'val_acc'    : val_acc,
        'test_loss'  : test_loss,
        'test_acc'   : test_acc,
        'history'    : history.history
    })

    model_name = f'CNN{i+1}.h5'
    model_path = os.path.join(save_dir, model_name)

    # Save the model if the performance criteria are met
    # NOTE(review): the captured output shows a saving_api.save_model warning,
    # presumably about the .h5 format — confirm before changing the extension,
    # since later code may load these files by name.
    if train_acc > 0.98 and val_acc > 0.98 and test_acc > 0.98:
        model.save(model_path)
        print(f"Model saved as {model_path}")

Trial 1
Epoch 1/15
938/938 - 9s - loss: 0.3282 - accuracy: 0.9355 - precision: 0.9646 - recall: 0.9138 - auc: 0.9965 - f1_score: 0.9385 - val_loss: 0.1492 - val_accuracy: 0.9824 - val_precision: 0.9860 - val_recall: 0.9789 - val_auc: 0.9994 - val_f1_score: 0.9824 - 9s/epoch - 10ms/step
Epoch 2/15
938/938 - 7s - loss: 0.1356 - accuracy: 0.9812 - precision: 0.9845 - recall: 0.9775 - auc: 0.9992 - f1_score: 0.9810 - val_loss: 0.0927 - val_accuracy: 0.9893 - val_precision: 0.9908 - val_recall: 0.9869 - val_auc: 0.9997 - val_f1_score: 0.9889 - 7s/epoch - 8ms/step
Epoch 3/15
938/938 - 7s - loss: 0.0976 - accuracy: 0.9855 - precision: 0.9885 - recall: 0.9829 - auc: 0.9994 - f1_score: 0.9857 - val_loss: 0.0693 - val_accuracy: 0.9905 - val_precision: 0.9933 - val_recall: 0.9886 - val_auc: 0.9999 - val_f1_score: 0.9910 - 7s/epoch - 8ms/step
Epoch 4/15
938/938 - 7s - loss: 0.0786 - accuracy: 0.9882 - precision: 0.9906 - recall: 0.9857 - auc: 0.9996 - f1_score: 0.9882 - val_loss: 0.0843 - val_accu

  saving_api.save_model(


938/938 - 9s - loss: 0.3249 - accuracy: 0.9376 - precision: 0.9646 - recall: 0.9182 - auc: 0.9965 - f1_score: 0.9409 - val_loss: 0.1365 - val_accuracy: 0.9874 - val_precision: 0.9903 - val_recall: 0.9850 - val_auc: 0.9996 - val_f1_score: 0.9877 - 9s/epoch - 10ms/step
Epoch 2/15
938/938 - 7s - loss: 0.1400 - accuracy: 0.9804 - precision: 0.9838 - recall: 0.9772 - auc: 0.9992 - f1_score: 0.9805 - val_loss: 0.0951 - val_accuracy: 0.9898 - val_precision: 0.9915 - val_recall: 0.9875 - val_auc: 0.9996 - val_f1_score: 0.9895 - 7s/epoch - 8ms/step
Epoch 3/15
938/938 - 7s - loss: 0.0964 - accuracy: 0.9858 - precision: 0.9886 - recall: 0.9833 - auc: 0.9995 - f1_score: 0.9859 - val_loss: 0.0757 - val_accuracy: 0.9891 - val_precision: 0.9907 - val_recall: 0.9876 - val_auc: 0.9997 - val_f1_score: 0.9892 - 7s/epoch - 8ms/step
Epoch 4/15
938/938 - 7s - loss: 0.0801 - accuracy: 0.9875 - precision: 0.9901 - recall: 0.9853 - auc: 0.9995 - f1_score: 0.9877 - val_loss: 0.0861 - val_accuracy: 0.9845 - val_

In [18]:
# Report the final-epoch validation accuracy stored for every trial
for entry in results:
    trial_no, seed_used, final_val_acc = entry['trial'], entry['seed'], entry['val_acc']
    print(f"Trial {trial_no}: Seed {seed_used} - Validation Accuracy: {final_val_acc} ")

Trial 1: Seed 0 - Validation Accuracy: 0.9917730689048767 
Trial 2: Seed 1 - Validation Accuracy: 0.9907907843589783 
Trial 3: Seed 2 - Validation Accuracy: 0.9917730689048767 
Trial 4: Seed 3 - Validation Accuracy: 0.9914047122001648 
Trial 5: Seed 4 - Validation Accuracy: 0.9899312257766724 
Trial 6: Seed 5 - Validation Accuracy: 0.9902995824813843 
Trial 7: Seed 6 - Validation Accuracy: 0.9916502833366394 
Trial 8: Seed 7 - Validation Accuracy: 0.990667998790741 
Trial 9: Seed 8 - Validation Accuracy: 0.991895854473114 
Trial 10: Seed 9 - Validation Accuracy: 0.9907907843589783 
