# Initialization

In [1]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import SGD, Adam, Nadam, AdamW
from tensorflow.keras.optimizers import Optimizer
from tensorflow.keras.models import save_model
import time
import matplotlib.pyplot as plt

In [2]:
!nvidia-smi

Sat May 10 18:01:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   44C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive; the result files written later in this
# notebook default to a folder under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import json
import pickle

def save_all_results(name, history, model, training_time, test_accuracy, save_dir='/content/drive/MyDrive/vgg_results'):
    """Persist one training run: per-epoch history, the model, and a summary.

    Three files are written into ``save_dir`` (created if missing):
    ``{name}_history.json``, ``{name}_model.h5`` and ``{name}_summary.json``.

    Args:
        name: Prefix used for the three output file names.
        history: Object exposing a ``history`` dict (as returned by ``model.fit``).
        model: Object exposing ``save(path)``.
        training_time: Elapsed training time stored in the summary.
        test_accuracy: Test-set accuracy stored in the summary.
        save_dir: Destination directory.
    """
    os.makedirs(save_dir, exist_ok=True)

    # `default=float` is only consulted for values json cannot encode natively
    # (e.g. NumPy float32 scalars), so plain Python numbers are written unchanged.
    with open(os.path.join(save_dir, f'{name}_history.json'), 'w') as f:
        json.dump(history.history, f, default=float)

    model.save(os.path.join(save_dir, f'{name}_model.h5'))

    summary = {
        'training_time': training_time,
        'test_accuracy': test_accuracy
    }
    with open(os.path.join(save_dir, f'{name}_summary.json'), 'w') as f:
        json.dump(summary, f, default=float)

# Data Preprocessing

In [6]:
# Fetch CIFAR-10 as (images, integer labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Turn the integer class ids into 10-way one-hot vectors.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Hold out 10% of the training samples for validation; the fixed seed keeps
# the split identical across runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

# Resize images to (224, 224)
def preprocess(image, label):
    """Resize one image to 224x224 and apply the VGG16 input preprocessing.

    The label is passed through untouched so the function can be used directly
    with ``Dataset.map`` on (image, label) pairs.
    """
    resized = tf.image.resize(image, (224, 224))
    return tf.keras.applications.vgg16.preprocess_input(resized), label

batch_size = 32

# Training pipeline: shuffle the raw samples *before* resizing so the shuffle
# buffer holds the small source images rather than 224x224 float tensors, and
# size the buffer to the whole training set so every epoch is fully shuffled.
# Resizing runs in parallel and batches are prefetched so preprocessing
# overlaps with the training step.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(len(x_train))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation and test pipelines keep their original order; only the parallel
# map and prefetch are added.
val_ds = (
    tf.data.Dataset.from_tensor_slices((x_val, y_val))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step


# Build Model

In [7]:
def train(optimizer, epochs):
    """Train a 10-class head on top of a frozen ImageNet VGG16 and evaluate it.

    The model is fit on the notebook-level ``train_ds`` with ``val_ds`` as
    validation data, then evaluated on ``test_ds``.

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        Tuple ``(history, model, training_time, test_accuracy)`` where
        ``training_time`` is the elapsed seconds spent inside ``model.fit``.
    """
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # only the new classification head is trained

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    predictions = Dense(10, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments during a long training run.
    start_time = time.perf_counter()
    history = model.fit(train_ds, epochs=epochs, validation_data=val_ds, verbose=1)
    training_time = time.perf_counter() - start_time

    # evaluate returns [loss, accuracy] for the compiled metrics; only accuracy is reported.
    _, test_accuracy = model.evaluate(test_ds, verbose=0)
    print(f"Test accuracy: {test_accuracy}")

    return history, model, training_time, test_accuracy

# Build Optimizer

In [8]:
class Lion(Optimizer):
    """Lion optimizer: updates each variable by the sign of an interpolated momentum.

    Per-variable step, with gradient ``g`` and momentum ``m``:

        update = sign(beta_1 * m + (1 - beta_1) * g)
        m      = beta_2 * m + (1 - beta_2) * g
        var   -= learning_rate * update

    Weight decay is handed to the base ``Optimizer``, which applies decoupled
    decay (``var -= learning_rate * weight_decay * var``) itself. Applying it
    here as well would decay every variable twice per step.

    Args:
        learning_rate: Step size (float or schedule accepted by the base class).
        beta_1: Interpolation factor between momentum and gradient for the update.
        beta_2: Decay factor of the momentum accumulator.
        weight_decay: Decoupled weight-decay coefficient, or None to disable.
        name: Optimizer name.
        **kwargs: Forwarded to the base ``Optimizer``.
    """

    def __init__(
        self, learning_rate=0.001, beta_1=0.9, beta_2=0.99, weight_decay=1e-4, name="lion", **kwargs):
        # The base class stores `weight_decay` (still readable as
        # `self.weight_decay`) and serializes it in `get_config`.
        super().__init__(
            learning_rate=learning_rate, weight_decay=weight_decay, name=name, **kwargs
        )
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def build(self, var_list):
        """Create one momentum slot per trainable variable (no-op if already built)."""
        if self.built:
            return
        super().build(var_list)

        self._momentums = []
        for var in var_list:
            self._momentums.append(
                self.add_variable_from_reference(
                    reference_variable=var, name="momentum"
                )
            )

    def update_step(self, gradient, variable, learning_rate):
        """Apply one Lion step to `variable` given its `gradient`."""
        lr = tf.cast(learning_rate, variable.dtype)
        gradient = tf.cast(gradient, variable.dtype)

        beta_1 = tf.cast(self.beta_1, variable.dtype)
        beta_2 = tf.cast(self.beta_2, variable.dtype)

        m = self._momentums[self._get_variable_index(variable)]

        # The update direction uses the momentum from *before* this step.
        update = tf.sign(beta_1 * m + (1.0 - beta_1) * gradient)

        new_m = beta_2 * m + (1.0 - beta_2) * gradient
        self.assign(m, new_m)

        # Weight decay is intentionally absent here: the base optimizer applies it.
        self.assign_sub(variable, lr * update)

    def get_config(self):
        """Return the serializable config; the base class already covers
        `learning_rate` and `weight_decay`."""
        config = super().get_config()
        config.update(
            {
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
            }
        )
        return config

# Train Model

In [9]:
# Optimizer configurations under comparison. Un-comment an entry to include it
# in the run; each enabled entry is trained and saved independently.
optimizers = {
#    'SGD': SGD(learning_rate=0.001, momentum=0.9),
#    'SGD_Nesterov': SGD(learning_rate=0.001, momentum=0.9, nesterov=True),
#    'Adam': Adam(learning_rate=0.001),
#    'Adam_Amsgrad': Adam(learning_rate=0.001, amsgrad=True),
    'AdamW': AdamW(learning_rate=0.001, weight_decay=1e-4),
#    'Nadam': Nadam(learning_rate=0.001),
#    'Lion': Lion(learning_rate=1e-4)
}

# Results of every run, keyed by the optimizer names above.
histories, training_times, models, test_accuracies = {}, {}, {}, {}

for name, optimizer in optimizers.items():
    print(f"Training with {name} optimizer...")

    # Train for 20 epochs and record the outcome under this optimizer's name.
    history, model, elapsed, accuracy = train(optimizer, 20)
    histories[name] = history
    models[name] = model
    training_times[name] = elapsed
    test_accuracies[name] = accuracy

    # Persist right away so a later failure does not lose this run.
    save_all_results(name, history, model, elapsed, accuracy)

Training with AdamW optimizer...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Epoch 1/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 188ms/step - accuracy: 0.5012 - loss: 2.4273 - val_accuracy: 0.8178 - val_loss: 0.5276
Epoch 2/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 185ms/step - accuracy: 0.7351 - loss: 0.7965 - val_accuracy: 0.8160 - val_loss: 0.5231
Epoch 3/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 197ms/step - accuracy: 0.7362 - loss: 0.7823 - val_accuracy: 0.8250 - val_loss: 0.5069
Epoch 4/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 196ms/step - accuracy: 0.7370 - loss: 0.7886 - val_accuracy: 0.8202 - val_loss: 0.5064
Epoch 5/20
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

