In [5]:
import tensorflow as tf
import numpy as np
import random
import time

In [6]:
# Load CIFAR-10 (downloaded and cached by Keras on first use).
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Scale pixel intensities to [0, 1]. Casting to float32 first matters: dividing
# the integer image arrays directly promotes them to float64, which doubles the
# memory footprint of the arrays (and of anything built from them) for no gain,
# since the network computes in float32 anyway.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# One-hot encode the integer labels for categorical cross-entropy. The width is
# pinned so train and test encodings always agree, instead of being inferred
# independently from the largest label present in each split.
NUM_CLASSES = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [7]:
def set_global_seed(seed_val):
    """Seed every global random number generator used by this notebook.

    ``seed_val`` is applied, in order, to Python's ``random`` module, NumPy's
    global generator, and TensorFlow's global seed.
    """
    seeders = (random.seed, np.random.seed, tf.random.set_seed)
    for apply_seed in seeders:
        apply_seed(seed_val)

In [8]:
def create_model(input_shape=(32, 32, 3), num_classes=10):
    """Build the (uncompiled) convolutional classifier used in this notebook.

    The network is five convolutional blocks followed by a dense head. Blocks
    2, 4 and 5 use ``strides=2`` with ``padding="same"``, so the spatial size is
    halved three times before ``Flatten``. Dropout increases with depth
    (0.1 -> 0.4).

    Args:
        input_shape: Shape of a single input image as (height, width,
            channels). Defaults to the CIFAR-10 shape ``(32, 32, 3)``.
        num_classes: Number of units in the final softmax layer. Defaults to
            10 (CIFAR-10).

    Returns:
        A ``tf.keras.Sequential`` model. The caller is responsible for
        compiling it.
    """
    layers = tf.keras.layers

    model = tf.keras.Sequential([
        layers.Input(shape=input_shape),

        # Block 1: depth_multiplier=2 doubles the channel count (32 -> 64)
        layers.Conv2D(32, 3, activation="relu", padding="same"),
        layers.BatchNormalization(),
        layers.DepthwiseConv2D(3, padding="same", activation="relu", depth_multiplier=2),
        layers.Dropout(0.1),

        # Block 2: strided convolution downsamples by 2
        layers.Conv2D(64, 3, activation="relu", strides=2, padding="same"),
        layers.BatchNormalization(),
        layers.Dropout(0.1),

        # Block 3: depth_multiplier=2 doubles the channel count (128 -> 256)
        layers.Conv2D(128, 3, activation="relu", padding="same"),
        layers.BatchNormalization(),
        layers.DepthwiseConv2D(3, padding="same", activation="relu", depth_multiplier=2),
        layers.Dropout(0.3),

        # Block 4: strided convolution downsamples by 2
        layers.Conv2D(256, 3, activation="relu", strides=2, padding="same"),
        layers.BatchNormalization(),
        layers.DepthwiseConv2D(3, padding="same", activation="relu"),
        layers.Dropout(0.3),

        # Block 5: strided convolution downsamples by 2
        layers.Conv2D(512, 3, activation="relu", strides=2, padding="same"),
        layers.BatchNormalization(),
        layers.Dropout(0.4),

        # Dense head ending in a softmax over the classes
        layers.Flatten(),
        layers.Dense(2048, activation="relu"),
        layers.Dropout(0.4),
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation="softmax"),
    ])

    return model

In [9]:
BATCH_SIZE = 32
# Explicit seed for shuffling and augmentation. This pipeline is built before
# the global seed is set for model creation, so without explicit seeds the
# shuffle order and augmentation draws would differ on every run.
PIPELINE_SEED = 42

# Create a tf.data.Dataset from the in-memory arrays
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Shuffle and batch the dataset. The buffer spans the whole training set:
# a buffer smaller than the dataset only mixes samples locally, so batches
# would stay close to the original ordering.
train_dataset = (
    train_dataset
    .shuffle(buffer_size=len(X_train), seed=PIPELINE_SEED)
    .batch(BATCH_SIZE)
)

# Define data augmentation pipeline. Each layer gets its own seed so the flip,
# rotation and translation draws are not derived from the same seed value.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal", seed=PIPELINE_SEED),
    # factor is a fraction of a full turn: 10/360 -> rotations within +/-10 degrees
    tf.keras.layers.RandomRotation(10/360, fill_mode="nearest", seed=PIPELINE_SEED + 1),
    tf.keras.layers.RandomTranslation(0.1, 0.1, fill_mode="nearest", seed=PIPELINE_SEED + 2)
])

# Apply data augmentation to the (already batched) dataset.
# NOTE(review): with parallel map calls the augmentation draws may still vary
# from run to run -- confirm if bit-exact reproducibility is required.
augmented_train_dataset = train_dataset.map(
    lambda x, y: (data_augmentation(x, training=True), y),
    num_parallel_calls=tf.data.AUTOTUNE  # Enable parallel processing
)

# Disabled experiment: append an extra augmented copy of 50% of the data.
# # Create an additional dataset that contains 50% of the original dataset
# additional_data = train_dataset.take(int(np.ceil(len(X_train)*.5/BATCH_SIZE))).map(
#     lambda x, y: (data_augmentation(x, training=True), y),
#     num_parallel_calls=tf.data.AUTOTUNE
# )
# augmented_train_dataset = augmented_train_dataset.concatenate(additional_data)

# Add prefetching so input preparation overlaps with training steps
augmented_train_dataset = augmented_train_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

In [10]:
# Bookkeeping for the training cell: cumulative training time in seconds and
# the epoch index from which the next fit() call should start.
total_time_taken, initial_epoch = 0, 0

# Seed before building the model so weight initialisation is repeatable.
set_global_seed(42)
model = create_model()

# SGD with Nesterov momentum; categorical cross-entropy matches one-hot labels.
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
)
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [12]:
# Target epoch index; raise it and re-run this cell to continue training from
# `initial_epoch` instead of starting over.
epochs = 50

# perf_counter() is a monotonic clock, so the measured duration cannot be
# distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
# NOTE(review): the test split doubles as the validation set here, so val_*
# metrics are not an independent held-out estimate if they guide tuning.
history = model.fit(
    augmented_train_dataset,
    validation_data=(X_test, y_test),
    epochs=epochs,
    initial_epoch=initial_epoch,
    callbacks=[],
)
end_time = time.perf_counter()
total_time_taken += (end_time - start_time)

# Advance the resume point to just past the last epoch that actually ran.
# If no epoch ran (cell re-run without raising `epochs`), keep it unchanged.
if history.epoch:
    initial_epoch = history.epoch[-1] + 1
print(f"\nModel trained for {initial_epoch} epochs in {round(total_time_taken/60, 2)} minutes.")

Epoch 11/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7406 - loss: 0.7455 - val_accuracy: 0.7776 - val_loss: 0.6632
Epoch 12/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7508 - loss: 0.7238 - val_accuracy: 0.7666 - val_loss: 0.7009
Epoch 13/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7619 - loss: 0.6865 - val_accuracy: 0.7944 - val_loss: 0.6125
Epoch 14/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7736 - loss: 0.6606 - val_accuracy: 0.7847 - val_loss: 0.6346
Epoch 15/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7786 - loss: 0.6400 - val_accuracy: 0.7987 - val_loss: 0.5932
Epoch 16/50
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 19s 12ms/step - accuracy: 0.7830 - loss: 0.6263 - val_accuracy: 0.7878 - val_loss: 0.635