In [3]:

from datetime import datetime
import tensorflow as tf
import keras
from keras import layers
from keras.datasets import mnist
import keras_tuner as kt
from keras import backend as K


2024-05-11 17:35:51.080465: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-11 17:35:51.082090: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-11 17:35:51.107353: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-11 17:35:51.107389: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-11 17:35:51.108082: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [4]:
num_classes = 10
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [5]:
class sgd_model(kt.HyperModel):
    def build(self, hp):
        inputs = keras.Input(shape=(28, 28, 1))
        x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same")(inputs)
        x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
        x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same")(x)
        x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
        x = layers.Flatten()(x)
        x = layers.Dense(128, activation="relu")(x)
        outputs = layers.Dense(num_classes, activation="softmax")(x)
        return keras.Model(inputs=inputs, outputs=outputs)

    def fit(self, hp, model, validation_data, callbacks=None, **kwargs):
        batch_size = hp.Int("batch_size", 32, 512, step=32, default=64)
        train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
        validation_data = tf.data.Dataset.from_tensor_slices(validation_data).batch(batch_size)

        optimizer = keras.optimizers.SGD(
            learning_rate=hp.Float("learning_rate", 1e-4, 1e-2, sampling="log", default=1e-3)
        )
        loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)

        val_accuracy_metric = keras.metrics.CategoricalAccuracy()

        @tf.function
        def run_train_step(images, labels):
            with tf.GradientTape() as tape:
                logits = model(images)
                loss = loss_fn(labels, logits)
                if model.losses:
                    loss += tf.math.add_n(model.losses)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        @tf.function
        def run_val_step(images, labels):
            logits = model(images)
            val_accuracy_metric.update_state(labels, logits)

        for callback in callbacks:
            callback.set_model(model)

        best_val_accuracy = 0.0
        i = 0
        for epoch in range(2):
            print(f"Epoch: {epoch}")

            for images, labels in train_ds:
                run_train_step(images, labels)

            val_accuracy_metric.reset_states()

            for images, labels in validation_data:
                run_val_step(images, labels)

            val_accuracy = float(val_accuracy_metric.result().numpy())

            for callback in callbacks:
                callback.on_epoch_end(epoch, logs={"my_metric": val_accuracy})

            print(f"Validation Accuracy: {val_accuracy}")            
            if val_accuracy < best_val_accuracy:
                i = i + 1    
            
            best_val_accuracy = max(best_val_accuracy, val_accuracy)

            if i > 3: # Early stopping criteria
                break
        

        return best_val_accuracy


In [6]:
sgd_tuner = kt.RandomSearch(
    objective=kt.Objective("my_metric", "max"),
    max_trials=5,
    hypermodel=sgd_model(),
    directory="randomsearch_results",
    project_name="sgd_custom_training",
    overwrite=True,
)


In [7]:
sgd_tuner.search(x=x_train, y=y_train, validation_data=(x_test, y_test))

Trial 5 Complete [00h 00m 22s]
my_metric: 0.10159999877214432

Best my_metric So Far: 0.770799994468689
Total elapsed time: 00h 01m 37s


In [8]:
sgd_best_hps = sgd_tuner.get_best_hyperparameters()[0]
print(sgd_best_hps.values)

sgd_best_model = sgd_tuner.get_best_models()[0]
sgd_best_model.summary()

sgd_tuner.results_summary()

{'batch_size': 64, 'learning_rate': 0.001}
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 32)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)

In [9]:
class adam_model(kt.HyperModel):
    def build(self, hp):
        inputs = keras.Input(shape=(28, 28, 1))
        x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same")(inputs)
        x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
        x = layers.Conv2D(32, kernel_size=(3, 3), activation="relu", padding="same")(x)
        x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
        x = layers.Flatten()(x)
        x = layers.Dense(128, activation="relu")(x)
        outputs = layers.Dense(num_classes, activation="softmax")(x)
        return keras.Model(inputs=inputs, outputs=outputs)

    def fit(self, hp, model, validation_data, callbacks=None, **kwargs):
        batch_size = hp.Int("batch_size", 32, 512, step=32, default=64)
        train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
        validation_data = tf.data.Dataset.from_tensor_slices(validation_data).batch(batch_size)

        optimizer = keras.optimizers.Adam(
            learning_rate = hp.Float("learning_rate", 1e-4, 1e-2, sampling="log", default=1e-3),
            beta_1 = hp.Float('beta_1', 0.8, 1, sampling="log", default=0.9),
            beta_2 = hp.Float('beta_2', 0.8, 1, sampling="log", default=0.99)
        )
        loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)

        val_accuracy_metric = keras.metrics.CategoricalAccuracy()

        @tf.function
        def run_train_step(images, labels):
            with tf.GradientTape() as tape:
                logits = model(images)
                loss = loss_fn(labels, logits)
                if model.losses:
                    loss += tf.math.add_n(model.losses)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        @tf.function
        def run_val_step(images, labels):
            logits = model(images)
            val_accuracy_metric.update_state(labels, logits)

        for callback in callbacks:
            callback.set_model(model)

        best_val_accuracy = 0.0
        i=0

        for epoch in range(2):
            print(f"Epoch: {epoch}")

            for images, labels in train_ds:
                run_train_step(images, labels)

            val_accuracy_metric.reset_states()

            for images, labels in validation_data:
                run_val_step(images, labels)

            val_accuracy = float(val_accuracy_metric.result().numpy())

            for callback in callbacks:
                callback.on_epoch_end(epoch, logs={"my_metric": val_accuracy})

            print(f"Validation Accuracy: {val_accuracy}")
            if val_accuracy < best_val_accuracy:
                i = i + 1    
            
            best_val_accuracy = max(best_val_accuracy, val_accuracy)

            if i > 3: # Early stopping criteria
                break
            

        return best_val_accuracy


In [10]:
adam_tuner = kt.RandomSearch(
    objective=kt.Objective("my_metric", "max"),
    max_trials=5,
    hypermodel=adam_model(),
    directory="randomsearch_results",
    project_name="adam_custom_training",
    overwrite=True,
)


In [11]:
adam_tuner.search(x=x_train, y=y_train, validation_data=(x_test, y_test))

Trial 5 Complete [00h 00m 16s]
my_metric: 0.9524999856948853

Best my_metric So Far: 0.9872999787330627
Total elapsed time: 00h 01m 29s


In [12]:
adam_best_hps = adam_tuner.get_best_hyperparameters()[0]
print(adam_best_hps.values)

adam_best_model = adam_tuner.get_best_models()[0]
adam_best_model.summary()

adam_tuner.results_summary()

{'batch_size': 64, 'learning_rate': 0.001, 'beta_1': 0.9, 'beta_2': 0.99}
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 7, 7, 32)          0         
 g2D)                                                            
                                                     