# Hyperparameter Tuning with Keras Tuner

In [8]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m132.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading joblib-1.5.1-py3-none-any.whl (307 kB)
Downloading scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m37.3/37.3 MB[0m [31m159.1 MB/s[0m eta [36m0:00:00[0m00:

In [4]:
!pip install tensorflow==2.16.2
!pip install keras-tuner==1.4.7
!pip install numpy<2.0.0

In [2]:
import sys
sys.setrecursionlimit(100000)

In [3]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
import os
import warnings

warnings.filterwarnings('ignore')

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # 0 = all logs, 1 = filter out INFO, 2 = filter out INFO and WARNING, 3 = ERROR only

from tensorflow import keras
from sklearn.model_selection import train_test_split

2025-05-23 17:15:51.165813: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-23 17:15:51.167382: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-23 17:15:51.172605: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-23 17:15:51.186220: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-05-23 17:15:51.214892: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registe

## Load Dataset

In [4]:
(x_all, y_all), _ = keras.datasets.mnist.load_data()
x_all = x_all.astype("float32") / 255.0
x_temp, x_test, y_temp, y_test = train_test_split(x_all, y_all, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_temp, y_temp, test_size=0.25, random_state=42)

In [5]:
print(f'Training data shape: {x_train.shape}')
print(f'Validation data shape: {x_val.shape}')

Training data shape: (36000, 28, 28)
Validation data shape: (12000, 28, 28)


## Defining the model with hyperparameters

In [6]:
def build_model(hp):
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(units=hp.Int('units', min_value=32, max_value=512, step=32), activation='relu'),
        Dense(10, activation='softmax')])
    model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return model

## Configuring the hyperparameter search

In [7]:
tuner = kt.RandomSearch(build_model,
                        objective='val_accuracy',
                        max_trials=10,
                        executions_per_trial=2,
                        directory='my_dir',
                        project_name='intro_to_kt2')

tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


## Running the hyperparameter search

In [8]:
print("Train shape:", x_train.shape)  # (8000, 28, 28)
print("Val shape:", x_val.shape)      # (2000, 28, 28)


Train shape: (36000, 28, 28)
Val shape: (12000, 28, 28)


In [9]:
import shutil
shutil.rmtree("my_dir/intro_to_kt2", ignore_errors=True)

In [10]:
x_train = x_train.reshape(-1, 28, 28)
x_val = x_val.reshape(-1, 28, 28)

tuner.search(x_train, y_train, epochs=5, validation_data=(x_val, y_val)) 
tuner.results_summary() 

Trial 10 Complete [00h 02m 39s]
val_accuracy: 0.9752083420753479

Best val_accuracy So Far: 0.9752083420753479
Total elapsed time: 00h 19m 52s
Results summary
Results in my_dir/intro_to_kt2
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 09 summary
Hyperparameters:
units: 320
learning_rate: 0.0012776503660968082
Score: 0.9752083420753479

Trial 06 summary
Hyperparameters:
units: 384
learning_rate: 0.000859445039264591
Score: 0.9743333458900452

Trial 05 summary
Hyperparameters:
units: 384
learning_rate: 0.003806871387360406
Score: 0.9723333120346069

Trial 04 summary
Hyperparameters:
units: 128
learning_rate: 0.0016740809594745873
Score: 0.971708357334137

Trial 07 summary
Hyperparameters:
units: 192
learning_rate: 0.0029297658365897957
Score: 0.9703333377838135

Trial 00 summary
Hyperparameters:
units: 192
learning_rate: 0.0034647745720185136
Score: 0.968583345413208

Trial 01 summary
Hyperparameters:
units: 64
learning_rate: 0.002605398610948284
Score: 0

## Analyzing and using the best hyperparameters

In [11]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0] 
print(f""" 

The optimal number of units in the first dense layer is {best_hps.get('units')}. 

The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}. 

""") 

model = tuner.hypermodel.build(best_hps) 
model.fit(x_train, y_train, epochs=10, validation_split=0.2) 

test_loss, test_acc = model.evaluate(x_val, y_val) 
print(f'Test accuracy: {test_acc}') 

 

The optimal number of units in the first dense layer is 320. 

The optimal learning rate for the optimizer is 0.0012776503660968082. 


Epoch 1/10
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.8602 - loss: 0.4686 - val_accuracy: 0.9471 - val_loss: 0.1769
Epoch 2/10
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.9616 - loss: 0.1249 - val_accuracy: 0.9568 - val_loss: 0.1374
Epoch 3/10
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.9763 - loss: 0.0792 - val_accuracy: 0.9593 - val_loss: 0.1244
Epoch 4/10
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.9858 - loss: 0.0477 - val_accuracy: 0.9633 - val_loss: 0.1276
Epoch 5/10
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - accuracy: 0.9894 - loss: 0.0360 - val_accuracy: 0.9690 - val_loss: 0.1092
Epoch 6/10
[1m900/900[0m [32m━━━━━━━━━