In [2]:
# file: multi_model_tuner.py
# Run with: python multi_model_tuner.py --model EfficientNetB0
# or: import functions into notebook and call them.

import os, sys, math, shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import keras_tuner as kt
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Pretrained model helpers
from tensorflow.keras.applications import (
    MobileNetV2, EfficientNetB0, InceptionV3, ResNet50, VGG16
)
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preproc
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preproc
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preproc
from tensorflow.keras.applications.resnet import preprocess_input as resnet_preproc
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preproc

In [3]:
#pip install keras_tuner

In [4]:
print("TF version:", tf.__version__)

TF version: 2.19.0


In [6]:
# ---------- Config ----------
DATA_ROOT = "data"   # <- update: path in Colab (upload or mount Drive)
TRAIN_DIR = os.path.join(DATA_ROOT, "train")
VAL_DIR   = os.path.join(DATA_ROOT, "val")
TEST_DIR  = os.path.join(DATA_ROOT, "test")  # optional but recommended

MODELS_DIR = "/content/models"
os.makedirs(MODELS_DIR, exist_ok=True)

In [7]:
# Tuner / training resource control
TRIALS = 6           # number of tuner trials per model (increase if you have time)
MAX_TUNER_EPOCHS = 6 # epochs during tuner search (keeps tuning cheap)
FINAL_EPOCHS = 30    # final training epochs (with early stopping)
BATCH_SIZE_DEFAULT = 32

In [8]:
# List of model names to run (order)
MODEL_LIST = ["CustomCNN", "MobileNetV2", "EfficientNetB0", "InceptionV3", "ResNet50", "VGG16"]

In [9]:
# ------------ 3) Per-model config (input sizes, preprocessors, base model ref, default batch) ------------
MODEL_CONFIG = {
    "CustomCNN":      {"input_size": (224,224), "preproc": lambda x: x/255.0, "batch": 32, "base": None},
    "MobileNetV2":    {"input_size": (224,224), "preproc": mobilenet_preproc, "batch": 32, "base": MobileNetV2},
    "EfficientNetB0": {"input_size": (224,224), "preproc": eff_preproc, "batch": 32, "base": EfficientNetB0},
    "InceptionV3":    {"input_size": (299,299), "preproc": inception_preproc, "batch": 24, "base": InceptionV3},
    "ResNet50":       {"input_size": (224,224), "preproc": resnet_preproc, "batch": 32, "base": ResNet50},
    "VGG16":          {"input_size": (224,224), "preproc": vgg_preproc, "batch": 16, "base": VGG16}
}


In [10]:
def get_generators(model_name):
    cfg = MODEL_CONFIG[model_name]
    img_size = cfg["input_size"]
    batch = cfg["batch"]
    preproc = cfg["preproc"]

    # Train augmentation
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preproc,
        rotation_range=20,
        width_shift_range=0.12,
        height_shift_range=0.12,
        zoom_range=0.15,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preproc)
    test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preproc)

    train_gen = train_datagen.flow_from_directory(TRAIN_DIR, target_size=img_size, batch_size=batch, class_mode='categorical', shuffle=True)
    val_gen   = val_datagen.flow_from_directory(VAL_DIR,   target_size=img_size, batch_size=batch, class_mode='categorical', shuffle=False)
    test_gen  = None
    if os.path.exists(TEST_DIR):
        test_gen = test_datagen.flow_from_directory(TEST_DIR, target_size=img_size, batch_size=batch, class_mode='categorical', shuffle=False)

    return train_gen, val_gen, test_gen

In [11]:
def compute_class_weights_from_gen(gen):
    classes = gen.classes
    class_ids = np.unique(classes)
    weights = compute_class_weight(class_weight='balanced', classes=class_ids, y=classes)
    return dict(enumerate(weights))

In [12]:
def plot_history(h, title="history"):
    plt.figure(figsize=(12,4))
    if 'accuracy' in h.history:
        plt.subplot(1,2,1)
        plt.plot(h.history['accuracy'], label='train_acc')
        plt.plot(h.history.get('val_accuracy', []), label='val_acc')
        plt.legend(); plt.title(f"{title} - Accuracy")
    plt.subplot(1,2,2)
    plt.plot(h.history['loss'], label='train_loss')
    plt.plot(h.history.get('val_loss', []), label='val_loss')
    plt.legend(); plt.title(f"{title} - Loss")
    plt.show()

In [13]:
def build_custom_cnn(hp, input_shape, n_classes):
    inputs = tf.keras.Input(shape=input_shape)
    x = inputs
    # conv blocks 2-4
    for i in range(hp.Int("conv_blocks", 2, 4)):
        filters = hp.Choice(f"filters_{i}", [32, 48, 64])
        x = layers.Conv2D(filters, (3,3), padding='same', activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D((2,2))(x)
    x = layers.Flatten()(x)
    x = layers.Dense(hp.Int("dense_units", 64, 256, step=64), activation='relu')(x)
    x = layers.Dropout(hp.Float("dropout", 0.2, 0.5, step=0.1))(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)
    lr = hp.Choice("lr", [1e-3, 1e-4, 1e-5])
    model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [14]:
# ---------- Pretrained model builder factory (for tuner) ----------
def make_pretrained_builder(model_name):
    cfg = MODEL_CONFIG[model_name]
    base_fn = cfg["base"]
    input_shape = cfg["input_size"] + (3,)
    def builder(hp):
        base_model = base_fn(weights='imagenet', include_top=False, input_shape=input_shape)
        base_model.trainable = False
        inputs = tf.keras.Input(shape=input_shape)
        x = base_model(inputs, training=False)
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dropout(hp.Float("top_dropout", 0.2, 0.5, step=0.1))(x)
        x = layers.Dense(hp.Int("dense_units", 64, 256, step=64), activation='relu')(x)
        x = layers.Dropout(hp.Float("head_dropout", 0.2, 0.5, step=0.1))(x)
        outputs = layers.Dense(2, activation='softmax')(x)  # placeholder, will adapt later
        model = tf.keras.Model(inputs, outputs)
        lr = hp.Choice("lr", [1e-3, 1e-4, 1e-5])
        model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
        return model
    return builder

In [15]:
# ------------ 6) Tuner runner per model ------------
def run_tuner_and_train(model_name, trials=TRIALS, max_tuner_epochs=MAX_TUNER_EPOCHS, final_epochs=FINAL_EPOCHS):
    print("\n\n=== START:", model_name, "===\n")
    train_gen, val_gen, test_gen = get_generators(model_name)
    n_classes = train_gen.num_classes
    input_shape = MODEL_CONFIG[model_name]["input_size"] + (3,)
    class_weights = compute_class_weights_from_gen(train_gen)
    print("Detected classes:", list(train_gen.class_indices.keys()))
    print("Train samples:", train_gen.samples, "Val samples:", val_gen.samples, "Test samples:", test_gen.samples if test_gen else "No test")

    if model_name == "CustomCNN":
        def build_fn(hp): return build_custom_cnn(hp, input_shape=input_shape, n_classes=n_classes)
    else:
        base_builder = make_pretrained_builder(model_name)
        def build_fn(hp):
            m = base_builder(hp)
            m = adapt_model_output(m, n_classes)
            # recompile with the chosen LR (m.optimizer already set)
            m.compile(optimizer=m.optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
            return m

    tuner = kt.RandomSearch(
        build_fn,
        objective='val_accuracy',
        max_trials=trials,
        executions_per_trial=1,
        directory=f"tuner_logs/{model_name}",
        project_name='hp_search',
        overwrite=True
    )

    stop_early = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    print("Starting tuner.search... (this will run short trainings to explore HP space)")
    tuner.search(train_gen, validation_data=val_gen, epochs=max_tuner_epochs, callbacks=[stop_early], class_weight=class_weights)

    # get best hps
    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
    print("Best HP for", model_name, ":", best_hp.values)

    # Build best model and train to convergence
    best_model = tuner.hypermodel.build(best_hp)
    best_model = adapt_model_output(best_model, n_classes)
    best_model.compile(optimizer=best_model.optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    # Callbacks for final training
    ckpt_path = os.path.join(MODELS_DIR, f"{model_name}_best.h5")
    callbacks = [
        ModelCheckpoint(ckpt_path, monitor='val_accuracy', save_best_only=True, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1),
        EarlyStopping(monitor='val_accuracy', patience=6, restore_best_weights=True, verbose=1)
    ]

    print("Starting final training for", model_name)
    history = best_model.fit(train_gen, validation_data=val_gen, epochs=final_epochs, callbacks=callbacks, class_weight=class_weights)
    plot_history(history, title=model_name + " final")

    # Evaluate on test if exists
    if test_gen:
        print("Evaluating on test set...")
        best_model = tf.keras.models.load_model(ckpt_path)
        preds = best_model.predict(test_gen, verbose=1)
        y_pred = np.argmax(preds, axis=1)
        y_true = test_gen.classes
        class_names = list(test_gen.class_indices.keys())
        print(classification_report(y_true, y_pred, target_names=class_names))
        cm = confusion_matrix(y_true, y_pred)
        plt.figure(figsize=(10,8))
        sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
        plt.title(f"{model_name} - Confusion Matrix (test)")
        plt.show()

    # Save best HP to CSV for recording
    try:
        import json
        hp_json = best_hp.values
        with open(os.path.join(MODELS_DIR, f"{model_name}_best_hp.json"), "w") as f:
            json.dump(hp_json, f, indent=2)
    except Exception as e:
        print("Could not save HP:", e)

    return ckpt_path

In [16]:
# ------------ 7) Run all models sequentially (CAUTION: time & GPU) ------------
results = {}  # store paths & later test accuracies
for model_name in MODEL_LIST:
    # Skip if user doesn't want certain models; else uncomment all
    print("\n>>>>> Running model:", model_name)
    model_file = run_tuner_and_train(model_name, trials=TRIALS)
    results[model_name] = model_file

Trial 6 Complete [00h 35m 10s]
val_accuracy: 0.3424908518791199

Best val_accuracy So Far: 0.8397436141967773
Total elapsed time: 03h 54m 24s
Best HP for CustomCNN : {'conv_blocks': 3, 'filters_0': 64, 'filters_1': 48, 'dense_units': 192, 'dropout': 0.2, 'lr': 0.0001, 'filters_2': 32}


NameError: name 'adapt_model_output' is not defined

In [17]:
# ------------ 8) Final summary: load models and compute test accuracy (if test_dir present) ------------
summary = []
if os.path.exists(TEST_DIR):
    # reuse generators to ensure same target ordering
    for model_name, model_file in results.items():
        print("\nLoading and evaluating:", model_name)
        _, _, test_gen = get_generators(model_name)
        model = tf.keras.models.load_model(model_file)
        loss, acc = model.evaluate(test_gen, verbose=0)
        summary.append({"model": model_name, "test_accuracy": acc, "model_file": model_file})
    df_summary = pd.DataFrame(summary).sort_values("test_accuracy", ascending=False).reset_index(drop=True)
    print("\n=== Final model comparison ===")
    display(df_summary)
    champion = df_summary.iloc[0]
    print("Champion model:", champion['model'], "file:", champion['model_file'], "test_acc:", champion['test_accuracy'])
else:
    print("No TEST_DIR provided — provide test directory to compute final comparison.")


KeyError: 'test_accuracy'

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import keras_tuner as kt

In [2]:
DATA_DIR = "data"
IMG_SIZE = (224, 224)  # can be changed per model
BATCH_SIZE = 32
NUM_CLASSES = 11

train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_gen = train_datagen.flow_from_directory(
    "data/train", target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode='categorical', subset='training'
)
val_gen = val_datagen.flow_from_directory(
    "data/val", target_size=IMG_SIZE, batch_size=BATCH_SIZE,
    class_mode='categorical', subset='validation'
)
class_names = list(train_gen.class_indices.keys())


Found 4984 images belonging to 11 classes.
Found 215 images belonging to 11 classes.


In [3]:
def plot_history(history, title):
    plt.figure(figsize=(12,4))
    plt.subplot(1,2,1)
    plt.plot(history.history['accuracy'], label='train_acc')
    plt.plot(history.history['val_accuracy'], label='val_acc')
    plt.title(f'{title} Accuracy')
    plt.legend()
    plt.subplot(1,2,2)
    plt.plot(history.history['loss'], label='train_loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.title(f'{title} Loss')
    plt.legend()
    plt.show()

In [4]:
def evaluate_model(model, generator, title):
    y_true = generator.classes
    y_pred = np.argmax(model.predict(generator), axis=1)
    print(f"Classification Report for {title}")
    print(classification_report(y_true, y_pred, target_names=class_names))

    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10,8))
    sns.heatmap(cm, annot=False, cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.title(f'Confusion Matrix — {title}')
    plt.show()

    report = classification_report(y_true, y_pred, output_dict=True)
    return {
        "model": title,
        "accuracy": report["accuracy"],
        "precision": np.mean([report[label]["precision"] for label in class_names]),
        "recall": np.mean([report[label]["recall"] for label in class_names]),
        "f1": np.mean([report[label]["f1-score"] for label in class_names])
    }

In [8]:
def build_custom_cnn(hp):
    model = models.Sequential()
    model.add(layers.Conv2D(
        filters=hp.Int('filters_1', 32, 128, step=32),
        kernel_size=(3, 3), activation='relu',
        input_shape=IMG_SIZE + (3,)
    ))
    model.add(layers.MaxPooling2D(2, 2))
    model.add(layers.Conv2D(
        filters=hp.Int('filters_2', 64, 256, step=64),
        kernel_size=(3, 3), activation='relu'
    ))
    model.add(layers.MaxPooling2D(2, 2))
    model.add(layers.Flatten())
    model.add(layers.Dense(
        units=hp.Int('dense_units', 64, 256, step=64),
        activation='relu'
    ))
    model.add(layers.Dropout(hp.Float('dropout', 0.2, 0.5, step=0.1)))
    model.add(layers.Dense(NUM_CLASSES, activation='softmax'))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Choice('lr', [1e-2, 1e-3, 1e-4])
        ),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [9]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
tuner = kt.Hyperband(
    build_custom_cnn,
    objective='val_accuracy',
    max_epochs=5,
    factor=3,
    directory='tuner_logs',
    project_name='custom_cnn_fish'
)

early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=2,         # stop after 2 epochs without improvement
    restore_best_weights=True
)

tuner.search(train_gen, validation_data=val_gen, epochs=5, callbacks=[early_stop])
best_hp = tuner.get_best_hyperparameters(1)[0]

model = build_custom_cnn(best_hp)
history = model.fit(train_gen, validation_data=val_gen, epochs=8)
plot_history(history, "Custom CNN")

metrics = evaluate_model(model, val_gen, "Custom CNN")
pd.DataFrame([metrics])
model.save("custom_cnn_best.h5")


Trial 4 Complete [00h 09m 20s]
val_accuracy: 0.7441860437393188

Best val_accuracy So Far: 0.7767441868782043
Total elapsed time: 00h 58m 17s

Search: Running Trial #5

Value             |Best Value So Far |Hyperparameter
96                |96                |filters_1
192               |128               |filters_2
192               |128               |dense_units
0.3               |0.4               |dropout
0.01              |0.001             |lr
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
[1m  1/156[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:09:54[0m 27s/step - accuracy: 0.1250 - loss: 2.3761