In [33]:
import kagglehub
import pandas as pd
import gc
# Download the latest version of the dataset through kagglehub and keep the
# value it returns in `path`.
# NOTE(review): the same download is issued again in a later cell, which
# reassigns `path`; one of the two cells looks redundant.
path = kagglehub.dataset_download("zlatan599/mushroom1")

In [34]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, mixed_precision
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.efficientnet_v2 import EfficientNetV2B0, EfficientNetV2B1
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input as efficientnet_preprocess
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, TensorBoard
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess


In [35]:
# Speed up training and reduce memory usage by making 'mixed_float16' the
# global Keras dtype policy. The classifier built in train_and_finetune creates
# its final softmax layer with dtype='float32' explicitly.

mixed_precision.set_global_policy('mixed_float16')

In [36]:
# Constants shared by the data pipeline and the training routine.
BATCH_SIZE = 32  # number of examples per batch produced by build_tf_dataset
IMG_SIZE = (224, 224)  # size passed to tf.image.resize and used for the model input shape
AUTOTUNE = tf.data.AUTOTUNE  # used for num_parallel_calls and prefetch in the tf.data pipeline
LABEL_SMOOTHING = 0.1  # NOTE(review): not referenced in the visible cells; each config carries its own "label_smoothing"
EPOCHS_HEAD = 10  # NOTE(review): not referenced in the visible cells; each config carries its own "epochs"
EPOCHS_FINE = 15  # number of epochs of the fine-tuning phase

# Load, clean, and prepare the data

Download the data from Kaggle.

In [42]:
import os

import kagglehub
import pandas as pd

# Download the latest version of the dataset and remember where kagglehub
# placed it.
path = kagglehub.dataset_download("zlatan599/mushroom1")

# Root folder of the image files. It is derived from the download location
# instead of being hard-coded, so the notebook keeps working when the dataset
# is not mounted under /kaggle/input (on the Colab/Kaggle cache `path` is
# expected to be exactly that mount, giving the same value as before).
BASE_PATH = os.path.join(path, "merged_dataset")

Using Colab cache for faster access to the 'mushroom1' dataset.


In [43]:
def load_and_prepare_data(data_dir='/kaggle/input/mushroom1',
                          base_path=None,
                          old_prefix="/kaggle/working/merged_dataset"):
    """Load the train/val/test CSV splits and point their image paths at local files.

    Each split is read from ``<data_dir>/{train,val,test}.csv``. In the
    ``image_path`` column the literal ``old_prefix`` is replaced by
    ``base_path``; rows with a duplicated ``image_path`` are then dropped
    (first occurrence kept) and the index is reset.

    Args:
        data_dir: Directory containing ``train.csv``, ``val.csv`` and ``test.csv``.
        base_path: Replacement prefix for the image paths. Defaults to the
            module-level ``BASE_PATH`` when ``None``.
        old_prefix: Prefix stored in the CSV files that has to be rewritten.

    Returns:
        Tuple ``(train, val, test)`` of DataFrames - note the order differs
        from the order in which the files are listed on disk.
    """
    if base_path is None:
        base_path = BASE_PATH

    def _load_split(filename):
        # Read one split, rewrite its path prefix and de-duplicate it.
        split = pd.read_csv(os.path.join(data_dir, filename))
        # regex=False: the prefix is a literal path, not a pattern. Without it
        # the result depends on the pandas version's default for `regex`.
        split["image_path"] = split["image_path"].str.replace(
            old_prefix, base_path, regex=False)
        return split.drop_duplicates(subset="image_path").reset_index(drop=True)

    train = _load_split("train.csv")
    test = _load_split("test.csv")
    val = _load_split("val.csv")
    return train, val, test

# Load the three splits once; the returned order is (train, val, test).
# The dataset builders and the training routine read these module-level frames.
train_df, val_df, test_df = load_and_prepare_data()

Convert string labels into integer indices.

In [45]:
# Lookup layer that turns a string class name into an integer index.
# The vocabulary is the sorted set of labels found in the training split and
# no out-of-vocabulary slot is reserved (num_oov_indices=0).
# NOTE(review): presumably every label in val/test also occurs in train -
# confirm, since there is no index for unseen labels.
label_lookup = tf.keras.layers.StringLookup(
    vocabulary=sorted(train_df['label'].unique()),
    num_oov_indices=0,
    output_mode="int")

# Number of classes = size of the lookup vocabulary; used as the one-hot depth
# and as the width of the classifier's output layer.
NUM_CLASSES = len(label_lookup.get_vocabulary())

# Data preparation pipeline construction


In [46]:
def rescale01(img):
    """Default preprocessing: cast the image to float32 and divide it by 255."""
    return tf.cast(img, dtype=tf.float32) / 255.0

In [47]:
def build_tf_dataset(df, preprocess_fn=rescale01, augment=False, heavy=False, shuffle=False):
    """Build a batched, prefetched tf.data pipeline of (image, one-hot label) pairs.

    Pipeline order:
        [shuffle paths] -> read / decode / resize -> [augment] -> preprocess_fn
        -> batch -> prefetch

    Args:
        df: DataFrame with an ``image_path`` column (file paths) and a
            ``label`` column (string class names present in ``label_lookup``).
        preprocess_fn: Model-specific preprocessing applied to each resized
            image (defaults to ``rescale01``).
        augment: When truthy, random augmentation is applied to every image.
        heavy: When truthy (and ``augment`` is truthy), the stronger
            augmentation set is used; ignored when ``augment`` is falsy.
        shuffle: When truthy, examples are reshuffled on every pass.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` examples (the
        last batch may be smaller - no remainder is dropped).
    """
    def load_example(path, label):
        # Read the file, decode it as a 3-channel image and resize it.
        # No model-specific preprocessing yet: augmentation must see raw pixels.
        raw = tf.io.read_file(path)
        image = tf.image.decode_jpeg(raw, channels=3)
        image = tf.image.resize(image, IMG_SIZE)
        # String label -> integer id -> one-hot vector.
        target = tf.one_hot(label_lookup(label), depth=NUM_CLASSES)
        return image, target

    ds = tf.data.Dataset.from_tensor_slices((df["image_path"].values, df["label"].values))

    if shuffle:
        # Shuffle the cheap (path, label) strings *before* decoding. A buffer
        # covering the whole split gives a uniform shuffle, whereas a small
        # buffer of decoded images mixes poorly and holds many image tensors
        # in memory.
        ds = ds.shuffle(max(1, len(df)), reshuffle_each_iteration=True)

    ds = ds.map(load_example, num_parallel_calls=AUTOTUNE)

    if augment:
        if not heavy:
            aug_layer = tf.keras.Sequential([
                layers.RandomFlip("horizontal"),
                layers.RandomRotation(0.2),
                layers.RandomZoom(0.2),
                layers.RandomContrast(0.2),
            ])
        else:
            aug_layer = tf.keras.Sequential([
                layers.RandomFlip("horizontal_and_vertical"),
                layers.RandomRotation(0.3),
                layers.RandomZoom(0.3),
                layers.RandomContrast(0.4),
                layers.RandomBrightness(0.3),
            ])

        def apply_augmentation(image, target):
            # Augment while pixels are still on the 0-255 scale: Keras'
            # RandomContrast / RandomBrightness clip their output to that
            # range, which would destroy inputs already normalised to
            # [-1, 1] or to zero-mean/unit-variance by preprocess_fn.
            augmented = aug_layer(image, training=True)
            # Under a mixed-precision policy the layers may emit float16;
            # hand float32 to the model-specific preprocessing.
            return tf.cast(augmented, tf.float32), target

        ds = ds.map(apply_augmentation, num_parallel_calls=AUTOTUNE)

    # Model-specific preprocessing comes last so it always receives raw-scale pixels.
    ds = ds.map(lambda image, target: (preprocess_fn(image), target),
                num_parallel_calls=AUTOTUNE)

    # Batch, then prefetch so input preparation overlaps with training.
    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Function to generate the model name

In [48]:
def generate_model_name(config, img_size=None, epochs_fine=None):
    """Build a descriptive run name from a training configuration.

    The name has the form
    ``<arch>_<size>px_<aug>_<finetune>_<epochs>ep[_ls<label_smoothing>]``.

    Args:
        config: Dict with at least ``arch`` and ``epochs``; ``unfreeze`` is
            required when ``finetune`` is truthy. Optional keys: ``augment``,
            ``heavyaug``, ``finetune``, ``label_smoothing``.
        img_size: Image size whose first element is put in the name.
            Defaults to the module-level ``IMG_SIZE``.
        epochs_fine: Number of fine-tuning epochs shown in the name.
            Defaults to the module-level ``EPOCHS_FINE``.

    Returns:
        The run name as a string.

    Raises:
        KeyError: If a required key is missing from ``config``.
    """
    if img_size is None:
        img_size = IMG_SIZE
    if epochs_fine is None:
        epochs_fine = EPOCHS_FINE

    # The dataset builder ignores the heavy flag unless augmentation is on,
    # so the name must not claim "heavyaug" for a run without augmentation.
    if not config.get("augment"):
        aug_tag = "_noaug"
    elif config.get("heavyaug"):
        aug_tag = "_heavyaug"
    else:
        aug_tag = "_aug"

    if config.get("finetune"):
        tune_tag = f"_finetune{config['unfreeze']}"
        epoch_tag = f"_{config['epochs']}+{epochs_fine}ep"
    else:
        tune_tag = "_nofinetune"
        epoch_tag = f"_{config['epochs']}ep"

    parts = [f"{config['arch']}_{img_size[0]}px", aug_tag, tune_tag, epoch_tag]
    if config.get("label_smoothing"):
        parts.append(f"_ls{config['label_smoothing']}")
    return "".join(parts)


# Train and fine-tune pipeline

In [49]:
# Collects one result dict per trained configuration (appended by
# train_and_finetune); later turned into the summary table.
all_model_results = []

In [50]:
def train_and_finetune(config):
    """Train a classifier head on a frozen backbone, optionally fine-tune, and log results.

    Config keys read:
        arch (str): Architecture tag, used in the run name and the result row.
        backbone (callable): Constructor called with ``include_top=False``,
            ``input_shape`` and ``weights='imagenet'``.
        preprocess (callable, optional): Per-image preprocessing; defaults to
            ``rescale01``.
        epochs (int): Maximum number of epochs for the head-training phase.
        augment, heavyaug (optional): Forwarded to ``build_tf_dataset`` for
            the training split only.
        finetune (optional): When truthy, a second training phase of
            ``EPOCHS_FINE`` epochs follows with part of the backbone unfrozen.
        unfreeze (float, required when ``finetune``): Backbone layers with
            index >= ``int(len(base.layers) * unfreeze)`` become trainable
            (BatchNormalization layers always stay frozen).
            NOTE(review): the value is therefore the fraction of layers that
            stays *frozen*, e.g. 0.3 unfreezes the last ~70% - confirm this
            is the intended meaning.
        label_smoothing (float, optional): Passed to the loss; defaults to 0.

    Side effects:
        Writes ``<name>.keras`` (best checkpoint), ``<name>.csv`` (epoch log),
        ``<name>_final.keras`` and TensorBoard logs under ``logs/``; appends
        one dict to ``all_model_results``; clears the Keras session.

    Returns:
        None.
    """
    preprocess_fn = config.get("preprocess", rescale01)
    name = generate_model_name(config)

    # Datasets: only the training split is augmented and shuffled.
    train_ds = build_tf_dataset(
        df=train_df,
        preprocess_fn=preprocess_fn,
        augment=config.get("augment"),
        heavy=config.get("heavyaug"),
        shuffle=True,
    )
    val_ds = build_tf_dataset(val_df, preprocess_fn)
    test_ds = build_tf_dataset(test_df, preprocess_fn)

    # Model construction: pretrained backbone, frozen during head training.
    base = config['backbone'](include_top=False, input_shape=(*IMG_SIZE, 3), weights='imagenet')
    base.trainable = False

    model = models.Sequential([
        base,
        layers.GlobalAveragePooling2D(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        # Output layer is pinned to float32 regardless of the global dtype policy.
        layers.Dense(NUM_CLASSES, activation='softmax', dtype='float32'),
    ])

    loss = CategoricalCrossentropy(label_smoothing=config.get("label_smoothing", 0))

    # Batches per epoch. The dataset keeps its final partial batch, so round
    # up; otherwise the schedule periods below are one step short per epoch.
    steps_per_epoch = max(1, int(np.ceil(len(train_df) / BATCH_SIZE)))

    # Cosine decay with warm restarts: first cycle lasts 5 epochs, each next
    # cycle is twice as long and restarts at 0.8x the previous peak rate.
    lr1 = tf.keras.optimizers.schedules.CosineDecayRestarts(
        1e-3, 5 * steps_per_epoch,
        t_mul=2.0,
        m_mul=0.8)
    model.compile(optimizer=Adam(learning_rate=lr1), loss=loss, metrics=['accuracy'])

    # Callbacks. Early stopping, checkpointing and TensorBoard are shared by
    # both phases (the checkpoint keeps tracking the best val_accuracy seen so
    # far); the CSV logger is created per phase, see the fine-tuning block.
    logdir = f"logs/{name}_{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    early_stop = EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True)
    checkpoint = ModelCheckpoint(f"{name}.keras", save_best_only=True, monitor="val_accuracy")
    tensorboard = TensorBoard(log_dir=logdir)
    head_callbacks = [early_stop, checkpoint, CSVLogger(f"{name}.csv"), tensorboard]

    # start timing
    start = time.time()

    # Phase 1: train the head
    print(f"\n🚀 Training: {name}")
    model.fit(train_ds, validation_data=val_ds, epochs=config['epochs'],
              callbacks=head_callbacks, verbose=1)

    # Phase 2: fine-tune
    if config.get("finetune"):
        # Re-enable the backbone as a whole first: in tf.keras 2 a parent with
        # trainable=False exposes no trainable weights even when individual
        # sub-layers are flagged trainable, so nothing would be fine-tuned.
        base.trainable = True
        unfreeze_from = int(len(base.layers) * config['unfreeze'])
        for i, layer in enumerate(base.layers):
            # BatchNormalization layers stay frozen in every case.
            layer.trainable = (i >= unfreeze_from and not isinstance(layer, layers.BatchNormalization))

        # Recompile so the changed trainable flags take effect, with a much
        # lower learning rate for the pretrained weights.
        lr2 = tf.keras.optimizers.schedules.CosineDecayRestarts(3e-5, 8 * steps_per_epoch, t_mul=2.0, m_mul=0.9)
        model.compile(optimizer=AdamW(learning_rate=lr2, weight_decay=5e-5), loss=loss, metrics=['accuracy'])

        # append=True: a fresh CSVLogger in its default mode would truncate
        # the file and discard the head-phase log written above.
        finetune_callbacks = [early_stop, checkpoint,
                              CSVLogger(f"{name}.csv", append=True), tensorboard]

        print(f"🔓 Fine-tuning from layer {unfreeze_from}/{len(base.layers)}")
        model.fit(train_ds, validation_data=val_ds,
                  epochs=EPOCHS_FINE, callbacks=finetune_callbacks, verbose=1)

    # Evaluate
    print(f"\n✅ Final evaluation: {name}")

    test_loss, test_acc = model.evaluate(test_ds, verbose=0)
    end = time.time()

    # Save final model explicitly
    model.save(f"{name}_final.keras")

    # Log results
    all_model_results.append({
        "model_name": name,
        "arch": config['arch'],
        "finetune": config.get("finetune", False),
        "unfreeze": config.get("unfreeze", 0.0) if config.get("finetune") else 0.0,
        "heavyaug": config.get("heavyaug", False),
        "label_smoothing": config.get("label_smoothing", 0.0),
        "head_epochs": config["epochs"],
        "finetune_epochs": EPOCHS_FINE if config.get("finetune") else 0,
        "total_time_minutes": round((end - start)/60, 2),
        "test_accuracy": round(float(test_acc), 4),
        "test_loss": round(float(test_loss), 4),
        "trainable_params": int(np.sum([np.prod(v.shape) for v in model.trainable_variables])),
        "total_params": int(model.count_params())
    })

    # Clear memory. Drop the local references first: objects still bound to
    # local names cannot be reclaimed by the collector.
    del model, base, train_ds, val_ds, test_ds
    tf.keras.backend.clear_session()
    gc.collect()

# Train different configs

In [51]:
#  Configurations to Train
# Each entry is one experiment consumed by train_and_finetune / generate_model_name.
# Keys:
#   arch            - architecture tag used in the run name and the result table
#   backbone        - constructor of the pretrained model
#   preprocess      - per-image preprocessing function for that backbone
#   epochs          - epochs of the head-training phase
#   augment         - enable random augmentation on the training split
#   heavyaug        - use the stronger augmentation set (only with augment)
#   finetune        - run the additional fine-tuning phase (EPOCHS_FINE epochs)
#   unfreeze        - backbone layers from index int(n_layers * unfreeze) onward are unfrozen
#   label_smoothing - label smoothing passed to the loss
# The commented-out entries are earlier experiments that are currently disabled.
configs = [
    # {"arch": "baselinecnn",
    #  "backbone": lambda **kwargs: models.Sequential([
    #      layers.Input(shape=(224,224,3)),
    #      layers.Conv2D(32,3,activation='relu'),
    #      layers.MaxPooling2D(),
    #      layers.Conv2D(64,3,activation='relu'),
    #      layers.MaxPooling2D()
    #  ]),
    #  "preprocess": rescale01,
    #  "epochs": 10},

    # {"arch": "mobilenetv2",
    #  "backbone": MobileNetV2,
    #  "preprocess": mobilenet_preprocess,
    #  "epochs": 8,
    #  "augment": True},

    # {"arch": "mobilenetv2",
    #  "backbone": MobileNetV2,
    #  "preprocess": mobilenet_preprocess,
    #  "epochs": 8,
    #  "augment": True,
    #  "finetune": True,
    #  "unfreeze": 0.3},

    # {"arch": "efficientnetv2b0",
    #  "backbone": EfficientNetV2B0,
    #  "preprocess": efficientnet_preprocess,
    #  "epochs": 10,
    #  "augment": True,
    #  "finetune": True,
    #  "unfreeze": 0.3},
    {
    "arch": "densenet201",
    "backbone": DenseNet201,
    "preprocess": densenet_preprocess,
    "epochs": 10,
    "augment": True,
    "finetune": True,
    "unfreeze": 0.4,
    "label_smoothing": 0.1
    },
    {"arch": "efficientnetv2b0",
     "backbone": EfficientNetV2B0,
     "preprocess": efficientnet_preprocess,
     "epochs": 10,
     "augment": True,
     "heavyaug": True,
     "finetune": True,
     "unfreeze": 0.3,
     "label_smoothing": 0.1},

    {"arch": "efficientnetv2b1",
     "backbone": EfficientNetV2B1,
     "preprocess": efficientnet_preprocess,
     "epochs": 12,
     "augment": True,
     "finetune": True,
     "unfreeze": 0.5,
     "label_smoothing": 0.15}
]


In [None]:
# Run every configuration in turn; each call appends one row to
# all_model_results and writes its own checkpoint, CSV log and final model.
for config in configs:
  train_and_finetune(config)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m74836368/74836368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step

🚀 Training: densenet201_224px_aug_finetune0.4_10+15ep_ls0.1
Epoch 1/10
[1m  20/2277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:28:30[0m 14s/step - accuracy: 0.0625 - loss: 5.1048

In [None]:
# --- Export Summary ---
# One row per trained configuration, taken from all_model_results.
summary_df = pd.DataFrame(all_model_results)
if summary_df.empty:
    # No run has finished yet: there is no "test_accuracy" column to sort by,
    # so sorting would raise a KeyError.
    print("No model results to summarise yet - run the training cell first.")
else:
    summary_df.to_csv("model_summary.csv", index=False)
    # Show the best-performing configuration first.
    display(summary_df.sort_values("test_accuracy", ascending=False))

In [None]:
# Load the TensorBoard notebook extension and open it on the "logs" directory,
# the parent of the per-run log directories created by train_and_finetune.
%load_ext tensorboard
%tensorboard --logdir logs
