# Download Dataset
The Food-101 archive is available at http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz

In [1]:
# !wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz

In [2]:
import random
import numpy as np
import tensorflow as tf
import torch
import os

def set_global_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator, TensorFlow and
    PyTorch (CPU and CUDA), and switches cuDNN to deterministic kernel selection
    with auto-tuning disabled.

    Args:
        seed: Integer seed applied to every library. Defaults to 42.

    Returns:
        None. A confirmation line is printed.
    """
    random.seed(seed)  # Python random seed
    np.random.seed(seed)  # NumPy random seed
    tf.random.set_seed(seed)  # TensorFlow random seed

    # torch is imported unconditionally at the top of this cell, so the former
    # `if torch is not None` guard could never be False and has been dropped.
    torch.manual_seed(seed)  # PyTorch seed for CPU
    torch.cuda.manual_seed(seed)  # PyTorch seed for the current GPU
    torch.cuda.manual_seed_all(seed)  # If using multiple GPUs
    torch.backends.cudnn.deterministic = True  # Deterministic cuDNN kernels
    torch.backends.cudnn.benchmark = False  # Disables cuDNN auto-tuning

    # PYTHONHASHSEED is only read when an interpreter starts, so this does NOT change
    # str/bytes hashing in the already-running kernel; it only affects child processes
    # launched afterwards. Export it before starting the kernel for a fixed hash seed.
    os.environ["PYTHONHASHSEED"] = str(seed)

    print(f"Global seed set to {seed}")

# Example usage:
set_global_seed(42)

Global seed set to 42


In [3]:
import os
import random
import tarfile
import shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, InceptionV3, EfficientNetB0
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from sklearn.model_selection import train_test_split
import requests

In [None]:
!pip install --upgrade wandb

Collecting wandb
  Downloading wandb-0.19.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading wandb-0.19.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.8/20.8 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.19.1
    Uninstalling wandb-0.19.1:


In [None]:
import wandb
# from wandb.keras import WandbCallback
from wandb.integration.keras import WandbCallback
print(wandb.__version__)
# Initialize Weights & Biases

In [None]:
# SECURITY: an API key used to be hard-coded on this line. Treat that key as leaked and
# revoke it in the W&B account settings. Credentials must never be stored in the notebook.
# wandb.login() reads the key from the WANDB_API_KEY environment variable (set it from a
# Kaggle/Colab secret or the shell) and otherwise falls back to an interactive prompt.
wandb.login()
# wandb.init(project="food101_experiment_tracking")

In [None]:
dataset_url = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"
dataset_path = "food-101.tar.gz"
extract_folder = "food-101"

# Download the archive once. The bytes are streamed to a temporary ".part" file and only
# renamed on success, so an interrupted download cannot leave a truncated archive that the
# existence check below would later mistake for a complete one.
if not os.path.exists(dataset_path):
    print("Downloading dataset...")
    partial_path = dataset_path + ".part"
    with requests.get(dataset_url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving an HTML error page as the archive.
        response.raise_for_status()
        with open(partial_path, 'wb') as file:
            shutil.copyfileobj(response.raw, file)
    os.replace(partial_path, dataset_path)
    print("Download complete.")

# Extract once, into the current working directory (the archive unpacks to `extract_folder`).
if not os.path.exists(extract_folder):
    print("Extracting dataset...")
    with tarfile.open(dataset_path, 'r:gz') as tar:
        if hasattr(tarfile, "data_filter"):
            # The "data" filter rejects absolute paths, path traversal and special files.
            tar.extractall(filter="data")
        else:
            # Older Python without extraction filters: fall back to the legacy behaviour.
            tar.extractall()
    print("Extraction complete.")

In [None]:
data_dir = os.path.join(extract_folder, "images")
print(os.getcwd())
print(data_dir)
# os.listdir() returns entries in arbitrary, filesystem-dependent order. Sorting first makes
# the seeded random.sample() below pick the same classes on every run. Only directories are
# kept so that a stray file in the images folder can never be selected as a "class".
all_classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
random_classes = random.sample(all_classes, 25)
print("Selected Classes:", random_classes)

In [None]:
def organize_data(src_folder, dest_folder, classes, train_ratio=0.8):
    """Copy the images of the selected classes into train/ and test/ sub-folders.

    For each class, the files in ``src_folder/<class>`` are split with
    ``train_test_split`` (fixed ``random_state=42``) and copied to
    ``dest_folder/train/<class>`` and ``dest_folder/test/<class>``.

    Args:
        src_folder: Directory containing one sub-directory per class.
        dest_folder: Output root; created if it does not exist.
        classes: Iterable of class (sub-directory) names to process.
        train_ratio: Value passed to ``train_test_split`` as ``train_size``.

    Returns:
        None. Files are copied on disk; source files are left untouched.
    """
    os.makedirs(dest_folder, exist_ok=True)

    for cls in tqdm(classes, desc="Processing Classes"):
        class_path = os.path.join(src_folder, cls)
        # os.listdir() order is arbitrary. random_state only yields a repeatable split when
        # the input order is itself repeatable, so sort the file names first.
        images = sorted(os.listdir(class_path))
        train_images, test_images = train_test_split(images, train_size=train_ratio, random_state=42)

        for dataset, image_list in (('train', train_images), ('test', test_images)):
            class_dest = os.path.join(dest_folder, dataset, cls)
            os.makedirs(class_dest, exist_ok=True)
            for img in image_list:
                shutil.copy(os.path.join(class_path, img), class_dest)
organized_data_path = "food101_selected"
# Start from a clean output folder so classes from a previous selection do not linger.
# shutil.rmtree on the same relative path that is written below replaces the former
# hard-coded `/kaggle/working/...` shell command, which only matched on Kaggle and
# printed an error when the folder did not exist yet.
shutil.rmtree(organized_data_path, ignore_errors=True)
organize_data(data_dir, organized_data_path, random_classes)

In [None]:
def plot_class_distribution(data_path, classes=None):
    """Draw a bar chart of the number of training files per class.

    Args:
        data_path: Root of the organised dataset; counts are taken from
            ``data_path/train/<class>``.
        classes: Class names to include. When omitted, the notebook-level
            ``random_classes`` list is used (the previous, implicit behaviour).

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    if classes is None:
        classes = random_classes

    train_dir = os.path.join(data_path, 'train')
    class_counts = {cls: len(os.listdir(os.path.join(train_dir, cls))) for cls in classes}

    plt.figure(figsize=(12, 5))
    sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()))
    plt.xticks(rotation=90)
    plt.xlabel("Class")
    plt.ylabel("Number of training images")
    plt.title("Class Distribution")
    plt.show()

plot_class_distribution(organized_data_path)

In [None]:
# data_gen = ImageDataGenerator(rescale=1./255, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, fill_mode='nearest',shear_range=0.2,zoom_range=0.2, 
#                               horizontal_flip=True, validation_split=0.2)

# train_gen = data_gen.flow_from_directory(os.path.join(organized_data_path, 'train'), target_size=(224, 224),
#                                          batch_size=32, class_mode='categorical', subset='training')
# val_gen = data_gen.flow_from_directory(os.path.join(organized_data_path, 'train'), target_size=(224, 224),
#                                        batch_size=32, class_mode='categorical', subset='validation')

In [None]:
# def build_model(base_model):
#     # base_model.trainable = False  # Freeze base layers
#     base_model.trainable = True  # Unfreeze all layers
#     for layer in base_model.layers[:-20]:  # Keep first 20 layers frozen
#         layer.trainable = False
#     # model = Sequential([
#     #     base_model,
#     #     GlobalAveragePooling2D(),
#     #     Dense(256, activation='relu'),
#     #     Dropout(0.5),
#     #     Dense(len(random_classes), activation='softmax')
#     # ])
#     model = Sequential([
#         base_model,
#         GlobalAveragePooling2D(),
#         BatchNormalization(),
#         Dense(256, activation='relu'),
#         Dropout(0.5),
#         Dense(len(random_classes), activation='softmax')
#     ])
#     return model

# models = {
#     "ResNet50": build_model(ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "InceptionV3": build_model(InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "EfficientNetB0": build_model(EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3)))
# }

In [None]:
# def train_model(model, model_name, optimizer, lr):
#     # model.compile(optimizer=optimizer(lr), loss='categorical_crossentropy', metrics=['accuracy'])
#     model.compile(optimizer=optimizer(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
#     history = model.fit(train_gen, validation_data=val_gen, epochs=10, verbose=1, callbacks=[WandbCallback()])
#     model.save(f"{model_name}.h5")
#     return history


from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint

# def train_model(model, model_name, optimizer, lr):
#     model.compile(optimizer=optimizer(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    
#     callbacks = [
#         WandbMetricsLogger(),  # Tracks metrics in WandB
#         WandbModelCheckpoint(f"{model_name}.h5", save_weights_only=True)  # Saves best model
#     ]
    
#     history = model.fit(train_gen, validation_data=val_gen, epochs=10, verbose=1, callbacks=callbacks)
#     return history

# def train_model(model, model_name, optimizer, lr):
#     model.compile(optimizer=optimizer(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    
    
#     callbacks = [
#         WandbMetricsLogger(),  # Tracks metrics in WandB
#         WandbModelCheckpoint(f"{model_name}.weights.h5", save_weights_only=True)  # Fix file name issue
#     ]
    
#     history = model.fit(train_gen, validation_data=val_gen, epochs=10, verbose=1, callbacks=callbacks)
#     return history

# optimizers = [Adam, RMSprop, SGD]
# lrs = [0.001, 0.0005, 0.0001]

# for model_name, model in models.items():
#     for opt in optimizers:
#         for lr in lrs:
#             print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
#             history = train_model(model, f"{model_name}_{opt.__name__}_{lr}", opt, lr)


# from tensorflow.keras.models import clone_model

# for model_name, model in models.items():
#     for opt in optimizers:
#         for lr in lrs:
#             model_copy = clone_model(model)  # Create a fresh copy of the model
#             model_copy.set_weights(model.get_weights())  # Copy original weights
            
#             print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
#             history = train_model(model_copy, f"{model_name}_{opt.__name__}_{lr}", opt, lr)

In [None]:
# from tensorflow.keras.applications import ResNet50, InceptionV3, EfficientNetB0
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
# from tensorflow.keras.optimizers import Adam, RMSprop, SGD
# from tensorflow.keras.models import clone_model
# import wandb
# from tensorflow.keras.callbacks import ReduceLROnPlateau
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.callbacks import EarlyStopping
# from tensorflow.keras.regularizers import l2
# # from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint
# from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint
# lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)


# # Function to clean GPU memory
# def clean_gpu():
#     tf.keras.backend.clear_session()  # Clears TensorFlow session
#     gc.collect()  # Collects garbage
#     try:
#         tf.config.experimental.reset_memory_stats("GPU:0")  # Clears GPU memory stats
#     except:
#         pass

# # Build model function with trainable layers handling
# def build_model(base_model, trainable_layers=10):
#     base_model.trainable = True
#     trainable_layers = min(trainable_layers, len(base_model.layers))  # Prevent out-of-bounds error

#     for layer in base_model.layers[:-trainable_layers]:  
#         layer.trainable = False  # Freeze initial layers

#     model = Sequential([
#         base_model,
#         GlobalAveragePooling2D(),
#         BatchNormalization(),
#         Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
#         Dropout(0.6),
#         Dense(train_gen.num_classes, activation='softmax') 
#     ])
#     return model


# # Initialize models
# models = {
#     "ResNet50": build_model(ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "InceptionV3": build_model(InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "EfficientNetB0": build_model(EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3)))
# }


# # Training function
# def train_model(model, model_name, optimizer, lr):
#     optimizer_instance = optimizer(learning_rate=lr)
#     model.compile(optimizer=optimizer_instance, loss='categorical_crossentropy', metrics=['accuracy',"f1_score",])

#     optimizer_name = optimizer_instance.__class__.__name__  # Get correct class name
#     early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
#     callbacks = [
#         WandbMetricsLogger(),
#         WandbModelCheckpoint(f"{model_name}_{optimizer_name}_{lr}.weights.h5", save_weights_only=True)  # Ensure unique names
#     ]

#     history = model.fit(train_gen, validation_data=val_gen, epochs=3, verbose=1, callbacks=callbacks)

#     del model, optimizer_instance
#     clean_gpu()
#     return history


# # Optimizers and learning rates
# optimizers = [Adam, RMSprop, SGD]
# lrs = [0.001, 0.0005, 0.0001]

# # Loop through models, optimizers, and learning rates
# for model_name, _ in models.items():
#     for opt in optimizers:
#         for lr in lrs:
#             model = build_model(eval(model_name)(weights='imagenet', include_top=False, input_shape=(224, 224, 3)))

#             print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
#             history = train_model(model, f"{model_name}_{opt.__name__}_{lr}", opt, lr)

In [None]:
# def build_model(base_model, trainable_layers=20):
#     base_model.trainable = True
#     trainable_layers = min(trainable_layers, len(base_model.layers))  # Prevent index error
#     for layer in base_model.layers[:-trainable_layers]:  # Freeze only the required layers
#         layer.trainable = False

#     model = Sequential([
#         base_model,
#         GlobalAveragePooling2D(),
#         BatchNormalization(),
#         Dense(256, activation='relu'),
#         Dropout(0.5),
#         Dense(len(random_classes), activation='softmax')
#         Dense(train_gen.num_classes, activation='softmax')
#     ])
#     return model
    
# models = {
#     "ResNet50": build_model(ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "InceptionV3": build_model(InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))),
#     "EfficientNetB0": build_model(EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3)))
# }

# # def train_model(model, model_name, optimizer, lr):
# #     optimizer_instance = optimizer(learning_rate=lr)  # Ensure fresh optimizer
# #     model.compile(optimizer=optimizer_instance, loss='categorical_crossentropy', metrics=['accuracy'])
    
# #     callbacks = [
# #         WandbMetricsLogger(),
# #         WandbModelCheckpoint(f"{model_name}_{optimizer.__name__}_{lr}.weights.h5", save_weights_only=True)  # Unique file names
# #     ]
    
# #     history = model.fit(train_gen, validation_data=val_gen, epochs=10, verbose=1, callbacks=callbacks)
# #     return history


# def train_model(model, model_name, optimizer, lr):
#     optimizer_instance = optimizer(learning_rate=lr)  # Ensure fresh optimizer
#     model.compile(optimizer=optimizer_instance, loss='categorical_crossentropy', metrics=['accuracy'])

#     optimizer_name = optimizer_instance.__class__.__name__  # Get class name safely

#     callbacks = [
#         WandbMetricsLogger(),
#         WandbModelCheckpoint(f"{model_name}_{optimizer_name}_{lr}.weights.h5", save_weights_only=True)
#     ]

#     history = model.fit(train_gen, validation_data=val_gen, epochs=10, verbose=1, callbacks=callbacks)
#     return history

# # for model_name, model in models.items():
# #     for opt in optimizers:
# #         for lr in lrs:
# #             model_copy = clone_model(model)  # Prevent weight contamination
# #             model_copy.set_weights(model.get_weights())
            
# #             print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
# #             history = train_model(model_copy, f"{model_name}_{opt.__name__}_{lr}", opt, lr)


# # Instead of cloning, reinitialize model with proper pre-trained weights
# for model_name, _ in models.items():
#     for opt in optimizers:
#         for lr in lrs:
#             model = build_model(eval(model_name)(weights='imagenet', include_top=False, input_shape=(224, 224, 3)))  
            
#             print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
#             history = train_model(model, f"{model_name}_{opt.__name__}_{lr}", opt, lr)



In [None]:
import os
import gc
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, InceptionV3, EfficientNetB0
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint
import wandb

# Initialize WandB
# Starts a W&B run in the "CIS_image_classification" project.
# NOTE(review): presumably the WandbMetricsLogger / WandbModelCheckpoint callbacks created
# later in train_model() attach to whichever run is active, so every configuration trained
# afterwards logs into this single run unless a new run is started per configuration.
# Confirm that is intended.
wandb.init(project="CIS_image_classification")

# Function to clean GPU memory
def clean_gpu():
    """Release Keras/TensorFlow state between training runs (best effort).

    Clears the Keras backend session, forces a Python garbage collection and
    then tries to reset TensorFlow's memory statistics for ``GPU:0``.

    Returns:
        None.
    """
    tf.keras.backend.clear_session()
    gc.collect()
    try:
        tf.config.experimental.reset_memory_stats("GPU:0")
    except Exception:
        # Best effort only: the reset is skipped when it fails (e.g. presumably when no
        # GPU is present). `except Exception` replaces the former bare `except:` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed here.
        pass

# Function to build model
def build_model(base_model, trainable_layers=10, num_classes=None):
    """Wrap a convolutional backbone with a small classification head.

    The last ``trainable_layers`` layers of ``base_model`` stay trainable and
    all earlier layers are frozen. Note that ``base_model`` is modified in
    place (its layers' ``trainable`` flags are changed).

    Args:
        base_model: Keras model used as the feature extractor.
        trainable_layers: Number of trailing backbone layers left trainable.
            Values are clamped to ``[0, len(base_model.layers)]``; ``0`` freezes
            the whole backbone.
        num_classes: Size of the softmax output. When omitted, the notebook-level
            ``train_gen.num_classes`` is used (the previous, implicit behaviour).

    Returns:
        An uncompiled ``Sequential`` model: backbone, global average pooling,
        batch normalisation, a 256-unit L2-regularised ReLU layer, dropout and
        the softmax classifier.
    """
    if num_classes is None:
        num_classes = train_gen.num_classes

    base_model.trainable = True
    layer_count = len(base_model.layers)
    trainable_layers = max(0, min(trainable_layers, layer_count))

    # Use an explicit cut-off index. The former `layers[:-trainable_layers]` slice is
    # empty when trainable_layers == 0 (`[:-0]` is `[:0]`), which left the entire backbone
    # trainable, the opposite of what was requested.
    frozen_until = layer_count - trainable_layers
    for layer in base_model.layers[:frozen_until]:
        layer.trainable = False

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        BatchNormalization(),
        Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.6),
        Dense(num_classes, activation='softmax')
    ])
    return model

# Data generators
# Random augmentation is applied to the training subset only. The validation subset comes
# from a second generator that just rescales, so validation metrics are measured on
# unaugmented images. Both generators use the same validation_split so that the
# 'training' and 'validation' subsets partition the files consistently.
# NOTE(review): every backbone receives inputs scaled to [0, 1]. The Keras applications
# each document their own preprocess_input convention. Confirm 1/255 scaling is
# appropriate for ResNet50, InceptionV3 and EfficientNetB0.
data_gen = ImageDataGenerator(
    rescale=1./255, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True, validation_split=0.2
)
val_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_gen = data_gen.flow_from_directory(
    os.path.join(organized_data_path, 'train'), target_size=(224, 224),
    batch_size=32, class_mode='categorical', subset='training'
)
val_gen = val_data_gen.flow_from_directory(
    os.path.join(organized_data_path, 'train'), target_size=(224, 224),
    batch_size=32, class_mode='categorical', subset='validation'
)

# Define models
# Every backbone is created with the same settings: ImageNet weights, no classification
# top, and 224x224 RGB input.
backbone_kwargs = dict(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
models = {
    "ResNet50": ResNet50(**backbone_kwargs),
    "InceptionV3": InceptionV3(**backbone_kwargs),
    "EfficientNetB0": EfficientNetB0(**backbone_kwargs),
}

# Training function
def train_model(model, model_name, optimizer, lr, epochs=3):
    """Compile and fit ``model`` on the notebook-level ``train_gen`` / ``val_gen``.

    Args:
        model: Uncompiled Keras model to train.
        model_name: Prefix of the checkpoint file name; the optimizer class name
            and learning rate are appended to it.
        optimizer: Optimizer class (not an instance); it is instantiated with
            ``learning_rate=lr``.
        lr: Learning rate passed to the optimizer and used in the file name.
        epochs: Number of training epochs. Defaults to 3, the previously
            hard-coded value. Note that with ``epochs`` at or below the callbacks'
            patience (5 for early stopping, 3 for the LR scheduler) neither
            callback can ever trigger; raise ``epochs`` to make them effective.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    optimizer_instance = optimizer(learning_rate=lr)
    model.compile(optimizer=optimizer_instance, loss='categorical_crossentropy', metrics=['accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

    callbacks = [
        WandbMetricsLogger(),
        WandbModelCheckpoint(f"{model_name}_{optimizer.__name__}_{lr}.weights.h5", save_weights_only=True),
        early_stopping,
        lr_scheduler
    ]

    try:
        history = model.fit(train_gen, validation_data=val_gen, epochs=epochs, verbose=1, callbacks=callbacks)
    finally:
        # Drop the local references and clear the session even when fit() raises or is
        # interrupted, so a failed configuration does not leak state into the next one.
        del model, optimizer_instance
        clean_gpu()
    return history

# Optimizers and learning rates
optimizers = [Adam, RMSprop, SGD]
lrs = [0.001, 0.0005, 0.0001]

# Train models with different optimizers and learning rates.
# History of every configuration, keyed by "<model>_<optimizer>_<lr>".
histories = {}
for model_name in models:
    # The keys of `models` match the constructor names in tf.keras.applications. Building
    # a fresh backbone per configuration means each run starts from the ImageNet weights;
    # reusing the single instance stored in `models` would carry fine-tuned weights from
    # one optimizer / learning-rate combination into the next and bias the comparison.
    backbone_factory = getattr(tf.keras.applications, model_name)
    for opt in optimizers:
        for lr in lrs:
            run_name = f"{model_name}_{opt.__name__}_{lr}"
            # One W&B run per configuration so that the curves do not overwrite each other.
            run = wandb.init(
                project="CIS_image_classification",
                name=run_name,
                config={"model": model_name, "optimizer": opt.__name__, "learning_rate": lr},
                reinit=True,
            )
            base_model = backbone_factory(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
            model = build_model(base_model)
            print(f"Training {model_name} with {opt.__name__} and learning rate {lr}")
            try:
                # train_model() appends the optimizer name and learning rate to the
                # checkpoint file name itself, so only the bare model name is passed;
                # passing run_name would duplicate the suffix in the file name.
                history = train_model(model, model_name, opt, lr)
                histories[run_name] = history
            finally:
                run.finish()


In [None]:
def plot_feature_maps(model, image_path, layer_index=-2, max_maps=16):
    """Show the feature maps one 'conv' layer of ``model`` produces for an image.

    The image is loaded as RGB, resized to 224x224 and scaled to [0, 1]. Among
    the layers whose name contains ``'conv'``, the one at ``layer_index`` is
    selected and up to ``max_maps`` of its output channels are plotted.

    Args:
        model: Keras model whose layers are inspected.
        image_path: Path of the image file to feed through the model.
        layer_index: Index into the list of layers whose name contains 'conv'.
            Defaults to -2 (second to last), the previously hard-coded choice.
        max_maps: Maximum number of channels to draw. Defaults to 16.

    Returns:
        None. The figure is shown with ``plt.show()``.

    Raises:
        ValueError: If no layer name contains 'conv'.
    """
    # convert("RGB") guarantees three channels: grayscale, RGBA or CMYK files would
    # otherwise yield an array the 3-channel model input cannot accept. The context
    # manager closes the underlying file once the pixel data has been read.
    with Image.open(image_path) as raw_img:
        img = raw_img.convert("RGB").resize((224, 224))
    img_array = np.expand_dims(np.array(img) / 255.0, axis=0)

    conv_layers = [layer for layer in model.layers if 'conv' in layer.name]
    if not conv_layers:
        raise ValueError("model has no layer whose name contains 'conv'")
    selected_layer = conv_layers[layer_index]

    # Only the selected layer is requested as output, instead of computing and returning
    # the activations of every matching layer just to display one of them.
    activation_model = Model(inputs=model.input, outputs=selected_layer.output)
    feature_maps = activation_model.predict(img_array)

    # Never index past the number of channels the layer actually has.
    n_maps = min(max_maps, feature_maps.shape[-1])
    n_cols = 4
    n_rows = max(1, -(-n_maps // n_cols))  # ceiling division; 4x4 grid for 16 maps

    plt.figure(figsize=(10, 10))
    for i in range(n_maps):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(feature_maps[0, :, :, i], cmap='viridis')
        plt.axis('off')
    plt.suptitle(f"Feature maps: {selected_layer.name}")
    plt.show()

# Visualise feature maps for the first listed training image of the first selected class.
sample_class_dir = os.path.join(organized_data_path, 'train', random_classes[0])
sample_image_name = os.listdir(sample_class_dir)[0]
plot_feature_maps(models['ResNet50'], os.path.join(sample_class_dir, sample_image_name))

print("Pipeline Completed Successfully!")
