In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive (without forcing a remount); the dataset paths below live under it.
drive.mount('/content/drive', force_remount=False)

Mounted at /content/drive


In [2]:
#Notebook 04- Experiments & Alternative Models
# Colab setup: install extra packages into the runtime (-q keeps pip output short).
# NOTE(review): only timm is version-pinned; torchinfo and einops resolve to whatever
# is current at install time, so re-runs may pick up different versions.
!pip install -q timm==0.9.2
!pip install -q torchinfo
!pip install -q einops

import os, random, time, json, shutil
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.metrics import f1_score, classification_report, confusion_matrix

import timm  #for our efficientnet and optional ViT model
# Record library versions and CUDA availability in the cell output for later debugging.
print("timm version:", timm.__version__)
print("Torch:", torch.__version__, "CUDA:", torch.cuda.is_available())

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/68.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m82.9 MB/s[0m eta [36m0:00:00[0m
[?25htimm version: 0.9.2
Torch: 2.9.0+cu126 CUDA: True


In [3]:
# Cell 2- Config: adjust these
# Dataset root on Google Drive; the prepared train/val CSV splits live beneath it.
ROOT = "/content/drive/MyDrive/food-10/food-10"
CSV_DIR = os.path.join(ROOT, "prepared_splits")

# Run artefacts go to the local Colab disk (OUT_DIR); DRIVE_OUT_DIR is the
# Drive-side folder that best checkpoints are copied into.
OUT_DIR = "/content/food10_experiments"
DRIVE_OUT_DIR = os.path.join(ROOT, "outputs_experiments")

# Create both output folders up front (no error if they already exist).
os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(DRIVE_OUT_DIR, exist_ok=True)

SEED = 42


def set_seed(seed=SEED):
    """Seed the Python, NumPy and PyTorch (CPU + all CUDA devices) RNGs with `seed`."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


# Seed once at import/run time so everything below starts from a known RNG state.
set_seed()

# Shared hyper-parameters and runtime settings read by the training helpers below.
CFG = dict(
    img_size=224,                                   # side length used by the crop transforms
    batch_size=32,                                  # reduce this if the GPU runs out of memory
    epochs=12,                                      # epochs for the full-training phase of each run
    lr=3e-4,                                        # AdamW learning rate
    weight_decay=1e-4,                              # AdamW weight decay
    num_workers=min(8, os.cpu_count() or 4),        # DataLoader workers, capped at 8
    device="cuda" if torch.cuda.is_available() else "cpu",
    save_dir=OUT_DIR,                               # local folder for checkpoints / history
    drive_out=DRIVE_OUT_DIR,                        # Drive folder that receives checkpoint copies
    mixed_precision=True,                           # AMP is only enabled when running on CUDA
)
print("Device:", CFG['device'], "Out dir:", OUT_DIR)

Device: cuda Out dir: /content/food10_experiments


In [4]:
#Cell 3- load prepared CSVs
# Load the prepared train/val splits and validate their schema before anything else uses them.
train_df=pd.read_csv(os.path.join(CSV_DIR,"train.csv"))
val_df=pd.read_csv(os.path.join(CSV_DIR,"val.csv"))
print("Train rows:",len(train_df),"Val rows:",len(val_df))
print("Columns:",train_df.columns.tolist())
# 'class' is read on the next lines, so it is checked together with fullpath/label
# instead of failing later with a bare KeyError.
assert {'fullpath','label','class'}.issubset(train_df.columns), "train.csv must include fullpath, label and class"
# val.csv is fed to the same Dataset, which reads 'fullpath' and 'label'.
assert {'fullpath','label'}.issubset(val_df.columns), "val.csv must include fullpath and label"
classes = sorted(train_df['class'].unique())
num_classes = len(classes)
print("Num classes:", num_classes)

Train rows: 6000 Val rows: 1500
Columns: ['path', 'class', 'fullpath', 'label']
Num classes: 10


In [5]:
#Cell 4- Transforms & dataset
IMG_SIZE = CFG['img_size']

# Training pipeline: random crop/flip/colour augmentation, then tensor conversion
# and per-channel normalisation.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225],
    ),
])

# Validation pipeline: deterministic resize (15% larger than the crop) + centre crop,
# followed by the same tensor conversion and normalisation as training.
val_transforms = transforms.Compose([
    transforms.Resize(size=int(IMG_SIZE*1.15)),
    transforms.CenterCrop(size=IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225],
    ),
])

class Food10Dataset(Dataset):
    """Image-classification dataset backed by a DataFrame.

    Each row must provide a 'fullpath' column (image file to open) and a
    'label' column (value convertible to int).

    Args:
        df: source frame; its index is reset so positional access matches `idx`.
        transform: optional callable applied to the RGB PIL image.
    """

    def __init__(self,df,transform=None):
        self.df=df.reset_index(drop=True)
        self.transform=transform

    def __len__(self):
        """Number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self,idx):
        """Return `(image, label)` for the row at position `idx`."""
        row=self.df.iloc[idx]
        # Open inside a context manager so the underlying file handle is released
        # deterministically; convert() yields an independent RGB image.
        with Image.open(row['fullpath']) as src:
            img=src.convert('RGB')
        # Explicit None check: the transform is applied whenever one was supplied.
        if self.transform is not None:
            img=self.transform(img)
        return img,int(row['label'])

In [6]:
#Cell 5- Model factory: returns a timm model whose classifier head has num_classes outputs
def get_model(name,num_classes,pretrained=True):
    """Create a timm classifier for one of the supported architecture names.

    Args:
        name: case-insensitive architecture name ("resnet101", "resnet50",
            "efficientnet_b0" or "efficientnet-b0").
        num_classes: forwarded to `timm.create_model` as `num_classes`.
        pretrained: forwarded to `timm.create_model` as `pretrained`.

    Returns:
        The model returned by `timm.create_model`.

    Raises:
        ValueError: if `name` is not one of the supported names.
    """
    # Accepted (lower-cased) names -> timm registry identifiers.
    timm_names={
        "resnet101":"resnet101",
        "resnet50":"resnet50",
        "efficientnet_b0":"efficientnet_b0",
        "efficientnet-b0":"efficientnet_b0",
        #optional: ViT-B/16 (may be slow on Colab), intentionally left disabled
        #"vit_b16":"vit_base_patch16_224",
        #"vit_b_16":"vit_base_patch16_224",
    }
    key=name.lower()
    if key not in timm_names:
        raise ValueError("Unknown model: " + key)
    return timm.create_model(timm_names[key],pretrained=pretrained,num_classes=num_classes)
#Quick smoke test:
#m=get_model("efficientnet_b0", num_classes=10); print(m)

In [7]:
#Cell 6- training helpers
from sklearn.metrics import f1_score
# Torch device built from CFG['device']; run_experiment and the evaluation code read this global.
device=torch.device(CFG['device'])
def train_one_epoch(model, loader, optimizer, criterion, scaler, device):
    """Run one optimisation pass over `loader`.

    Autocast is enabled only when CFG['mixed_precision'] is set and `device`
    is a CUDA device; the backward pass and optimizer step go through `scaler`.

    Returns:
        (mean batch loss, macro-F1 of the predictions collected during the pass).
    """
    model.train()
    use_amp=CFG['mixed_precision'] and device.type=='cuda'
    batch_losses,all_preds,all_targets=[],[],[]
    progress=tqdm(loader,desc="Train",leave=False)
    for images,labels in progress:
        images=images.to(device,non_blocking=True)
        labels=labels.to(device,non_blocking=True)

        optimizer.zero_grad()
        with torch.amp.autocast(device_type='cuda',enabled=use_amp):
            logits=model(images)
            loss=criterion(logits, labels)

        # Scaled backward + step; update() adjusts the scale for the next iteration.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_losses.append(loss.item())
        all_preds+=logits.argmax(dim=1).cpu().numpy().tolist()
        all_targets+=labels.cpu().numpy().tolist()
        # Show the running mean loss on the progress bar.
        progress.set_postfix(loss=np.mean(batch_losses))

    mean_loss=np.mean(batch_losses)
    macro_f1=f1_score(all_targets,all_preds,average='macro')
    return mean_loss,macro_f1

def validate(model,loader,criterion,device):
    """Evaluate `model` on `loader` with gradients disabled.

    Returns:
        (mean batch loss, macro-F1, list of target labels, list of predicted labels).
    """
    model.eval()
    batch_losses,all_preds,all_targets=[],[],[]
    with torch.no_grad():
        progress=tqdm(loader,desc="Val  ",leave=False)
        for images,labels in progress:
            images=images.to(device,non_blocking=True)
            labels=labels.to(device,non_blocking=True)
            logits=model(images)
            batch_losses.append(criterion(logits,labels).item())
            all_preds+=logits.argmax(dim=1).cpu().numpy().tolist()
            all_targets+=labels.cpu().numpy().tolist()
    mean_loss=np.mean(batch_losses)
    macro_f1=f1_score(all_targets,all_preds,average='macro')
    return mean_loss,macro_f1,all_targets,all_preds

In [8]:
#Cell 7- run an experiment with a given model name and options
def run_experiment(model_name, train_df, val_df, cfg=CFG, freeze_backbone=False, head_epochs=1, run_name=None):
    """Fine-tune one pretrained model and keep the checkpoint with the best validation macro-F1.

    Args:
        model_name: architecture name understood by `get_model`.
        train_df, val_df: frames handed to `Food10Dataset`; the number of classes
            is taken from the distinct values of `train_df['label']`.
        cfg: settings dict; reads 'batch_size', 'num_workers', 'device', 'lr',
            'weight_decay', 'epochs', 'mixed_precision', 'save_dir' and 'drive_out'.
        freeze_backbone: if True, every parameter except the classifier head
            (`fc`, `classifier` or `head` attribute) starts frozen.
        head_epochs: with `freeze_backbone=True` and `head_epochs > 0`, train the head
            alone for this many epochs, then unfreeze everything and continue with
            lr/3. With `head_epochs=0` the backbone stays frozen for the whole run.
        run_name: label used in file names; defaults to "<model_name>_freeze<flag>".

    Returns:
        dict with 'name', 'best_val_f1', 'history' (per-epoch train/val loss and F1,
        head-only epochs included) and 'best_path' (local checkpoint path).

    Raises:
        ValueError: if `freeze_backbone` is set but the model exposes none of the
            known head attributes (nothing would be trainable).
    """
    if run_name is None:
        run_name=f"{model_name}_freeze{freeze_backbone}"
    print("\n=== Running:", run_name, "===")
    num_classes=int(train_df['label'].nunique())
    model=get_model(model_name, num_classes=num_classes, pretrained=True)
    model=model.to(device)

    if freeze_backbone:
        #freeze all except classifier head
        for param in model.parameters():
            param.requires_grad=False
        #identify head parameters-timm models differ; set last fc/head trainable
        head_attr=next((a for a in ('fc','classifier','head') if hasattr(model,a)),None)
        if head_attr is None:
            # Fail with an explicit message instead of an "empty parameter list" optimizer error.
            raise ValueError(f"Cannot locate classifier head (fc/classifier/head) on model: {model_name}")
        for p in getattr(model,head_attr).parameters():
            p.requires_grad=True

    #Prepare dataloaders
    train_ds=Food10Dataset(train_df,transform=train_transforms)
    val_ds=Food10Dataset(val_df,transform=val_transforms)
    pin=(cfg['device']=='cuda')
    train_loader=DataLoader(train_ds,batch_size=cfg['batch_size'],shuffle=True,
                              num_workers=cfg['num_workers'],pin_memory=pin)
    val_loader=DataLoader(val_ds, batch_size=cfg['batch_size'],shuffle=False,
                            num_workers=cfg['num_workers'],pin_memory=pin)

    #optimizer uses only trainable params
    optimizer=optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),lr=cfg['lr'],weight_decay=cfg['weight_decay'])
    scheduler=optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=cfg['epochs'])
    criterion=nn.CrossEntropyLoss()
    # torch.amp.GradScaler is the non-deprecated spelling and matches the
    # torch.amp.autocast call used by train_one_epoch.
    scaler=torch.amp.GradScaler('cuda',enabled=(cfg['mixed_precision'] and device.type=='cuda'))
    best_f1=0.0
    history={"train_loss":[],"train_f1":[],"val_loss":[],"val_f1":[]}
    best_path=os.path.join(cfg['save_dir'],f"best_{run_name}.pth")
    # Store the architecture name with the checkpoint (in a copy, so the caller's cfg
    # is not mutated) so the model can be rebuilt at evaluation time.
    ckpt_cfg={**cfg,"model_name":model_name}

    def record(tr_loss,tr_f1,val_loss,val_f1):
        """Append one epoch of metrics to `history` as plain Python floats."""
        history['train_loss'].append(float(tr_loss)); history['train_f1'].append(float(tr_f1))
        history['val_loss'].append(float(val_loss)); history['val_f1'].append(float(val_f1))

    def save_best(epoch,phase,current_optimizer,score):
        """Write the best-so-far checkpoint locally and mirror it to the Drive folder."""
        ckpt={"model_state":model.state_dict(),"optimizer_state":current_optimizer.state_dict(),
              "cfg":ckpt_cfg,"model_name":model_name,"epoch":epoch,"phase":phase,"best_f1":score}
        torch.save(ckpt, best_path)
        shutil.copy(best_path, os.path.join(cfg['drive_out'], os.path.basename(best_path)))
        print("Saved best model:", best_path)

    #Optionally run head-only epochs first
    if freeze_backbone and head_epochs>0:
        print("Training head-only for",head_epochs,"epochs")
        for e in range(head_epochs):
            tr_loss,tr_f1=train_one_epoch(model,train_loader,optimizer,criterion,scaler,device)
            val_loss,val_f1,_,_=validate(model,val_loader,criterion,device)
            scheduler.step()
            record(tr_loss,tr_f1,val_loss,val_f1)
            print(f"Head Epoch {e+1} train_f1 {tr_f1:.4f} val_f1 {val_f1:.4f}")
            if val_f1>best_f1:
                # Plain float keeps the checkpoint free of NumPy scalar objects.
                best_f1=float(val_f1)
                save_best(e,"head",optimizer,best_f1)
        #unfreeze all for full fine-tune
        print("Unfreezing all parameters for fine-tuning")
        for p in model.parameters(): p.requires_grad=True
        # Fresh optimizer/scheduler over all parameters, at a third of the base learning rate.
        optimizer=optim.AdamW(model.parameters(),lr=cfg['lr']/3,weight_decay=cfg['weight_decay'])
        scheduler=optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=cfg['epochs'])

    #Full training
    history_path=os.path.join(cfg['save_dir'], f"history_{run_name}.json")
    for epoch in range(cfg['epochs']):
        print(f"\nEpoch {epoch+1}/{cfg['epochs']}")
        tr_loss,tr_f1=train_one_epoch(model,train_loader,optimizer,criterion,scaler,device)
        val_loss,val_f1,_,_=validate(model,val_loader,criterion,device)
        scheduler.step()
        record(tr_loss,tr_f1,val_loss,val_f1)
        print(f"Epoch {epoch+1} Train loss {tr_loss:.4f} f1 {tr_f1:.4f} | Val loss {val_loss:.4f} f1 {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1=float(val_f1)
            save_best(epoch,"full",optimizer,best_f1)
        #save history after each epoch
        with open(history_path, "w") as f:
            json.dump(history, f)
    print("Finished experiment:", run_name, "best_val_f1:", best_f1)
    return {"name":run_name, "best_val_f1":best_f1, "history":history, "best_path":best_path}

In [1]:
#Cell 8- run a couple of experiments and collect results
experiments=[]

#1) EfficientNet-B0: backbone frozen while the head trains for 1 epoch, then full fine-tune
experiments.append(
    run_experiment(
        "efficientnet_b0", train_df, val_df,
        cfg=CFG, freeze_backbone=True, head_epochs=1, run_name="effnetb0_freeze1",
    )
)

#2) ResNet-101: no freeze, full fine-tune
experiments.append(
    run_experiment(
        "resnet101", train_df, val_df,
        cfg=CFG, freeze_backbone=False, head_epochs=0, run_name="resnet101_full",
    )
)

#3) Optional: ViT-B/16 (uncomment if you want)
#experiments.append(run_experiment("vit_b16", train_df, val_df, cfg=CFG, freeze_backbone=False, head_epochs=0, run_name="vitb16_full"))

#Save experiment summary table (one row per run; the per-epoch history is left out)
exp_df=pd.DataFrame(
    [{key: result[key] for key in ("name","best_val_f1","best_path")} for result in experiments]
)
exp_df.to_csv(os.path.join(CFG['save_dir'], "experiment_summary.csv"), index=False)
print("Experiments finished. Summary:")
print(exp_df)

NameError: name 'run_experiment' is not defined

In [None]:
#Cell 9- Plot results from histories
import seaborn as sns
sns.set(style="whitegrid")
# One validation macro-F1 curve per experiment; the x axis counts recorded epochs from 1.
for e in experiments:
    val_curve=e['history']['val_f1']
    epoch_axis=list(range(1, len(val_curve)+1))
    plt.plot(epoch_axis,val_curve,marker='o',label=e['name'])
plt.xlabel("Epoch")
plt.ylabel("Val Macro F1")
plt.title("Val Macro F1- experiments")
plt.legend()
plt.grid(True)
# Save before show() so the written file contains the rendered figure.
plt.savefig(os.path.join(CFG['save_dir'], "experiments_comparison_val_f1.png"), dpi=150)
plt.show()

In [None]:
#Cell 10- Evaluate experiment best checkpoints on the untouched test set
#This uses the same robust matching used earlier to build test_df from test.txt
def build_test_df(ROOT):
    """Index every image under ROOT/images and select the rows listed in ROOT/test.txt.

    Each non-empty line of test.txt is resolved in this order:
      1. exact "class/filename.ext" match,
      2. exact "class/filename" match (extension omitted),
      3. fallback: the part of the entry's base name before the first '.' appears
         as a literal substring in exactly one image path.
    Entries that cannot be resolved are reported in a warning and skipped.

    Args:
        ROOT: dataset root containing an "images/<class>/<file>" tree and "test.txt".

    Returns:
        (test_df, df_all): the matched rows in test.txt order with a fresh 0..n-1 index,
        and the frame of all discovered images. Both have the columns
        'path', 'class', 'fullpath' and 'no_ext'.
    """
    images_dir=os.path.join(ROOT,"images")
    rows=[]
    for cls in sorted(os.listdir(images_dir)):
        cls_dir=os.path.join(images_dir,cls)
        if not os.path.isdir(cls_dir): continue
        # Sorted so row order (and therefore "first match wins") is deterministic.
        for fname in sorted(os.listdir(cls_dir)):
            if fname.lower().endswith(('.jpg','.jpeg','.png')):
                rows.append({"path":f"{cls}/{fname}","class":cls,
                             "fullpath":os.path.join(cls_dir,fname),
                             "no_ext":f"{cls}/{os.path.splitext(fname)[0]}"})
    # Explicit columns keep the frame well-formed even when no image was found.
    df_all=pd.DataFrame(rows,columns=["path","class","fullpath","no_ext"])

    # Constant-time lookups instead of scanning the whole frame per entry;
    # setdefault keeps the first row when a key occurs more than once.
    by_path={}; by_no_ext={}
    for pos,row in enumerate(rows):
        by_path.setdefault(row["path"],pos)
        by_no_ext.setdefault(row["no_ext"],pos)

    test_txt=os.path.join(ROOT,"test.txt")
    with open(test_txt,'r') as f:
        entries=[line.strip() for line in f if line.strip()]

    matched_pos=[]; missing=[]
    for entry in entries:
        pos=by_path.get(entry)
        if pos is None:
            pos=by_no_ext.get(entry)
        if pos is None:
            base=os.path.basename(entry).split('.')[0]
            if base:  # an empty base would match every path
                # regex=False: file names are literal text, not patterns.
                hits=df_all.index[df_all['path'].str.contains(base,regex=False)]
                if len(hits)==1:
                    pos=int(hits[0])
        if pos is None:
            missing.append(entry)
        else:
            matched_pos.append(pos)
    if missing:
        print("Warning: missing test matches (sample):",missing[:10])
    return df_all.iloc[matched_pos].reset_index(drop=True),df_all
# Build the test frame once; df_all (every image found under ROOT/images) is also kept
# because eval_checkpoint takes it as an argument.
test_df,df_all=build_test_df(ROOT)
print("Test rows:",len(test_df))
def eval_checkpoint(pth,df_all,test_df):
    """Load a checkpoint, rebuild its architecture and report macro-F1 on `test_df`.

    The architecture is taken from the checkpoint ('model_name' at the top level or
    inside its 'cfg'); when neither is present, each architecture supported by
    `get_model` is probed until one has exactly the checkpoint's parameter names.
    True labels come from the class -> label pairs of the global `train_df`.

    Args:
        pth: checkpoint path; either a dict with a 'model_state' entry or a raw state dict.
        df_all: frame of all images; accepted for call compatibility, not read here.
        test_df: frame with 'fullpath' and 'class' columns.

    Returns:
        Test macro-F1.

    Raises:
        ValueError: if `test_df` is empty or the architecture cannot be determined.
        KeyError: if no classifier weight is found in the checkpoint, or a test
            class has no label in `train_df`.
    """
    if len(test_df)==0:
        raise ValueError("test_df is empty; nothing to evaluate")

    ckpt=torch.load(pth,map_location='cpu')
    ms=ckpt.get('model_state',ckpt)

    #infer num classes from the classifier weight (timm head names differ per architecture)
    head_names=('fc.weight','classifier.weight','head.weight')
    dotted=tuple('.'+n for n in head_names)
    head_keys=[k for k in ms.keys() if k in head_names or k.endswith(dotted)]
    if not head_keys:
        raise KeyError("No classifier weight (fc/classifier/head) found in checkpoint: "+str(pth))
    # The classifier is taken to be the last matching entry of the state dict.
    num_classes_ckpt=ms[head_keys[-1]].shape[0]

    #detect the model name stored with the checkpoint, if any
    model_name=ckpt.get('model_name') or (ckpt.get('cfg') or {}).get('model_name')
    if model_name:
        print("Detected model in checkpoint cfg:",model_name)
        model=get_model(model_name,num_classes_ckpt,pretrained=False)
    else:
        # No name recorded: pick the supported architecture whose parameter names
        # equal the checkpoint's, rather than assuming resnet50.
        model=None
        for candidate in ("resnet50","resnet101","efficientnet_b0"):
            probe=get_model(candidate,num_classes_ckpt,pretrained=False)
            if set(probe.state_dict().keys())==set(ms.keys()):
                print("Inferred model from checkpoint keys:",candidate)
                model=probe
                break
        if model is None:
            raise ValueError("Could not determine model architecture for checkpoint: "+str(pth))
    model.load_state_dict(ms)
    model.to(device).eval()

    val_tf=transforms.Compose([transforms.Resize(int(IMG_SIZE*1.15)),transforms.CenterCrop(IMG_SIZE),
                                 transforms.ToTensor(),transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225])])

    # class -> label, built once (first occurrence per class) instead of filtering train_df per sample.
    first_rows=train_df.drop_duplicates('class')
    class_to_label={c:int(l) for c,l in zip(first_rows['class'],first_rows['label'])}
    unknown=sorted(set(test_df['class'])-set(class_to_label))
    if unknown:
        raise KeyError("Test classes without a training label: "+", ".join(map(str,unknown)))

    all_preds=[]; all_trues=[]
    batch=64
    for i in range(0,len(test_df),batch):
        batch_df=test_df.iloc[i:i+batch]
        imgs=[]
        for p in batch_df['fullpath'].tolist():
            # Context manager releases the file handle once the RGB copy exists.
            with Image.open(p) as src:
                imgs.append(val_tf(src.convert('RGB')))
        x=torch.stack(imgs).to(device)
        with torch.no_grad():
            out=model(x)
            preds=out.argmax(dim=1).cpu().numpy().tolist()
        all_preds.extend(preds)
        all_trues.extend(class_to_label[c] for c in batch_df['class'].tolist())
    test_f1=f1_score(all_trues, all_preds, average='macro')
    print("Eval",os.path.basename(pth),"Test Macro F1:",test_f1)
    return test_f1
#Evaluate best checkpoints saved by experiments
# Walk the saved summary table and evaluate every checkpoint that still exists on disk.
for row in pd.read_csv(os.path.join(CFG['save_dir'],"experiment_summary.csv")).to_dict('records'):
    pth=row['best_path']
    if not os.path.exists(pth):
        print("Checkpoint not found:",pth)
        continue
    # Best-effort: report a failing checkpoint and carry on with the remaining ones.
    try:
        _=eval_checkpoint(pth,df_all,test_df)
    except Exception as err:
        print("Evaluation failed for",pth,":",err)