In [5]:
# --- Imports, Configuration, and Dataset ---
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# --- FIX: Modern Imports ---
from torch.amp import autocast, GradScaler 
from torchvision import models
from sklearn.model_selection import train_test_split
from pathlib import Path

# --- CONFIGURATION ---
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATA_ROOT = 'nutrition5k_dataset'
META_PATH = os.path.join(DATA_ROOT, 'metadata', 'dish_metadata_cafe1.csv')
IMG_ROOT  = os.path.join(DATA_ROOT, 'imagery', 'realsense_overhead')
BASELINE_CHECKPOINT = 'checkpoints/best_resnet50_unet.pth' 
BATCH_SIZE = 4

print(f"Using device: {DEVICE}")

# --- METADATA LOADING ---
def load_metadata():
    if not os.path.exists(META_PATH):
        # Safe fallback if path is wrong, creates dummy DF to prevent crash
        print(f"Warning: Metadata not found at {META_PATH}")
        return pd.DataFrame(columns=[0]) 
        
    print('Loading metadata...')
    df = pd.read_csv(META_PATH, header=None, index_col=0, on_bad_lines='skip')
    
    existing_folders = set(os.listdir(IMG_ROOT)) if os.path.exists(IMG_ROOT) else set()
    df = df[df.index.isin(existing_folders)].copy()
    print(f'Valid dishes found: {len(df)}')
    return df

# --- DATASET CLASS ---
def resize_to_multiple(tensor, mul=32):
    if tensor.dim() == 3:
        _, h, w = tensor.shape
    else:
        _, _, h, w = tensor.shape
    nh = ((h + mul - 1) // mul) * mul
    nw = ((w + mul - 1) // mul) * mul
    if nh != h or nw != w:
        tensor = F.interpolate(tensor.unsqueeze(0), size=(nh, nw), mode='bilinear', align_corners=False)
        tensor = tensor.squeeze(0)
    return tensor

class Nutrition5kDataset(Dataset):
    def __init__(self, df, img_root):
        self.img_root = img_root
        self.ids = df.index.tolist()

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        dish_id = self.ids[idx]
        rgb_path = os.path.join(self.img_root, dish_id, 'rgb.png')
        dep_path = os.path.join(self.img_root, dish_id, 'depth_raw.png')

        rgb = cv2.imread(rgb_path)
        dep = cv2.imread(dep_path, cv2.IMREAD_UNCHANGED)

        # Robust check: return zeros if file is missing/corrupt
        if rgb is None or dep is None:
            return torch.zeros((3, 480, 640)), torch.zeros((1, 480, 640))

        rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
        dep = dep.astype(np.float32) / 1000.0   # mm -> meters

        rgb = torch.from_numpy(rgb.transpose(2, 0, 1)).float() / 255.0
        dep = torch.from_numpy(dep).unsqueeze(0)

        rgb = resize_to_multiple(rgb)
        dep = resize_to_multiple(dep)

        return rgb, dep

# --- PREPARE LOADERS ---
df = load_metadata()
if len(df) > 0:
    train_df, temp_df = train_test_split(df, test_size=0.30, random_state=42)
    val_df,   test_df = train_test_split(temp_df, test_size=0.50, random_state=42)
    print(f"Split -> Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}")

    train_loader = DataLoader(Nutrition5kDataset(train_df, IMG_ROOT), batch_size=BATCH_SIZE, 
                              shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    val_loader   = DataLoader(Nutrition5kDataset(val_df, IMG_ROOT), batch_size=BATCH_SIZE, 
                              shuffle=False, num_workers=2, pin_memory=True)
else:
    print("Error: No valid data found. Check paths.")

Using device: cuda
Loading metadata...
Valid dishes found: 2898
Split -> Train: 2028 | Val: 435 | Test: 435


In [6]:
# --- Model Architecture ---import os
class ResNetUNet(nn.Module):
    def __init__(self):
        super().__init__()
        res = models.resnet50(weights='IMAGENET1K_V2')

        self.enc0 = nn.Sequential(res.conv1, res.bn1, res.relu)
        self.pool = res.maxpool
        self.e1, self.e2, self.e3, self.e4 = res.layer1, res.layer2, res.layer3, res.layer4

        self.up4 = nn.ConvTranspose2d(2048, 1024, 2, 2)
        self.c4 = nn.Sequential(nn.Conv2d(2048, 1024, 3, 1, 1), nn.ReLU(True))
        self.up3 = nn.ConvTranspose2d(1024, 512, 2, 2)
        self.c3 = nn.Sequential(nn.Conv2d(1024, 512, 3, 1, 1), nn.ReLU(True))
        self.up2 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.c2 = nn.Sequential(nn.Conv2d(512, 256, 3, 1, 1), nn.ReLU(True))
        self.up1 = nn.ConvTranspose2d(256, 64, 2, 2)
        self.c1 = nn.Sequential(nn.Conv2d(128, 64, 3, 1, 1), nn.ReLU(True))
        self.up0 = nn.ConvTranspose2d(64, 32, 2, 2)
        self.c0 = nn.Sequential(nn.Conv2d(32, 32, 3, 1, 1), nn.ReLU(True))
        self.out = nn.Conv2d(32, 1, 1)

    def forward(self, x):
        s0  = self.enc0(x)
        s0p = self.pool(s0)
        s1  = self.e1(s0p)
        s2  = self.e2(s1)
        s3  = self.e3(s2)
        b   = self.e4(s3)

        d = self.up4(b);  d = torch.cat([d, s3], 1); d = self.c4(d)
        d = self.up3(d);  d = torch.cat([d, s2], 1); d = self.c3(d)
        d = self.up2(d);  d = torch.cat([d, s1], 1); d = self.c2(d)
        d = self.up1(d);  d = torch.cat([d, s0], 1); d = self.c1(d)
        d = self.up0(d);  d = self.c0(d)
        return self.out(d)

In [7]:
# --- Physics-Informed Loss Functions ---
class GradientLoss(nn.Module):
    def __init__(self):
        super(GradientLoss, self).__init__()

    def forward(self, pred, gt, mask):
        pred_dx = torch.abs(pred[:, :, :, :-1] - pred[:, :, :, 1:])
        pred_dy = torch.abs(pred[:, :, :-1, :] - pred[:, :, 1:, :])
        gt_dx = torch.abs(gt[:, :, :, :-1] - gt[:, :, :, 1:])
        gt_dy = torch.abs(gt[:, :, :-1, :] - gt[:, :, 1:, :])

        weights_x = mask[:, :, :, 1:] * mask[:, :, :, :-1]
        weights_y = mask[:, :, 1:, :] * mask[:, :, :-1, :]

        return torch.mean(torch.abs(pred_dx - gt_dx) * weights_x) + \
               torch.mean(torch.abs(pred_dy - gt_dy) * weights_y)

class LShapePhysicsLoss(nn.Module):
    def __init__(self):
        super(LShapePhysicsLoss, self).__init__()
        self.kernel = torch.tensor([[0, 1, 0], [1, -4, 1], [0, 1, 0]], 
                                   dtype=torch.float32).view(1, 1, 3, 3)

    def forward(self, pred, mask):
        if self.kernel.device != pred.device:
            self.kernel = self.kernel.to(pred.device)
        curvature = F.conv2d(pred, self.kernel, padding=1)
        return torch.mean(torch.abs(curvature) * mask)

class HybridLoss(nn.Module):
    def __init__(self, alpha=1.0, beta=0.5, gamma=0.1):
        super(HybridLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.grad = GradientLoss()
        self.phys = LShapePhysicsLoss()

    def forward(self, pred, gt):
        mask = (gt > 0.001).float()
        l1 = torch.sum(torch.abs(pred - gt) * mask) / (torch.sum(mask) + 1e-6)
        g_loss = self.grad(pred, gt, mask)
        p_loss = self.phys(pred, mask)
        return self.alpha * l1 + self.beta * g_loss + self.gamma * p_loss

In [8]:
# --- Training Loop ---
model = ResNetUNet().to(DEVICE)

if os.path.exists(BASELINE_CHECKPOINT):
    print(f"Loading baseline from {BASELINE_CHECKPOINT}...")
    model.load_state_dict(torch.load(BASELINE_CHECKPOINT, map_location=DEVICE, weights_only=True))
else:
    print("No baseline found. Starting fresh.")

optimizer = torch.optim.Adam(model.parameters(), lr=5e-6, weight_decay=1e-5)
scaler = GradScaler(device='cuda') # Explicit device for Scaler
criterion = HybridLoss(alpha=1.0, beta=0.5, gamma=0.1).to(DEVICE)

Path("checkpoints_pinn_final").mkdir(exist_ok=True)
best_mape = float('inf')

print("\nSTARTING PHYSICS-INFORMED TRAINING...")

for epoch in range(1, 26):
    model.train()
    train_loss_accum = 0.0
    batches_count = 0
    
    for rgb, gt in train_loader:
        rgb, gt = rgb.to(DEVICE), gt.to(DEVICE)
        
        if gt.max() <= 0.001:
            continue 

        optimizer.zero_grad()
        
        # --- FIX: Correct autocast syntax for torch.amp ---
        with autocast(device_type='cuda', dtype=torch.float16):
            pred = model(rgb)
            loss = criterion(pred, gt)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss_accum += loss.item()
        batches_count += 1

    # Validation
    model.eval()
    val_mape_accum = 0.0
    valid_val_batches = 0
    
    with torch.no_grad():
        for rgb, gt in val_loader:
            rgb, gt = rgb.to(DEVICE), gt.to(DEVICE)
            
            mask = (gt > 0.001).float()
            if mask.sum() < 10: continue
                
            with autocast(device_type='cuda', dtype=torch.float16):
                pred = model(rgb)
            
            valid_gt = torch.clamp(gt, min=0.001)
            batch_mape = torch.abs(pred - gt) / valid_gt
            val_mape_accum += (batch_mape * mask).sum() / (mask.sum() + 1e-6)
            valid_val_batches += 1

    val_mape = (val_mape_accum / valid_val_batches).item() * 100 if valid_val_batches > 0 else float('inf')
    avg_loss = train_loss_accum / batches_count if batches_count > 0 else 0
    
    print(f"Epoch {epoch:02d} | Hybrid Loss: {avg_loss:.4f} | Val MAPE: {val_mape:.3f}%")

    if val_mape < best_mape:
        best_mape = val_mape
        torch.save(model.state_dict(), "checkpoints_pinn_final/BEST_PINN.pth")
        print(f"   -> NEW BEST: {best_mape:.3f}% (Saved)")

print(f"\nTRAINING COMPLETE! Best MAPE: {best_mape:.3f}%")


Loading baseline from checkpoints/best_resnet50_unet.pth...

STARTING PHYSICS-INFORMED TRAINING...


[ WARN:0@407.429] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@407.429] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@472.382] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@472.382] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@500.660] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@500.660] global loadsave.cpp:275 findDecoder imread_('

Epoch 01 | Hybrid Loss: 0.1030 | Val MAPE: 1.699%
   -> NEW BEST: 1.699% (Saved)


[ WARN:0@523.337] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@523.337] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@599.239] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@599.239] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@602.762] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@602.762] global loadsave.cpp:275 findDecoder imread_('

Epoch 02 | Hybrid Loss: 0.0758 | Val MAPE: 1.483%
   -> NEW BEST: 1.483% (Saved)


[ WARN:0@713.538] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@713.538] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@727.179] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@727.179] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@727.695] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@727.695] global loadsave.cpp:275 findDecoder imread_('

Epoch 03 | Hybrid Loss: 0.0682 | Val MAPE: 1.377%
   -> NEW BEST: 1.377% (Saved)


[ WARN:0@793.500] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@793.500] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@863.411] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@863.411] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@873.550] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@873.550] global loadsave.cpp:275 findDecoder imread_('

Epoch 04 | Hybrid Loss: 0.0633 | Val MAPE: 1.314%
   -> NEW BEST: 1.314% (Saved)


[ WARN:0@901.706] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@901.706] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@962.840] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@962.840] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@988.068] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@988.068] global loadsave.cpp:275 findDecoder imread_('

Epoch 05 | Hybrid Loss: 0.0596 | Val MAPE: 1.237%
   -> NEW BEST: 1.237% (Saved)


[ WARN:0@1107.787] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1107.787] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1117.041] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1117.041] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1133.572] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1133.573] global loadsave.cpp:275 findDecoder imr

Epoch 06 | Hybrid Loss: 0.0569 | Val MAPE: 1.217%
   -> NEW BEST: 1.217% (Saved)


[ WARN:0@1185.765] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1185.765] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1209.335] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1209.335] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1276.583] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1276.583] global loadsave.cpp:275 findDecoder imr

Epoch 07 | Hybrid Loss: 0.0540 | Val MAPE: 1.168%
   -> NEW BEST: 1.168% (Saved)


[ WARN:0@1368.177] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1368.177] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1379.998] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1379.998] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1406.650] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1406.650] global loadsave.cpp:275 findDecoder imr

Epoch 08 | Hybrid Loss: 0.0522 | Val MAPE: 1.170%


[ WARN:0@1476.983] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1476.983] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1502.004] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1502.004] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1517.304] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1517.304] global loadsave.cpp:275 findDecoder imr

Epoch 09 | Hybrid Loss: 0.0503 | Val MAPE: 1.119%
   -> NEW BEST: 1.119% (Saved)


[ WARN:0@1567.420] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1567.420] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1657.556] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1657.556] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1658.720] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1658.720] global loadsave.cpp:275 findDecoder imr

Epoch 10 | Hybrid Loss: 0.0489 | Val MAPE: 1.090%
   -> NEW BEST: 1.090% (Saved)


[ WARN:0@1720.340] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1720.340] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1740.859] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1740.859] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1770.125] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1770.125] global loadsave.cpp:275 findDecoder imr

Epoch 11 | Hybrid Loss: 0.0476 | Val MAPE: 1.085%
   -> NEW BEST: 1.085% (Saved)


[ WARN:0@1809.534] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1809.534] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1855.812] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1855.812] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1883.653] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1883.653] global loadsave.cpp:275 findDecoder imr

Epoch 12 | Hybrid Loss: 0.0466 | Val MAPE: 1.041%
   -> NEW BEST: 1.041% (Saved)


[ WARN:0@1938.518] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1938.518] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@1967.552] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@1967.552] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2017.545] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2017.545] global loadsave.cpp:275 findDecoder imr

Epoch 13 | Hybrid Loss: 0.0454 | Val MAPE: 1.047%


[ WARN:0@2093.575] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2093.575] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2123.568] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2123.568] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2133.224] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2133.224] global loadsave.cpp:275 findDecoder imr

Epoch 14 | Hybrid Loss: 0.0443 | Val MAPE: 1.027%
   -> NEW BEST: 1.027% (Saved)


[ WARN:0@2197.491] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2197.491] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2223.427] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2223.427] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2281.451] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2281.451] global loadsave.cpp:275 findDecoder imr

Epoch 15 | Hybrid Loss: 0.0433 | Val MAPE: 1.009%
   -> NEW BEST: 1.009% (Saved)


[ WARN:0@2367.172] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2367.172] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2379.451] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2379.451] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2394.574] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2394.574] global loadsave.cpp:275 findDecoder imr

Epoch 16 | Hybrid Loss: 0.0422 | Val MAPE: 1.026%


[ WARN:0@2462.214] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2462.214] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2464.343] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2464.343] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2527.279] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2527.279] global loadsave.cpp:275 findDecoder imr

Epoch 17 | Hybrid Loss: 0.0414 | Val MAPE: 0.992%
   -> NEW BEST: 0.992% (Saved)


[ WARN:0@2593.188] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2593.188] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2615.158] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2615.159] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2615.881] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2615.881] global loadsave.cpp:275 findDecoder imr

Epoch 18 | Hybrid Loss: 0.0407 | Val MAPE: 0.982%
   -> NEW BEST: 0.982% (Saved)


[ WARN:0@2738.359] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2738.359] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2739.089] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2739.089] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2821.442] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2821.442] global loadsave.cpp:275 findDecoder imr

Epoch 19 | Hybrid Loss: 0.0402 | Val MAPE: 0.988%


[ WARN:0@2841.971] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2841.971] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2875.271] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2875.271] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@2942.743] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2942.743] global loadsave.cpp:275 findDecoder imr

Epoch 20 | Hybrid Loss: 0.0395 | Val MAPE: 0.961%
   -> NEW BEST: 0.961% (Saved)


[ WARN:0@2970.611] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@2970.611] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3030.082] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3030.082] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3038.284] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3038.284] global loadsave.cpp:275 findDecoder imr

Epoch 21 | Hybrid Loss: 0.0389 | Val MAPE: 0.979%


[ WARN:0@3129.188] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3129.188] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3202.298] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3202.298] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3214.068] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3214.068] global loadsave.cpp:275 findDecoder imr

Epoch 22 | Hybrid Loss: 0.0385 | Val MAPE: 0.950%
   -> NEW BEST: 0.950% (Saved)


[ WARN:0@3263.378] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3263.379] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3329.370] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3329.370] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3333.135] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3333.135] global loadsave.cpp:275 findDecoder imr

Epoch 23 | Hybrid Loss: 0.0380 | Val MAPE: 0.945%
   -> NEW BEST: 0.945% (Saved)


[ WARN:0@3436.452] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3436.452] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3440.006] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3440.006] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3445.207] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3445.207] global loadsave.cpp:275 findDecoder imr

Epoch 24 | Hybrid Loss: 0.0376 | Val MAPE: 0.939%
   -> NEW BEST: 0.939% (Saved)


[ WARN:0@3540.093] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3540.093] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109511/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3570.310] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3570.310] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1558109714/depth_raw.png'): can't open/read file: check file path/integrity
[ WARN:0@3602.186] global loadsave.cpp:275 findDecoder imread_('nutrition5k_dataset/imagery/realsense_overhead/dish_1557862384/rgb.png'): can't open/read file: check file path/integrity
[ WARN:0@3602.186] global loadsave.cpp:275 findDecoder imr

Epoch 25 | Hybrid Loss: 0.0371 | Val MAPE: 0.937%
   -> NEW BEST: 0.937% (Saved)

TRAINING COMPLETE! Best MAPE: 0.937%


In [9]:
def evaluate_foreground_only(model, loader, device, table_cutoff=0.02):
    """
    Calculates MAPE only on the food, ignoring the table/tray.
    Assumes food is closer to the camera than the table.
    
    table_cutoff: margins in meters (e.g. 2cm) to separate food from table.
    """
    model.eval()
    food_mape_accum = 0.0
    full_mape_accum = 0.0
    valid_batches = 0
    
    print("\n--- EVALUATING FOREGROUND (FOOD ONLY) VS FULL IMAGE ---")
    
    with torch.no_grad():
        for rgb, gt in loader:
            rgb, gt = rgb.to(device), gt.to(device)
            
            # Skip corrupt images
            if gt.max() <= 0.001: continue
                
            with torch.cuda.amp.autocast():
                pred = model(rgb)
            
            # 1. Identify the Table (Background)
            # In this dataset, the max depth in the image is usually the table.
            # We define "Food" as anything significantly closer than the max depth.
            batch_max_depth = gt.amax(dim=(1, 2, 3), keepdim=True)
            food_mask = (gt < (batch_max_depth - table_cutoff)) & (gt > 0.001)
            
            # 2. Calculate Standard MAPE (Full Image)
            valid_mask = (gt > 0.001).float()
            abs_diff = torch.abs(pred - gt)
            
            # Clamp GT to avoid divide by zero
            safe_gt = torch.clamp(gt, min=0.001)
            
            # Full Image Error
            full_mape = (abs_diff / safe_gt) * valid_mask
            full_score = full_mape.sum() / (valid_mask.sum() + 1e-6)
            
            # 3. Calculate Foreground MAPE (Food Only)
            # Only count error where food_mask is True
            food_mape = (abs_diff / safe_gt) * food_mask.float()
            
            # Avoid nan if a batch has no food (empty plate)
            if food_mask.sum() > 0:
                food_score = food_mape.sum() / food_mask.sum()
                food_mape_accum += food_score.item()
                full_mape_accum += full_score.item()
                valid_batches += 1

    if valid_batches > 0:
        print(f"Full Image MAPE (with table):  {full_mape_accum/valid_batches * 100:.3f}%")
        print(f"Food-Only MAPE (strict):       {food_mape_accum/valid_batches * 100:.3f}%")
    else:
        print("No valid batches found.")

# Run the evaluation
evaluate_foreground_only(model, val_loader, DEVICE)


--- EVALUATING FOREGROUND (FOOD ONLY) VS FULL IMAGE ---


  with torch.cuda.amp.autocast():


Full Image MAPE (with table):  0.937%
Food-Only MAPE (strict):       0.935%
