# If you want to access the version you have already modified, click "Edit"
# If you want to access the original sample code, click "...", then click "Copy & Edit Notebook"
# 增加mixup

In [1]:
## This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree; listing is disabled, so this only traverses the directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# _exp_name = "sample"

In [3]:
# Import necessary packages.
import numpy as np
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [4]:
# Fix every random number generator in use so that runs are repeatable.
myseed = 6666  # set a random seed for reproducibility

# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed Python's `random` as well: it is imported by this notebook but was
# previously left unseeded, unlike NumPy and torch.
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

import torchvision
print(torchvision.__version__)

0.21.0+cu124


## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [5]:
# Evaluation pipeline (validation / test): no augmentation, just resize the
# PIL image to 128x128 and convert it to a tensor.
test_tfm = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

# Training pipeline. It can also be reused at test time to produce several
# augmented views of an image for ensembling.
train_tfm = transforms.Compose(
    [
        # Random crop, resized to a fixed 128x128 output.
        transforms.RandomResizedCrop(size=128, antialias=True),
        # Geometric / policy-based augmentation to increase data diversity.
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.TrivialAugmentWide(),
        # ToTensor() must stay last: the steps above operate on PIL images.
        transforms.ToTensor(),
    ]
)

# A milder pipeline intended for test-time augmentation (TTA).
tta_tfm = transforms.Compose(
    [
        # Gentle crop: keep at least 60% of the image area instead of the
        # default lower bound of 8%.
        transforms.RandomResizedCrop(128, scale=(0.6, 1.0), ratio=(3 / 4, 4 / 3)),
        # Horizontal flip.
        transforms.RandomHorizontalFlip(p=0.5),
        # Light colour jitter, applied to 80% of the samples.
        transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)],
            p=0.8,
        ),
        transforms.ToTensor(),
    ]
)

## **Datasets**
The data is labelled by file name, so each image is loaded and its label parsed from the name inside `__getitem__`.

加入mixup 将两张图片混合

In [6]:
class FoodDataset(Dataset):
    """Food-11 image dataset that reads labels from file names and can apply mixup.

    File names are expected to look like ``<label>_<anything>.jpg``. Files whose
    prefix is not an integer (e.g. the test split) get the label ``-1``.

    Args:
        path: Directory scanned for ``.jpg`` files when ``files`` is None.
        tfm: Transform applied to every PIL image. It must return a tensor
            when mixup is enabled, because the two images are blended linearly.
        files: Optional explicit sequence of file paths (the hook used for
            K-fold splits). When given, ``path`` is not scanned.
        mode: Mixup is only applied when this equals ``'train'``.
        mixup: Whether to blend each sample with a second random sample.
        alpha: Both shape parameters of the Beta distribution from which the
            mixing ratio is drawn.

    ``__getitem__`` returns ``(image, label)`` with an integer label, or, when
    mixup is active, ``(mixed_image, label_vec)`` where ``label_vec`` is a
    float tensor of length ``num_classes`` whose entries sum to 1.

    NOTE(review): mixup draws from NumPy's global RNG. If the DataLoader is
    ever used with ``num_workers > 0``, consider seeding NumPy per worker via
    ``worker_init_fn`` so workers do not repeat the same random draws.
    """

    def __init__(self,path,tfm=test_tfm,files = None, mode = 'train', mixup = False, alpha = 0.4):
        super().__init__()
        self.path = path
        if files is not None:
            # Caller-provided file list (K-fold); no need to touch the directory.
            self.files = files
        else:
            self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
        self.alpha = alpha
        self.mixup = mixup
        self.mode = mode
        self.num_classes = 11 # Food-11 has 11 classes

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _parse_label(fname):
        """Return the integer label encoded before the first '_' of the file name, or -1."""
        try:
            return int(os.path.basename(fname).split("_")[0])
        except ValueError:
            return -1 # unlabeled file (test split)

    def _load_image(self, fname):
        """Open ``fname``, force three channels, and apply the transform."""
        with Image.open(fname) as im:
            # Grayscale / RGBA / CMYK files would otherwise yield tensors with
            # a different channel count and break batching.
            return self.transform(im.convert("RGB"))

    def __getitem__(self,idx):
        fname = self.files[idx]
        im = self._load_image(fname)
        label = self._parse_label(fname)

        if not (self.mixup and self.mode == 'train'):
            return im,label

        # ---- mixup: blend this sample with a second, randomly chosen one ----
        idx2 = np.random.randint(len(self.files))
        fname2 = self.files[idx2]
        im2 = self._load_image(fname2)
        label2 = self._parse_label(fname2)
        if label < 0 or label2 < 0:
            # Indexing the label vector with -1 would silently credit the last class.
            raise ValueError(f"mixup requires labelled files, got {fname!r} and {fname2!r}")

        # Mixing ratio sampled from Beta(alpha, alpha); cast to a Python float
        # so the tensors keep their own dtype.
        lam = float(np.random.beta(self.alpha, self.alpha))

        # Linear combination of the two image tensors.
        mixed_im = lam * im + (1 - lam) * im2

        # Soft label: probability mass lam on label, (1 - lam) on label2.
        label_vec = torch.zeros(self.num_classes)
        label_vec[label] = lam
        # Accumulate so that label == label2 ends up with a total mass of 1.
        label_vec[label2] += (1 - lam)
        return mixed_im, label_vec


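## Soft-label cross-entropy (mixup targets are probability vectors; `nn.CrossEntropyLoss` accepts class-probability targets only from PyTorch 1.10 onward, so an explicit implementation is defined here)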

In [7]:
import torch.nn as nn
import torch.nn.functional as F

class MixupCrossEntropyLoss(nn.Module):
    """Cross-entropy between logits and soft (probability-vector) targets."""

    def __init__(self):
        super().__init__()

    def forward(self, preds, targets):
        """Return the batch-mean of ``-sum_c targets[c] * log_softmax(preds)[c]``.

        Both arguments are indexed as (batch, class): the softmax and the
        per-sample sum run over dim 1.
        """
        # log_softmax is numerically more stable than log(softmax(...)).
        weighted_log_probs = targets * F.log_softmax(preds, dim=1)
        # Sum over the class dimension, then flip the sign.
        per_sample = weighted_log_probs.sum(dim=1).neg()
        # Average over the batch.
        return per_sample.mean()

In [8]:
class Classifier(nn.Module):
    """Five-stage CNN followed by a three-layer MLP head with 11 outputs.

    Each stage is Conv(3x3, stride 1, pad 1) -> BatchNorm -> ReLU -> MaxPool(2),
    so a [3, 128, 128] input is reduced to [512, 4, 4] before the head.
    """

    def __init__(self):
        super().__init__()
        # Channel widths at the input of stage 1 and the output of every stage:
        # [3,128,128] -> [64,64,64] -> [128,32,32] -> [256,16,16] -> [512,8,8] -> [512,4,4]
        widths = [3, 64, 128, 256, 512, 512]
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.extend(
                [
                    nn.Conv2d(c_in, c_out, 3, 1, 1),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2, 0),
                ]
            )
        self.cnn = nn.Sequential(*layers)

        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Return the logits produced by the CNN trunk and the MLP head."""
        features = self.cnn(x)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

# [Step 2] Use a ResNet50 model
import torchvision.models as models

def get_model():
    """Build a ResNet50 without pretrained weights and an 11-class head.

    Returns:
        The torchvision ResNet50 whose final ``fc`` layer has been replaced by
        a fresh ``nn.Linear`` with 11 outputs.
    """
    # `weights=None` replaces the deprecated `pretrained=False` argument.
    model = models.resnet50(weights=None)

    # Swap the classification layer for an 11-class one. The input width is
    # read from the existing layer instead of being hard-coded.
    model.fc = nn.Linear(model.fc.in_features, 11)
    return model

# 验证一下模型是否能跑
# print(get_model())

In [9]:
# Root directory of the dataset; later cells join "training", "validation" and "test" onto it.
_dataset_dir = "../input/ml2022spring-hw3b/food11"

In [10]:
# ==========================================
# ResNet50 K-Fold 训练代码 (带调试模式)
# ==========================================
import os
import torch
import torch.nn as nn
import torchvision.models as models
import numpy as np
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from sklearn.model_selection import KFold

# ---------------- 配置区域 ----------------
_exp_name = "resnet50"   
device = "cuda" if torch.cuda.is_available() else "cpu"

# 🔥🔥🔥 调试开关 🔥🔥🔥
debug_mode = True  # 【调试时设为 True，正式训练改 False】

if debug_mode:
    print("⚠️ 警告：当前处于调试模式！")
    n_epochs = 2         # 只跑 2 轮看有没有报错
    batch_size = 16      # 小一点无所谓
    n_folds = 2          # 假装有 2 折，实际只会跑第 1 折
    patience = 999       # 调试不需要早停
    limit_batches = 5    # 每个 Epoch 只跑 5 个 Batch
else:
    print("🚀 正式模式：火力全开！")
    n_epochs = 80        # 正式训练轮数
    batch_size = 32      # ResNet50 比较吃显存，建议 32
    n_folds = 5          # 5 折交叉验证
    patience = 15        # 早停
    limit_batches = None # 跑完所有数据

learning_rate = 0.0003
# ------------------------------------------

# 1. Model factory
def get_model():
    """Return a ResNet50 without pretrained weights whose head outputs 11 classes."""
    net = models.resnet50(weights=None)
    # Read the head's input width from the model rather than hard-coding it.
    head_in = net.fc.in_features
    net.fc = nn.Linear(head_in, 11)
    return net

# 2. Build the pool of labelled files used for K-fold splitting.
# `_dataset_dir` must already be defined by an earlier cell.
train_dir = os.path.join(_dataset_dir, "training")
val_dir = os.path.join(_dataset_dir, "validation")

all_train_files = sorted(
    os.path.join(train_dir, name) for name in os.listdir(train_dir) if name.endswith(".jpg")
)
all_val_files = sorted(
    os.path.join(val_dir, name) for name in os.listdir(val_dir) if name.endswith(".jpg")
)
# The official training and validation splits are merged into one array.
all_files = np.array(all_train_files + all_val_files)

# In debug mode keep only the first 200 files so everything loads quickly.
if debug_mode:
    all_files = all_files[:200]

# 3. Loss functions: soft-label cross-entropy for mixup training batches,
#    standard cross-entropy for validation batches with integer labels.
criterion_mixup = MixupCrossEntropyLoss()
criterion_normal = nn.CrossEntropyLoss()

# 4. K-fold cross-validation: one model is trained per fold and the
#    checkpoint with the best validation accuracy is saved for each fold.
kf = KFold(n_splits=n_folds, shuffle=True, random_state=6666)

for fold_idx, (train_idx, val_idx) in enumerate(kf.split(all_files)):
    print(f"\n===== Starting Fold {fold_idx + 1} / {n_folds} =====")

    # Split the file pool for this fold.
    fold_train_files = all_files[train_idx]
    fold_val_files = all_files[val_idx]

    # Training uses augmentation + mixup; validation uses neither.
    train_set = FoodDataset(path=train_dir, tfm=train_tfm, files=fold_train_files, mixup=True, mode='train')
    valid_set = FoodDataset(path=val_dir, tfm=test_tfm, files=fold_val_files, mixup=False, mode='valid')

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    # Fresh model and optimizer for every fold.
    model = get_model().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    best_acc = 0
    stale = 0  # consecutive epochs without a new best validation accuracy

    for epoch in range(n_epochs):
        # --- Training ---
        model.train()
        train_loss = []
        train_accs = []

        for i, batch in enumerate(tqdm(train_loader, desc=f"Fold {fold_idx+1} Epoch {epoch+1}")):
            imgs, labels = batch
            imgs, labels = imgs.to(device), labels.to(device)

            logits = model(imgs)
            loss = criterion_mixup(logits, labels)  # labels are soft vectors here

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Mixup accuracy: compare the predicted class with the dominant
            # class of the soft label.
            acc = (logits.argmax(dim=-1) == labels.argmax(dim=-1)).float().mean()
            train_loss.append(loss.item())
            train_accs.append(acc.item())

            # Debug: stop after exactly `limit_batches` batches. `i` is
            # zero-based, so the count of processed batches is i + 1.
            if debug_mode and limit_batches is not None and i + 1 >= limit_batches:
                break

        avg_train_loss = sum(train_loss) / len(train_loss)
        avg_train_acc = sum(train_accs) / len(train_accs)

        # --- Validation ---
        model.eval()
        valid_loss = []
        valid_accs = []

        with torch.no_grad():
            for i, batch in enumerate(valid_loader):
                imgs, labels = batch
                imgs, labels = imgs.to(device), labels.to(device)

                logits = model(imgs)
                loss = criterion_normal(logits, labels)
                acc = (logits.argmax(dim=-1) == labels).float().mean()

                valid_loss.append(loss.item())
                valid_accs.append(acc.item())

                # Debug: same batch cap as in training.
                if debug_mode and limit_batches is not None and i + 1 >= limit_batches:
                    break

        avg_valid_loss = sum(valid_loss) / len(valid_loss)
        avg_valid_acc = sum(valid_accs) / len(valid_accs)

        print(f"[Fold {fold_idx+1} | {epoch+1}/{n_epochs}] Train Acc: {avg_train_acc:.4f}, Valid Acc: {avg_valid_acc:.4f}")

        # Keep the checkpoint with the best validation accuracy for this fold.
        if avg_valid_acc > best_acc:
            print(f"🌟 New Best Fold {fold_idx+1} Model! Acc: {avg_valid_acc:.4f}")
            torch.save(model.state_dict(), f"{_exp_name}_fold{fold_idx+1}_best.ckpt")
            best_acc = avg_valid_acc
            stale = 0
        else:
            stale += 1
            # Early stopping once more than `patience` epochs pass without improvement.
            if stale > patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Debug: only the first fold is run, as a smoke test.
    if debug_mode:
        print("🛑 调试完成：程序已强制中断，请检查是否有报错。")
        break

## Testing and generate prediction CSV

In [11]:
# [Step 4] Ensemble inference: average the fold models, each combining a plain
# prediction with test-time augmentation (TTA).
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

# Must match the experiment name used when the checkpoints were saved.
_exp_name = "resnet50"

# Blend weights for the un-augmented and the TTA-averaged logits, and the
# number of augmented passes per model.
original_weight = 0.6
tta_weight = 0.4
tta_times = 5


def _collect_logits(model, loader, desc):
    """Run `model` over every batch of `loader` and return the logits as one NumPy array."""
    chunks = []
    with torch.no_grad():
        for data, _ in tqdm(loader, desc=desc):
            chunks.append(model(data.to(device)).cpu().numpy())
    return np.concatenate(chunks)


# Plain test set (deterministic resize only).
test_set = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Augmented test set for TTA. Uses the mild `tta_tfm` defined for this purpose
# rather than the much more aggressive training pipeline.
test_set_aug = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=tta_tfm)
test_loader_aug = DataLoader(test_set_aug, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Accumulator for the summed (weighted) logits of all fold models.
final_logits = np.zeros((len(test_set), 11))
n_models_used = 0

# Folds without a checkpoint (e.g. after a debug run that trained only the
# first fold) are skipped.
for fold_idx in range(n_folds):
    model_path = f"./{_exp_name}_fold{fold_idx + 1}_best.ckpt"

    if not os.path.exists(model_path):
        print(f"⚠️ Model {model_path} not found, skipping.")
        continue

    print(f"Inference with {model_path}...")

    model = get_model().to(device)
    # The checkpoint is a plain state_dict, so restrict unpickling to tensors.
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
    model.eval()

    # 1. Prediction on the un-augmented test images.
    final_logits += original_weight * _collect_logits(model, test_loader, f"Fold {fold_idx+1} Original")

    # 2. TTA: average the logits over several randomly augmented passes.
    logits_aug_sum = np.zeros_like(final_logits)
    for t in range(tta_times):
        logits_aug_sum += _collect_logits(model, test_loader_aug, f"Fold {fold_idx+1} TTA {t+1}")
    final_logits += tta_weight * (logits_aug_sum / tta_times)

    n_models_used += 1

# Without any checkpoint the logits stay all-zero and argmax would silently
# predict class 0 for every image; fail loudly instead.
if n_models_used == 0:
    raise FileNotFoundError(
        f"No checkpoint matching ./{_exp_name}_fold*_best.ckpt was found; nothing to predict with."
    )

# The predicted class is the index of the largest accumulated logit.
prediction = np.argmax(final_logits, axis=1)

print("Prediction shape:", prediction.shape)

One ../input/ml2022spring-hw3b/food11/test sample ../input/ml2022spring-hw3b/food11/test/0001.jpg
One ../input/ml2022spring-hw3b/food11/test sample ../input/ml2022spring-hw3b/food11/test/0001.jpg
Inference with /kaggle/input/hw3-version5-model/sample_fold1_best.ckpt...




  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

Inference with /kaggle/input/hw3-version5-model/sample_fold2_best.ckpt...


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

Inference with /kaggle/input/hw3-version5-model/sample_fold3_best.ckpt...


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

Inference with /kaggle/input/hw3-version5-model/sample_fold4_best.ckpt...


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

Inference with /kaggle/input/hw3-version5-model/sample_fold5_best.ckpt...


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

In [12]:
# Create the submission CSV.
def pad4(i):
    """Return ``str(i)`` left-padded with '0' to a width of at least four characters."""
    return str(i).rjust(4, "0")
# Ids are the 1-based, zero-padded positions of the test images; the category
# is the ensemble prediction at the same position.
df = pd.DataFrame(
    {
        "Id": [pad4(i) for i in range(1, len(test_set) + 1)],
        "Category": prediction,
    }
)
df.to_csv("submission.csv", index=False)
print("Ensemble submission saved!")

Ensemble submission saved!
