# If you want to access the version you have already modified, click "Edit"
# If you want to access the original sample code, click "...", then click "Copy & Edit Notebook"

In [1]:
## This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('./food11'):
    for filename in filenames:
        pass
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
_exp_name = "strong"

In [3]:
# Import necessary packages.
import numpy as np
import torch
import os
import torch.nn as nn
import torchvision
import torchvision.transforms.v2 as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [4]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [5]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
    normalize,
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.RandomResizedCrop((128, 128), scale=(0.8, 1.0), ratio=(1.0, 1.0)),
    # You may add some transforms here.
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomChoice([
        transforms.TrivialAugmentWide(),
        transforms.RandAugment(),
    ]),
    # transforms.ColorJitter(brightness=0.4, contrast=0.2, saturation=0.2, hue=0.1),
    # ToTensor() should be the last one of the transforms.
    transforms.PILToTensor(),
    transforms.ConvertImageDtype(torch.float),
    normalize,
])


In [6]:
# Mixup or Cutmix
from torch.utils.data import default_collate

num_classes, alpha = 11, 1.0
mixup = transforms.MixUp(num_classes=num_classes, alpha=alpha)
cutmix = transforms.CutMix(num_classes=num_classes, alpha=alpha)
MorC = transforms.RandomChoice([mixup, cutmix], p=(0.5, 0.5))
collate_fn = lambda batch: MorC(*default_collate(batch))

## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path=None,tfm=test_tfm,files = None, is_test=False):
        super(FoodDataset).__init__()
        self.path = path
        self.is_test = is_test
        # self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        self.files = sorted(files) if files != None else sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # im = self.data[idx]
        if self.is_test:
            return im
        else:
            label = int(fname.split("\\")[-1].split("_")[0]) # test has no label
            return im, label

In [8]:
class MyResidual(nn.Module):
        def __init__(self):
            super(MyResidual, self).__init__()

            self.cnn_layer1 = nn.Sequential(
                nn.Conv2d(3, 64, 3, 1, 1),
                nn.BatchNorm2d(64),         # [64, 128, 128]
            )

            self.cnn_layer2 = nn.Sequential(
                nn.Conv2d(64, 64, 3, 1, 1),
                nn.BatchNorm2d(64),         # [64, 128, 128]
            )

            self.cnn_layer3 = nn.Sequential(
                nn.Conv2d(64, 128, 3, 2, 1),
                nn.BatchNorm2d(128),        # [128, 64, 64]
            )

            self.cnn_layer4 = nn.Sequential(
                nn.Conv2d(128, 128, 3, 1, 1),
                nn.BatchNorm2d(128),        # [128, 64, 64]
            )
            self.cnn_layer5 = nn.Sequential(
                nn.Conv2d(128, 256, 3, 2, 1),
                nn.BatchNorm2d(256),        # [256, 32, 32]
            )
            self.cnn_layer6 = nn.Sequential(
                nn.Conv2d(256, 256, 3, 1, 1),
                nn.BatchNorm2d(256),        # [256, 32, 32]
            )
            self.cnn_layer7 = nn.Sequential(
                nn.Conv2d(256, 512, 3, 2, 1),
                nn.BatchNorm2d(512),        # [512, 16, 16]
            )
            
            self.fc_layer = nn.Sequential(
                nn.AdaptiveAvgPool2d((4, 4)), # [512, 4, 4]
                nn.Flatten(),               # [512 * 4 * 4]
                nn.Linear(512 * 4 * 4, 1024),
                nn.ReLU(),
                nn.Dropout(0.5),

                nn.Linear(1024, 512),
                nn.ReLU(),
                nn.Dropout(0.5),
                
                nn.Linear(512, 11)
            )
            self.relu = nn.ReLU()

        def forward(self, x):
            # input (x): [batch_size, 3, 128, 128]
            # output: [batch_size, 11]

            # Extract features by convolutional layers.
            x1 = self.cnn_layer1(x)

            x1 = self.relu(x1)

            x2 = self.cnn_layer2(x1) + x1

            x2 = self.relu(x2)

            x3 = self.cnn_layer3(x2)

            x3 = self.relu(x3) + x3

            x4 = self.cnn_layer4(x3)

            x4 = self.relu(x4)

            x5 = self.cnn_layer5(x4)

            x5 = self.relu(x5)

            x6 = self.cnn_layer6(x5) + x5

            x6 = self.relu(x6)

            # The extracted feature map must be flatten before going to fully-connected layers.
            xout = self.cnn_layer7(x6)

            # The features are transformed by fully-connected layers to obtain the final logits.
            xout = self.fc_layer(xout)
            return xout

In [9]:
class Resnet34(nn.Module):
    def __init__(self):
        super(Resnet34, self).__init__()
        self.resnet = torchvision.models.resnet34(weights=None)
        self.fc_feat = self.resnet.fc.in_features
        
        self.resnet.fc = nn.Sequential(
            nn.Linear(self.fc_feat, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),

            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            
            nn.Linear(512, 11)
        )
    def forward(self, x):
        return self.resnet(x)

In [10]:
# batch_size = 64
# _dataset_dir = "./food11"
# # Construct datasets.
# # The argument "loader" tells how torchvision reads the data.
# train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
# train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
# valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
# valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

In [None]:
batch_size = 64
_dataset_dir = "./food11"

# mix train and validation data and re-divide as 8:2
trpath, vapath = os.path.join(_dataset_dir,"training"), os.path.join(_dataset_dir,"validation")
all_files = [os.path.join(trpath,x) for x in os.listdir(trpath) if x.endswith(".jpg")
             ] + [os.path.join(vapath,x) for x in os.listdir(vapath) if x.endswith(".jpg")]

# randomly shuffle and split
np.random.shuffle(all_files)
train_files, valid_files = all_files[:int(len(all_files)*0.8)], all_files[int(len(all_files)*0.8):]

# load data
train_set = FoodDataset(tfm=train_tfm, files=train_files)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, 
                          num_workers=0, pin_memory=True,
                          drop_last=True, collate_fn=collate_fn)
valid_set = FoodDataset(tfm=test_tfm, files=valid_files)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, 
                          num_workers=0, pin_memory=True)

One None sample ./food11\training\2_986.jpg
One None sample ./food11\training\2_578.jpg


In [12]:
for x, y in train_loader:
    print(x.shape, y.shape)
    break

torch.Size([32, 3, 128, 128]) torch.Size([32, 11])


In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 300
patience = 100 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Resnet34().to(device)
model.load_state_dict(torch.load("medium_best.ckpt", weights_only=False))

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.RAdam(model.parameters(), lr=5e-4, weight_decay=1e-5) 
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-5)

# Cosine scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-6)

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in train_loader:

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # update lr
        scheduler.step()

        # Compute the accuracy for current batch.
        # acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        # print(logits.shape, '\n', labels.shape)        
        # assert 1 == 2
        acc = (logits.argmax(dim=-1) == labels.to(device).argmax(dim=-1)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in valid_loader:

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

[ Train | 001/050 ] loss = 1.37985, acc = 0.77334
[ Valid | 001/050 ] loss = 0.98800, acc = 0.83594
[ Valid | 001/050 ] loss = 0.98800, acc = 0.83594 -> best
Best model found at epoch 0, saving model
[ Train | 002/050 ] loss = 1.40429, acc = 0.76694
[ Valid | 002/050 ] loss = 0.93690, acc = 0.83891
[ Valid | 002/050 ] loss = 0.93690, acc = 0.83891 -> best
Best model found at epoch 1, saving model
[ Train | 003/050 ] loss = 1.42872, acc = 0.75066
[ Valid | 003/050 ] loss = 0.96920, acc = 0.83780
[ Valid | 003/050 ] loss = 0.96920, acc = 0.83780
[ Train | 004/050 ] loss = 1.39977, acc = 0.77137
[ Valid | 004/050 ] loss = 0.95327, acc = 0.83743
[ Valid | 004/050 ] loss = 0.95327, acc = 0.83743
[ Train | 005/050 ] loss = 1.41527, acc = 0.75104
[ Valid | 005/050 ] loss = 0.97004, acc = 0.83482
[ Valid | 005/050 ] loss = 0.97004, acc = 0.83482
[ Train | 006/050 ] loss = 1.39248, acc = 0.76064
[ Valid | 006/050 ] loss = 0.93882, acc = 0.84115
[ Valid | 006/050 ] loss = 0.93882, acc = 0.84115 

In [14]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm, is_test=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11\test sample ./food11\test\0001.jpg


## Testing and generate prediction CSV

In [15]:
model_best = Resnet34().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt", weights_only=False))
model_best.eval()
prediction = []
with torch.no_grad():
    for data in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [17]:
def pre_csv(preds, subname):
    #create test csv
    def pad4(i):
        return "0"*(4-len(str(i)))+str(i)
    df = pd.DataFrame()
    df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
    df["Category"] = preds
    df.to_csv(subname,index = False)
pre_csv(prediction, "submission.csv")

## Test Time Augmentation

In [None]:
# Test Time Augmentation
# 1个使用test_tfm测试集
test_set = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=test_tfm, is_test=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# 5个使用train_tfm测试集
test_loaders = []
for i in range(5):
    test_set_i = FoodDataset(os.path.join(_dataset_dir, 'test'), tfm=train_tfm, is_test=True)
    test_loader_i = DataLoader(test_set_i, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_loaders.append(test_loader_i)
# model_best = Classifier().to(device)
model_best = Resnet34()
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt", weights_only=False))
model_best = model_best.to(device)
model_best.eval()

# preds存放在6个测试集(1+5)上的测试结果矩阵，每个矩阵是(3347,11)
preds = [[], [], [], [], [], [], []]
prediction = []
with torch.no_grad():
    # 用test_tfm的测试集
    for data in test_loader:
        test_preds = model_best(data.to(device)).cpu().data.numpy()
        preds[0].extend(test_preds)
    # 5个用train_tfm的测试集
    for i, loader in enumerate(test_loaders):
        for data in loader:
            test_preds = model_best(data.to(device)).cpu().data.numpy()
            preds[i+1].extend(test_preds)

preds_np = np.array(preds, dtype=object)
print('preds_np shape: {}'.format(preds_np.shape))
# 对6个测试结果加权求和
bb = 0.5 * preds_np[0] + 0.1 * preds_np[1] + 0.1 * preds_np[2] + 0.1 * preds_np[3] + 0.1 * preds_np[4] + 0.1 * preds_np[5]
print('bb shape: {}'.format(bb.shape))
prediction = np.argmax(bb, axis=1)

One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg
preds_np shape: (7, 3347, 11)
bb shape: (3347, 11)


In [25]:
pre_csv(prediction, 'submission_resnet34_ttm.csv')

In [None]:
# vote for most frequent result, pereds_np shape [6, 3347, 11], so preds_np[0] get 2 votes, others get 1 vote
def voting(preds, num_classes=11):
    cls = np.argmax(preds, axis=-1)                     # (6, 3347)
    weights = np.array([2, 1, 1, 1, 1, 1], dtype=float)
    # 对每个样本沿 axis=0（6 次预测）做加权众数
    final = np.apply_along_axis(
        lambda x: np.bincount(x, weights=weights, minlength=num_classes).argmax(),
        axis=0, arr=cls
    )                                                   # (3347,)
    return final

pre_csv(voting(preds_np, 11), 'submission_resnet34_ttm_vote.csv')