In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm

import matplotlib.pyplot as plt # For data viz
import pandas as pd
import numpy as np
import sys
from tqdm.notebook import tqdm

# 1. Loading in Data

In [2]:
class FoodImageDataset(Dataset):
    def __init__(self, data_dir, transform = None):
        self.data = ImageFolder(data_dir, transform = transform)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx]
    
    @property
    def classes(self):
        return self.data.classes

In [3]:
dataset = FoodImageDataset(data_dir = '../input/food41/images')

In [4]:
len(dataset)

101000

In [5]:
# get a dictionary associating target values with the correct folder name
data_dir = '/kaggle/input/food41/images'
target_to_class = {v: k for k, v in ImageFolder(data_dir).class_to_idx.items()}
print(target_to_class)

{0: 'apple_pie', 1: 'baby_back_ribs', 2: 'baklava', 3: 'beef_carpaccio', 4: 'beef_tartare', 5: 'beet_salad', 6: 'beignets', 7: 'bibimbap', 8: 'bread_pudding', 9: 'breakfast_burrito', 10: 'bruschetta', 11: 'caesar_salad', 12: 'cannoli', 13: 'caprese_salad', 14: 'carrot_cake', 15: 'ceviche', 16: 'cheese_plate', 17: 'cheesecake', 18: 'chicken_curry', 19: 'chicken_quesadilla', 20: 'chicken_wings', 21: 'chocolate_cake', 22: 'chocolate_mousse', 23: 'churros', 24: 'clam_chowder', 25: 'club_sandwich', 26: 'crab_cakes', 27: 'creme_brulee', 28: 'croque_madame', 29: 'cup_cakes', 30: 'deviled_eggs', 31: 'donuts', 32: 'dumplings', 33: 'edamame', 34: 'eggs_benedict', 35: 'escargots', 36: 'falafel', 37: 'filet_mignon', 38: 'fish_and_chips', 39: 'foie_gras', 40: 'french_fries', 41: 'french_onion_soup', 42: 'french_toast', 43: 'fried_calamari', 44: 'fried_rice', 45: 'frozen_yogurt', 46: 'garlic_bread', 47: 'gnocchi', 48: 'greek_salad', 49: 'grilled_cheese_sandwich', 50: 'grilled_salmon', 51: 'guacamole

In [6]:
# ensure that dataset outputs all images in the size 224 x 224
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(), # convert images to a PyTorch Tensor
])

data_dir = '/kaggle/input/food41/images'
dataset = FoodImageDataset(data_dir, transform)

In [7]:
image, label = dataset[200] # check that any image is now a PyTorch Tensor
image.shape # check that size of the 3D tensor is correctly 224 x 224

torch.Size([3, 224, 224])

In [8]:
# iterate over the dataset
for image, label in dataset:
    break

In [9]:
dataloader = DataLoader(dataset, batch_size = 32, shuffle = True)
# note-to-self: batch_size indicates how many examples we should pull each time we iterate over the DataLoader
# shuffle = True is typically only done when training data, but this is not necessary for a test set / validation set

In [10]:
# iterate over the dataloader
for images, labels in dataloader:
    break

In [11]:
images.shape, labels.shape

(torch.Size([32, 3, 224, 224]), torch.Size([32]))

In [12]:
labels # labels are in a random order, as expected

tensor([40, 87, 24,  4, 51, 13, 30, 87, 37, 14, 63, 15, 49, 14, 16, 65, 73, 69,
        20, 17, 96, 59, 73, 44, 22, 32, 20, 24, 28, 54, 76, 65])

# 2. Developing PyTorch Model

In [13]:
model = timm.create_model('resnet50', pretrained = True)
print(model)

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

In [14]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_set, test_set = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_set, batch_size=4, shuffle=True)
test_loader = DataLoader(test_set, batch_size=4, shuffle=False)

In [15]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [16]:
model.to(device)
#training model
num_epoch = 1
train_losses, val_losses = [], []
for epoch in range(num_epoch):
    model.train()
    running_loss = 0.0
    optimizer.zero_grad()
    for images, labels in tqdm(train_loader, desc='Training loop', miniters=50):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs,labels)
        loss.backward
        optimizer.step()
        running_loss += loss.item() * labels.size(0)
    train_loss = running_loss / len(train_loader.dataset)
    train_losses.append(train_loss)

Training loop:   0%|          | 0/20200 [00:00<?, ?it/s]

In [None]:
model.eval()
running_loss = 0.0
correct = 0
total = 0

#testing model
with torch.no_grad(): 
    for images, labels in tqdm(test_loader, desc='Validation loop', miniters=50): 
        images, labels = images.to(device), labels.to(device) 

        outputs = model(images)
        loss = criterion(outputs, labels) 
        running_loss += loss.item() * labels.size(0) 

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_loss = running_loss / len(test_loader.dataset)
accuracy = 100 * correct / total

print('Accuracy on the test set: {}%'.format(accuracy)) 
print(f'Test Loss: {test_loss:.4f}') 
print('correct: ' .format(correct))

Validation loop:   0%|          | 0/5050 [00:00<?, ?it/s]

In [27]:
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics

model.eval()
true_labels = []
predicted_labels = []

for images, labels in tqdm(test_loader, desc='Validation loop', miniters=50):
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    true_labels.extend(labels.cpu().numpy())
    predicted_labels.extend(predicted.cpu().numpy())
    
conf_matrix = metrics.confusion_matrix(true_labels, predicted_labels)

print("Confusion Matrix:")
print(conf_matrix)


Validation loop:   0%|          | 0/5050 [00:00<?, ?it/s]

Confusion Matrix:
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 2 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=range(10), yticklabels=range(10))
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()