In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy

import time
import os

# Data Transformation for Test Set

In [None]:
#plt.ion()
csv_file = pd.read_csv('/homes/ncho/comp4211/Training_set_food.csv')

labels = list(dict.fromkeys(csv_file['label']))

#Data Transformation
data_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

# Dataset and DataLoaders for Test Set

In [20]:
data_dir = '/homes/ncho/comp4211/data'

test_datasets = datasets.ImageFolder(os.path.join(data_dir, 'test'), data_transforms)

test_dataloader = torch.utils.data.DataLoader(test_datasets, batch_size=1, shuffle=False, num_workers=1)

class_names = test_datasets.classes

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('using device: {}'.format(device))

using device: cuda:0


# imshow function for tensor images

In [None]:
def imshow(imgs, title=None):
    #imshow for img(tensor)
    imgs = imgs.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    imgs = std * imgs + mean
    imgs = np.clip(imgs, 0, 1)
    plt.imshow(imgs)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)

# Test function and its procedures

In [46]:
def test_model(model, num_imgs=4):
    model.eval()
    total_images_processed = 0
    fig = plt.figure()
    total_corrects = 0

    start = time.time()
    with torch.no_grad():
        for _, (inputs, labels) in enumerate(test_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                total_images_processed += 1
                #ax = plt.subplot(num_imgs//2, 2, total_images_processed)
                #ax.axis('off')
                #ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                #imshow(inputs.cpu().data[j])

                if preds[j] == labels[j]:
                    total_corrects += 1
                
                if total_images_processed == num_imgs:
                    end = np.round(time.time() - start, 4)
                    time_per_image = np.round(end / total_images_processed, 4)
                    total_accuracy = np.round(total_corrects / num_imgs, 4)
                    
                    print('Test Accuracy: {} / {} : {}'.format(total_corrects, total_images_processed, total_accuracy))
                    print('Total time elapsed: {} s'.format(end))
                    print('Time elapsed per image: {} s'.format(time_per_image))
                    return

    end = np.round(time.time() - start, 4)
    time_per_image = np.round(end / total_images_processed, 4)
    total_accuracy = np.round(total_corrects / num_imgs, 4)
    
    print('Test Accuracy: {} / {} : {}'.format(total_corrects, total_images_processed, total_accuracy))
    print('Total time elapsed: {}'.format(end))
    print('Time elapsed per image: {}'.format(time_per_image))
    return

## check if the dataloader correctly loads the images from test set

In [47]:
#total_num_images: 540
total_num_images = len(test_dataloader)
print(total_num_images)

540


# Model testing

## ResNet-18

In [48]:
#ResNet-18 Model
plt.clf()
plt.ion()
ResNet_weights = torch.load('/homes/ncho/comp4211/weights/ResNet18_final.pt')
ResNet_model = torchvision.models.resnet18(pretrained=True)
ResNet_model.load_state_dict(ResNet_weights['model_state_dict'])
ResNet_model.to(device)
test_model(ResNet_model, num_imgs=total_num_images)
plt.ioff()
plt.show()
ResNet_model.cpu()

Test Accuracy: 512 / 540 : 0.9481
Total time elapsed: 6.6266 s
Time elapsed per image: 0.0123 s


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## CoAtNet with hyperparameter search

In [49]:
#CoAtNet with hyperparameter search
from coatnet import *
plt.clf()

num_blocks = [2, 2, 6, 8, 2]
channels = [16, 32, 64, 128, 256]
block_types = ['C', 'C', 'T', 'T']

CoAtNet_hp_weights = torch.load('/homes/ncho/comp4211/weights/CoAtNet_hp_search_final.pt')
CoAtNet_hp_model = CoAtNet((224, 224), 3, num_blocks=num_blocks, channels=channels, block_types=block_types, num_classes=3)
CoAtNet_hp_model.load_state_dict(CoAtNet_hp_weights['model_state_dict'])
CoAtNet_hp_model.to(device)
test_model(CoAtNet_hp_model, num_imgs=total_num_images)
plt.show()
CoAtNet_hp_model.cpu()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Test Accuracy: 400 / 540 : 0.7407
Total time elapsed: 12.7568 s
Time elapsed per image: 0.0236 s


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

CoAtNet(
  (s0): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
    (1): Sequential(
      (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
  )
  (s1): Sequential(
    (0): MBConv(
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (proj): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (conv): PreNorm(
        (norm): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (fn): Sequential(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

## CoAtNet0

In [50]:
#CoAtNet0
plt.clf()
CoAtNet0_weights = torch.load('/homes/ncho/comp4211/weights/CoAtNet0_final.pt')
CoAtNet0_model = coatnet_0()
CoAtNet0_model.load_state_dict(CoAtNet0_weights['model_state_dict'])
CoAtNet0_model.to(device)
test_model(CoAtNet0_model, num_imgs=total_num_images)
plt.show()
CoAtNet0_model.cpu()

Test Accuracy: 420 / 540 : 0.7778
Total time elapsed: 10.327 s
Time elapsed per image: 0.0191 s


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

CoAtNet(
  (s0): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
    (1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
  )
  (s1): Sequential(
    (0): MBConv(
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (proj): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (conv): PreNorm(
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (fn): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru