# Midterm Expectation

- Baseline
- Experiment (i.e., something different; e.x., spreadsheet)
- Timeline (what we have done, and what we will be doing)

# Dependencies

In [1]:
import os

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import (
    CenterCrop,
    Compose,
    RandAugment,
    RandomHorizontalFlip,
    RandomRotation,
    ToTensor,
)
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import math

# Mount Google Drive

## Install gdfuse

In [2]:
!sudo add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!sudo apt-get update -qq 2>&1 > /dev/null
!sudo apt -y install -qq google-drive-ocamlfuse 2>&1 > /dev/null
!google-drive-ocamlfuse



debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: www-browser: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: links2: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: elinks: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: links: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: lynx: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: w3m: not found
xdg-open: no method available for opening 'https://accounts.google.com/o/oauth2/auth?client_id=564921029129.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fgd-ocaml-auth.appspot.

## Fetch API Key

In [3]:
!sudo apt-get install -qq w3m # to act as web browser 
!xdg-settings set default-web-browser w3m.desktop # to set default browser
%cd /content
!mkdir drive
%cd drive
!mkdir MyDrive
%cd ..
%cd ..
!google-drive-ocamlfuse /content/drive/MyDrive

debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 2.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package libgpm2:amd64.
(Reading database ... (Reading database ... 5%(Reading database ... 10%(Reading database ... 15%(Reading database ... 20%(Reading database ... 25%(Reading database ... 30%(Reading database ... 35%(Reading database ... 40%(Reading database ... 45%(Reading database ... 50%(Reading database ... 55%(Reading database ... 60%(Reading database ... 65%(Reading database ... 70%(Reading database ... 75%(Reading database ... 80%(Reading database ... 85%(Reading database ... 90%(Readi

# Unzip Data

In [4]:
! pip install gdown
! cd /content/ && gdown https://drive.google.com/uc?id=1zD7fUAt12L16ywPSjRsj4IqL-Lqg13zK
# ! cp /content/drive/MyDrive/11785/project/data.zip /content/
! cd /content && unzip data.zip
! rm /content/data.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data/imagery/realsense_overhead/train/dish_1562169840/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170163/
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/depth_raw.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170236/
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/depth_raw.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170331/
  inflating: data/imagery/realsense_overhead/train/dish_1562170331/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170331/depth_raw.pn

# Global Variables

In [2]:
DATA_PATH = "/content/data"
METADATA_DIR = f"{DATA_PATH}/metadata"
IMAGERY_DIR = f"{DATA_PATH}/imagery/realsense_overhead"

# Helper Functions for Data

In [3]:
def read_csv_variable_cols(filepath: str) -> pd.DataFrame:
    """https://stackoverflow.com/a/57824142.
    We only read the first 6 columns to retrieve required labels.
    """
    ### Loop the data lines
    with open(filepath, 'r') as temp_f:
        # get No of columns in each line
        col_count = [ len(l.split(",")) for l in temp_f.readlines() ]

    ### Generate column names  (names will be 0, 1, 2, ..., maximum columns - 1)
    column_names = [i for i in range(0, max(col_count))]

    ### Read csv
    return pd.read_csv(filepath, header=None, delimiter=",", names=column_names, low_memory=False).iloc[:,:6]

# Ingredient and Dish Metadata (Groun Truths)

## Data Format

### Training-testing data

- "imagery/realsense_overhead/dish_<id>" contains the images as the input data
- "dish_ids" contains training-testing splits

### Labels (metadata)

All labels need to be preprocessed. For each dish, we need to extract the following:
- total calorie
- mass (optional according to the paper)
- the amount of the three macronutrients (fat, carb, protein)

It doesn't seem that bad - we don't need to process the ingredients because they are purely there for constructing the labels. For our multi-task learning, we only need to have the above labels. The three tasks are:

1. Calorie
2. Macronutrients (fat, carb, protein)
3. Mass (optional)

这样一来我们可以把labels和image data放在一起，每次返回input和expected output.

## Dish Metadata

In [4]:
# Metadata for dishes has variable numbers of columns per row.
# Can do similar stuff to dish_metadata_cafe2.csv
# The first 6 columns: [dish_id, total_calories, total_mass, total_fat, total_carb, total_protein]
dish_metadata_1 = read_csv_variable_cols(f"{METADATA_DIR}/dish_metadata_cafe1.csv")
# Rename the columns
dish_metadata_1 = dish_metadata_1.rename(columns={0:"dish_id", 1:"total_calories", 2:"total_mass", 3:"total_fat", 4:"total_carb", 5:"total_protein"})

dish_metadata_2 = read_csv_variable_cols(f"{METADATA_DIR}/dish_metadata_cafe2.csv")
# Rename the columns
dish_metadata_2 = dish_metadata_2.rename(columns={0:"dish_id", 1:"total_calories", 2:"total_mass", 3:"total_fat", 4:"total_carb", 5:"total_protein"})

dish_metadata = pd.concat((dish_metadata_1, dish_metadata_2), ignore_index=True)
# Convert to dictionary
labels_dict = dish_metadata.set_index("dish_id").to_dict("index")

# Hyperparameters

In [38]:
config = {
    'epochs': 150,
    'batch_size': 32,
    'lr': 1e-3,
}

class Config:
    def __init__(self, config):
        for k, v in config.items():
            setattr(self, k, v)

config = Config(config)

# Datasets and DataLoaders

In [48]:
transforms = Compose([CenterCrop((256, 256)), RandAugment(3), ToTensor()])
class RGBDataset(Dataset):
    """4.2 The input resolution to the
    network is a 256x256 image, where images were downsized
    and center cropped in order to retain the most salient dish
    region.

    我们baseline应该只用RGB就行 (根据4.2).
    """

    def __init__(self, data_dir, transforms=transforms, labels=labels_dict, train=True):
        self.data_dir = data_dir
        self.transforms = transforms
        self.labels = labels
        self.train = train

        # # ['dish_1556572657', 'dish_1556573514', 'dish_1556575014', 'dish_1556575083', 'dish_1556575124', 'dish_1556575273', 'dish_1556575327']
        dirs = os.listdir(self.data_dir)

        self.dish_ids = []
        for dir in dirs:
            if "rgb.png" in os.listdir(os.path.join(self.data_dir,dir)):
                self.dish_ids.append(dir)

        self.dish_ids.sort()

        self.img_paths = list(
            map(
                lambda fname: os.path.join(self.data_dir, fname),
                self.dish_ids,
            )
        )

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        rgb_path = f"{self.img_paths[idx]}/rgb.png"
        dish_id = self.dish_ids[idx]
        transformed_img = self.transforms(Image.open(rgb_path))
        if self.train:
            label = torch.tensor(list(self.labels[dish_id].values()))
            return transformed_img, label
        else:
            return transformed_img

In [40]:
TRAIN_DIR = f"{IMAGERY_DIR}/train"
VALID_DIR = f"{IMAGERY_DIR}/test"
# TEST_DIR = IMAGERY_DIR

train_dataset = RGBDataset(TRAIN_DIR, labels=labels_dict)
valid_dataset = RGBDataset(VALID_DIR, labels=labels_dict)
# test_dataset = RGBDataset(TEST_DIR, train=False)

train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2)
valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)
# test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)

num_training_batches = len(train_loader)

# InceptionV2

In [13]:
def ConvBNReLU(in_channels,out_channels,kernel_size,stride=1,padding=0):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True),
    )

def ConvBNReLUFactorization(in_channels,out_channels,kernel_sizes,paddings):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_sizes, stride=1,padding=paddings),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True)
    )

class InceptionV2ModuleA(nn.Module):
    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV2ModuleA, self).__init__()

        self.branch1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels1,kernel_size=1)

        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels2reduce, kernel_size=1),
            ConvBNReLU(in_channels=out_channels2reduce, out_channels=out_channels2, kernel_size=3, padding=1),
        )

        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels,out_channels=out_channels3reduce,kernel_size=1),
            ConvBNReLU(in_channels=out_channels3reduce, out_channels=out_channels3, kernel_size=3, padding=1),
            ConvBNReLU(in_channels=out_channels3, out_channels=out_channels3, kernel_size=3, padding=1),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels4, kernel_size=1),
        )

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out4 = self.branch4(x)
        out = torch.cat([out1, out2, out3, out4], dim=1)
        return out

class InceptionV2ModuleB(nn.Module):
    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV2ModuleB, self).__init__()

        self.branch1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels1,kernel_size=1)

        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels2reduce, kernel_size=1),
            ConvBNReLUFactorization(in_channels=out_channels2reduce, out_channels=out_channels2reduce, kernel_sizes=[1,3],paddings=[0,1]),
            ConvBNReLUFactorization(in_channels=out_channels2reduce, out_channels=out_channels2, kernel_sizes=[3,1],paddings=[1, 0]),
        )

        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels,out_channels=out_channels3reduce,kernel_size=1),
            ConvBNReLUFactorization(in_channels=out_channels3reduce, out_channels=out_channels3reduce,kernel_sizes=[1, 3], paddings=[0, 1]),
            ConvBNReLUFactorization(in_channels=out_channels3reduce, out_channels=out_channels3reduce,kernel_sizes=[3, 1], paddings=[1, 0]),
            ConvBNReLUFactorization(in_channels=out_channels3reduce, out_channels=out_channels3reduce, kernel_sizes=[1, 3], paddings=[0, 1]),
            ConvBNReLUFactorization(in_channels=out_channels3reduce, out_channels=out_channels3,kernel_sizes=[3, 1], paddings=[1, 0]),
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels4, kernel_size=1),
        )

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out4 = self.branch4(x)
        out = torch.cat([out1, out2, out3, out4], dim=1)
        return out

class InceptionV2ModuleC(nn.Module):
    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV2ModuleC, self).__init__()

        self.branch1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels1,kernel_size=1)

        self.branch2_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=out_channels2reduce, kernel_size=1)
        self.branch2_conv2a = ConvBNReLUFactorization(in_channels=out_channels2reduce, out_channels=out_channels2, kernel_sizes=[1,3],paddings=[0,1])
        self.branch2_conv2b = ConvBNReLUFactorization(in_channels=out_channels2reduce, out_channels=out_channels2, kernel_sizes=[3,1],paddings=[1,0])

        self.branch3_conv1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels3reduce,kernel_size=1)
        self.branch3_conv2 = ConvBNReLU(in_channels=out_channels3reduce, out_channels=out_channels3, kernel_size=3,stride=1,padding=1)
        self.branch3_conv3a = ConvBNReLUFactorization(in_channels=out_channels3, out_channels=out_channels3, kernel_sizes=[3, 1],paddings=[1, 0])
        self.branch3_conv3b = ConvBNReLUFactorization(in_channels=out_channels3, out_channels=out_channels3, kernel_sizes=[1, 3],paddings=[0, 1])

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels4, kernel_size=1),
        )

    def forward(self, x):
        out1 = self.branch1(x)
        x2 = self.branch2_conv1(x)
        out2 = torch.cat([self.branch2_conv2a(x2), self.branch2_conv2b(x2)],dim=1)
        x3 = self.branch3_conv2(self.branch3_conv1(x))
        out3 = torch.cat([self.branch3_conv3a(x3), self.branch3_conv3b(x3)], dim=1)
        out4 = self.branch4(x)
        out = torch.cat([out1, out2, out3, out4], dim=1)
        return out

class InceptionV3ModuleD(nn.Module):
    def __init__(self, in_channels,out_channels1reduce,out_channels1,out_channels2reduce, out_channels2):
        super(InceptionV3ModuleD, self).__init__()

        self.branch1 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels1reduce, kernel_size=1),
            ConvBNReLU(in_channels=out_channels1reduce, out_channels=out_channels1, kernel_size=3,stride=2,padding=1)
        )

        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels=in_channels, out_channels=out_channels2reduce, kernel_size=1),
            ConvBNReLU(in_channels=out_channels2reduce, out_channels=out_channels2, kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels=out_channels2, out_channels=out_channels2, kernel_size=3, stride=2,padding=1),
        )

        self.branch3 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

    def forward(self, x):
        out1 = self.branch1(x)
        out2 = self.branch2(x)
        out3 = self.branch3(x)
        out = torch.cat([out1, out2, out3], dim=1)
        return out

class InceptionAux(nn.Module):
    def __init__(self, in_channels,out_channels):
        super(InceptionAux, self).__init__()

        self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.auxiliary_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=128, kernel_size=1)
        self.auxiliary_conv2 = nn.Conv2d(in_channels=128, out_channels=768, kernel_size=5,stride=1)
        self.auxiliary_dropout = nn.Dropout(p=0.7)
        self.auxiliary_linear1 = nn.Linear(in_features=768, out_features=out_channels)

    def forward(self, x):
        x = self.auxiliary_conv1(self.auxiliary_avgpool(x))
        x = self.auxiliary_conv2(x)
        x = x.view(x.size(0), -1)
        out = self.auxiliary_linear1(self.auxiliary_dropout(x))
        return out

class InceptionV2(nn.Module):
    def __init__(self, num_classes=1000, stage='train'):
        super(InceptionV2, self).__init__()
        self.stage = stage

        self.block1 = nn.Sequential(
            ConvBNReLU(in_channels=3, out_channels=64, kernel_size=7,stride=2,padding=3),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
        )

        self.block2 = nn.Sequential(
            ConvBNReLU(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2,padding=1),
        )

        self.block3 = nn.Sequential(
            InceptionV2ModuleA(in_channels=192,out_channels1=64,out_channels2reduce=64, out_channels2=64, out_channels3reduce=64, out_channels3=96, out_channels4=32),
            InceptionV2ModuleA(in_channels=256, out_channels1=64, out_channels2reduce=64, out_channels2=96,out_channels3reduce=64, out_channels3=96, out_channels4=64),
            InceptionV3ModuleD(in_channels=320, out_channels1reduce=128, out_channels1=160, out_channels2reduce=64,out_channels2=96),
        )

        self.block4 = nn.Sequential(
            InceptionV2ModuleB(in_channels=576, out_channels1=224, out_channels2reduce=64, out_channels2=96,out_channels3reduce=96, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=192, out_channels2reduce=96, out_channels2=128,out_channels3reduce=96, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=160, out_channels2reduce=128, out_channels2=160,out_channels3reduce=128, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=96, out_channels2reduce=128, out_channels2=192,out_channels3reduce=160, out_channels3=160, out_channels4=128),
            InceptionV3ModuleD(in_channels=576, out_channels1reduce=128, out_channels1=192, out_channels2reduce=192,out_channels2=256),
        )

        self.block5 = nn.Sequential(
            InceptionV2ModuleC(in_channels=1024, out_channels1=352, out_channels2reduce=192, out_channels2=160,out_channels3reduce=160, out_channels3=112, out_channels4=128),
            InceptionV2ModuleC(in_channels=1024, out_channels1=352, out_channels2reduce=192, out_channels2=160,
                               out_channels3reduce=192, out_channels3=112, out_channels4=128)
        )

        self.avg_pool = nn.AdaptiveAvgPool2d((2,2))
        self.dropout = nn.Dropout(p=0.5)
    

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.avg_pool(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)
        return x

# Resnet50

In [14]:
import torch.utils.model_zoo as model_zoo
model_urls = {
    'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth',
    'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth',
    'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth',
}

class Bottleneck(nn.Module):
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super(Bottleneck, self).__init__()
    
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change
    self.bn1 = nn.BatchNorm2d(planes)
    
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change
                 padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)
    
    self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(planes * 4)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    residual = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    if self.downsample is not None:
      residual = self.downsample(x)

    out += residual
    out = self.relu(out)

    return out
    
    
class ResNet(nn.Module):
  def __init__(self, block, layers, num_classes=1000):
    self.inplanes = 64
    super().__init__()
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                 bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)   # different
    self.avgpool = nn.AvgPool2d(7)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
      elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      downsample = nn.Sequential(
        nn.Conv2d(self.inplanes, planes * block.expansion,
              kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(planes * block.expansion),
      )

    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample))
    self.inplanes = planes * block.expansion
    for i in range(1, blocks):
      layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)

  def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)

    return x

def resnet50(pretrained=True):
  """Constructs a ResNet-50 model.
  Args:
    pretrained (bool): If True, returns a model pre-trained on ImageNet
  """
  model = ResNet(Bottleneck, [3, 4, 6, 3])
  if pretrained:
    model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
  return model


# ConvNext

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F


def truncated_normal_(tensor, mean=0, std=0.09):
    with torch.no_grad():
        size = tensor.shape
        tmp = tensor.new_empty(size + (4,)).normal_()
        valid = (tmp < 2) & (tmp > -2)
        ind = valid.max(-1, keepdim=True)[1]
        tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1))
        tensor.data.mul_(std).add_(mean)
        return tensor


class ConvNeXtBlock(nn.Module):
    """ The architecture of this block is as follows :
    
    DepthWise conv -> Permute to (N, H, W, C); [Channel Last]; Layer_norm -> Linear -> GELU -> Linear -> Permute Back

    Channel Last is used in input dimensions because its faster in PyTorch
    
    """

    def __init__(
        self, in_channel, depth_rate=0.0, layer_scale_init_value=1e-6
    ):
        super(ConvNeXtBlock, self).__init__()

        """Using Group covolution using groups as in the in_channel so it behaves as Depth Wise Convolution"""
        self.depthWiseConv = nn.Conv2d(
            in_channel, in_channel, kernel_size=7, padding=3, groups=in_channel
        )

        self.norm = Layer_norm(in_channel, eps=1e-6)

        """point wise convolution with 1x1 conv is similar to a Linear Layer"""
        self.pointWiseConv1 = nn.Linear(in_channel, 4 * in_channel)

        self.activation = nn.GELU()

        self.pointWiseConv2 = nn.Linear(4 * in_channel, in_channel)

        self.gamma = (
            nn.Parameter(
                layer_scale_init_value * torch.ones((in_channel)),
                requires_grad=True,
            )
            if layer_scale_init_value > 0
            else None
        )

        """Stochastic Depth aims to shrink the depth of a network during training, 
        while keeping it unchanged during testing. This is achieved by randomly dropping 
        entire ResBlocks during training and bypassing their transformations through 
        skip connections."""
        self.dropPath = nn.Identity()

    def forward(self, x):
        in_tensor = x
        x = self.depthWiseConv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pointWiseConv1(x)
        x = self.activation(x)
        x = self.pointWiseConv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = in_tensor + self.dropPath(x)

        return x


class Layer_norm(nn.Module):
    def __init__(self, normShape, eps=1e-6, input_format="Channel_Last"):
        super(Layer_norm, self).__init__()
        self.weight = nn.Parameter(torch.ones(normShape))
        self.bias = nn.Parameter(torch.zeros(normShape))
        self.eps = eps
        self.dataFormat = input_format
        if self.dataFormat not in ["Channel_Last", "Channel_First"]:
            raise NotImplementedError
        self.normShape = (normShape,)

    def forward(self, x):
        if self.dataFormat == "Channel_Last":
            return F.layer_norm(
                x, self.normShape, self.weight, self.bias, self.eps
            )
        elif self.dataFormat == "Channel_First":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x


class ConvNeXt(nn.Module):
    """
    Args:
        in_channels (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 100
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    
    """

    def __init__(
        self,
        in_channels=3,
        num_classes=4096,
        depths=[3, 3, 9, 3],
        dims=[96, 192, 384, 768],
        drop_path_rate=0.0,
        layer_scale_init_value=1e-6,
        head_init_scale=1.0,
    ):

        super(ConvNeXt, self).__init__()

        self.downsample_layers = nn.ModuleList()
        stem = nn.Sequential(
            nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=4),
            Layer_norm(dims[0], eps=1e-6, input_format="Channel_First"),
        )
        self.downsample_layers.append(stem)

        for i in range(3):
            downsample_layer = nn.Sequential(
                Layer_norm(dims[i], eps=1e-6, input_format="Channel_First"),
                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList()
        dp_rates = [x for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0

        for i in range(4):
            stage = nn.Sequential(
                *[
                    ConvNeXtBlock(
                        in_channel=dims[i],
                        depth_rate=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value,
                    )
                    for j in range(depths[i])
                ]
            )

            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self.init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            truncated_normal_(m.weight, std=0.02)
            nn.init.constant_(m.bias, 0)

    def forward_stages(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        return self.norm(
            x.mean([-2, -1])
        )  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x, return_feats=False):
        x = self.forward_stages(x)
        if return_feats is True:
            return x
        x = self.head(x)

        return x


def convnext_tiny(**kwargs):
    model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
    return model


def convnext_small(**kwargs):
    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
    return model


def convnext_base(**kwargs):
    model = ConvNeXt(
        depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs
    )
    return model

# MobileNetV3

In [49]:
import warnings
from functools import partial
from typing import Any, Callable, List, Optional, Sequence

import torch
from torch import Tensor, nn
from torchvision.ops.misc import ConvNormActivation
from torchvision.ops.misc import SqueezeExcitation as SElayer
from torch.hub import load_state_dict_from_url

__all__ = ["MobileNetV3", "mobilenet_v3_large", "mobilenet_v3_small"]


model_urls = {
    "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
    "mobilenet_v3_small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
}


def _make_divisible(
    v: float, divisor: int, min_value: Optional[int] = None
) -> int:
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class SqueezeExcitation(SElayer):
    """DEPRECATED
    """

    def __init__(self, input_channels: int, squeeze_factor: int = 4):
        squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
        super().__init__(
            input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid
        )
        self.relu = self.activation
        delattr(self, "activation")
        warnings.warn(
            "This SqueezeExcitation class is deprecated and will be removed in future versions. "
            "Use torchvision.ops.misc.SqueezeExcitation instead.",
            FutureWarning,
        )


class InvertedResidualConfig:
    # Stores information listed at Tables 1 and 2 of the MobileNetV3 paper
    def __init__(
        self,
        input_channels: int,
        kernel: int,
        expanded_channels: int,
        out_channels: int,
        use_se: bool,
        activation: str,
        stride: int,
        dilation: int,
        width_mult: float,
    ):
        self.input_channels = self.adjust_channels(input_channels, width_mult)
        self.kernel = kernel
        self.expanded_channels = self.adjust_channels(
            expanded_channels, width_mult
        )
        self.out_channels = self.adjust_channels(out_channels, width_mult)
        self.use_se = use_se
        self.use_hs = activation == "HS"
        self.stride = stride
        self.dilation = dilation

    @staticmethod
    def adjust_channels(channels: int, width_mult: float):
        return _make_divisible(channels * width_mult, 8)


class InvertedResidual(nn.Module):
    # Implemented as described at section 5 of MobileNetV3 paper
    def __init__(
        self,
        cnf: InvertedResidualConfig,
        norm_layer: Callable[..., nn.Module],
        se_layer: Callable[..., nn.Module] = partial(
            SElayer, scale_activation=nn.Hardsigmoid
        ),
    ):
        super().__init__()
        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        self.use_res_connect = (
            cnf.stride == 1 and cnf.input_channels == cnf.out_channels
        )

        layers: List[nn.Module] = []
        activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU

        # expand
        if cnf.expanded_channels != cnf.input_channels:
            layers.append(
                ConvNormActivation(
                    cnf.input_channels,
                    cnf.expanded_channels,
                    kernel_size=1,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                )
            )

        # depthwise
        stride = 1 if cnf.dilation > 1 else cnf.stride
        layers.append(
            ConvNormActivation(
                cnf.expanded_channels,
                cnf.expanded_channels,
                kernel_size=cnf.kernel,
                stride=stride,
                dilation=cnf.dilation,
                groups=cnf.expanded_channels,
                norm_layer=norm_layer,
                activation_layer=activation_layer,
            )
        )
        if cnf.use_se:
            squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8)
            layers.append(se_layer(cnf.expanded_channels, squeeze_channels))

        # project
        layers.append(
            ConvNormActivation(
                cnf.expanded_channels,
                cnf.out_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=None,
            )
        )

        self.block = nn.Sequential(*layers)
        self.out_channels = cnf.out_channels
        self._is_cn = cnf.stride > 1

    def forward(self, input: Tensor) -> Tensor:
        result = self.block(input)
        if self.use_res_connect:
            result += input
        return result


class MobileNetV3(nn.Module):
    def __init__(
        self,
        inverted_residual_setting: List[InvertedResidualConfig],
        last_channel: int,
        num_classes: int = 4096,
        block: Optional[Callable[..., nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        **kwargs: Any
    ) -> None:
        """
        MobileNet V3 main class

        Args:
            inverted_residual_setting (List[InvertedResidualConfig]): Network structure
            last_channel (int): The number of channels on the penultimate layer
            num_classes (int): Number of classes
            block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet
            norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
        """
        super().__init__()

        if not inverted_residual_setting:
            raise ValueError(
                "The inverted_residual_setting should not be empty"
            )
        elif not (
            isinstance(inverted_residual_setting, Sequence)
            and all(
                [
                    isinstance(s, InvertedResidualConfig)
                    for s in inverted_residual_setting
                ]
            )
        ):
            raise TypeError(
                "The inverted_residual_setting should be List[InvertedResidualConfig]"
            )

        if block is None:
            block = InvertedResidual

        if norm_layer is None:
            norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)

        layers: List[nn.Module] = []

        # building first layer
        firstconv_output_channels = inverted_residual_setting[0].input_channels
        layers.append(
            ConvNormActivation(
                3,
                firstconv_output_channels,
                kernel_size=3,
                stride=2,
                norm_layer=norm_layer,
                activation_layer=nn.Hardswish,
            )
        )

        # building inverted residual blocks
        for cnf in inverted_residual_setting:
            layers.append(block(cnf, norm_layer))

        # building last several layers
        lastconv_input_channels = inverted_residual_setting[-1].out_channels
        lastconv_output_channels = 6 * lastconv_input_channels
        layers.append(
            ConvNormActivation(
                lastconv_input_channels,
                lastconv_output_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=nn.Hardswish,
            )
        )

        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Linear(lastconv_output_channels, last_channel),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(last_channel, num_classes),
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)


def _mobilenet_v3_conf(
    arch: str,
    width_mult: float = 1.0,
    reduced_tail: bool = False,
    dilated: bool = False,
    **kwargs: Any
):
    reduce_divider = 2 if reduced_tail else 1
    dilation = 2 if dilated else 1

    bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult)
    adjust_channels = partial(
        InvertedResidualConfig.adjust_channels, width_mult=width_mult
    )

    if arch == "mobilenet_v3_large":
        inverted_residual_setting = [
            bneck_conf(16, 3, 16, 16, False, "RE", 1, 1),
            bneck_conf(16, 3, 64, 24, False, "RE", 2, 1),  # C1
            bneck_conf(24, 3, 72, 24, False, "RE", 1, 1),
            bneck_conf(24, 5, 72, 40, True, "RE", 2, 1),  # C2
            bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
            bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
            bneck_conf(40, 3, 240, 80, False, "HS", 2, 1),  # C3
            bneck_conf(80, 3, 200, 80, False, "HS", 1, 1),
            bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
            bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
            bneck_conf(80, 3, 480, 112, True, "HS", 1, 1),
            bneck_conf(112, 3, 672, 112, True, "HS", 1, 1),
            bneck_conf(
                112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation
            ),  # C4
            bneck_conf(
                160 // reduce_divider,
                5,
                960 // reduce_divider,
                160 // reduce_divider,
                True,
                "HS",
                1,
                dilation,
            ),
            bneck_conf(
                160 // reduce_divider,
                5,
                960 // reduce_divider,
                160 // reduce_divider,
                True,
                "HS",
                1,
                dilation,
            ),
        ]
        last_channel = adjust_channels(1280 // reduce_divider)  # C5
    elif arch == "mobilenet_v3_small":
        inverted_residual_setting = [
            bneck_conf(16, 3, 16, 16, True, "RE", 2, 1),  # C1
            bneck_conf(16, 3, 72, 24, False, "RE", 2, 1),  # C2
            bneck_conf(24, 3, 88, 24, False, "RE", 1, 1),
            bneck_conf(24, 5, 96, 40, True, "HS", 2, 1),  # C3
            bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
            bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
            bneck_conf(40, 5, 120, 48, True, "HS", 1, 1),
            bneck_conf(48, 5, 144, 48, True, "HS", 1, 1),
            bneck_conf(
                48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation
            ),  # C4
            bneck_conf(
                96 // reduce_divider,
                5,
                576 // reduce_divider,
                96 // reduce_divider,
                True,
                "HS",
                1,
                dilation,
            ),
            bneck_conf(
                96 // reduce_divider,
                5,
                576 // reduce_divider,
                96 // reduce_divider,
                True,
                "HS",
                1,
                dilation,
            ),
        ]
        last_channel = adjust_channels(1024 // reduce_divider)  # C5
    else:
        raise ValueError("Unsupported model type {}".format(arch))

    return inverted_residual_setting, last_channel


def _mobilenet_v3_model(
    arch: str,
    inverted_residual_setting: List[InvertedResidualConfig],
    last_channel: int,
    pretrained: bool,
    progress: bool,
    **kwargs: Any
):
    model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
    if pretrained:
        if model_urls.get(arch, None) is None:
            raise ValueError("No checkpoint is available for model type {}".format(arch))
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model


def mobilenet_v3_large(
    pretrained: bool = False, progress: bool = True, **kwargs: Any
) -> MobileNetV3:
    """
    Constructs a large MobileNetV3 architecture from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    arch = "mobilenet_v3_large"
    inverted_residual_setting, last_channel = _mobilenet_v3_conf(
        arch, **kwargs
    )
    return _mobilenet_v3_model(
        arch,
        inverted_residual_setting,
        last_channel,
        pretrained,
        progress,
        **kwargs
    )


def mobilenet_v3_small(
    pretrained: bool = False, progress: bool = True, **kwargs: Any
) -> MobileNetV3:
    """
    Constructs a small MobileNetV3 architecture from
    `"Searching for MobileNetV3" <https://arxiv.org/abs/1905.02244>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    arch = "mobilenet_v3_small"
    inverted_residual_setting, last_channel = _mobilenet_v3_conf(
        arch, **kwargs
    )
    return _mobilenet_v3_model(
        arch,
        inverted_residual_setting,
        last_channel,
        pretrained,
        progress,
        **kwargs
    )


# Available Backbones

In [50]:
BACKBONES_MAP = {
    "inceptionv2": {
        "config": [1, 1, 3],
        "class": InceptionV2,
    },
    "resnet50": {
        "config": None,
        "class": resnet50,
    },
    "convnext_base": {
        "config": None,
        "class": convnext_base,
    },
    "convnext_small": {
        "config": None,
        "class": convnext_small,
    },
    "convnext_tiny": {
        "config": None,
        "class": convnext_tiny, 
    },
    "mobilenetv3_small":{
        "config": None,
        "class": mobilenet_v3_small
    },
}

# Baseline Model

In [17]:
class BaseNet(nn.Module):
    def __init__(self, backbone="inceptionv2"):
        """Available backbones are:
        - InceptionV2([1,1,3])
        - resnet50()
        - convnext_base()
        """
        super().__init__()
        # self.backbone = InceptionV2([1, 1, 3])
        # self.backbone = resnet50()
        self.backbone_name = backbone
        backbone_config = BACKBONES_MAP[backbone]["config"]
        self.backbone = BACKBONES_MAP[backbone]["class"](backbone_config) if backbone_config is not None else BACKBONES_MAP[backbone]["class"]()
        self.fc = nn.Linear(1000, 4096) 
        self.fc1 = nn.Linear(4096, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc_calories = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 1)
        )
        self.fc_mass = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 1)
        )
        self.fc_mc = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 3)
        )

    def forward(self, x):
        x = self.backbone(x)
        if self.backbone_name == 'inceptionv2':
            x = self.fc(x) # Comment it if backbone is resnet or convnext
        x = self.fc2(self.fc1(x))

        x_cal = self.fc_calories(x)
        x_mass = self.fc_mass(x)
        x_mn = self.fc_mc(x)

        return x_cal, x_mass, x_mn

# Multi-task Loss

In [18]:
class MultiTaskLearner(nn.Module):
    def __init__(self, model: nn.Module):
        super(MultiTaskLearner, self).__init__()
        self.model = model
        self.criterion = nn.L1Loss()

    def forward(self, x, y):
        # 1 x 5 Tensor [total_calories, total_mass, total_fat, total_carb, total_protein]

        out_cal, out_mass, out_mn = self.model(x)

        loss_calorie = self.criterion(out_cal, y[:, 0:1])
        
        loss_mass = self.criterion(out_mass, y[:, 1:2])

        loss_mn = self.criterion(out_mn, y[:, 2:])

        loss_total = loss_calorie + loss_mass + loss_mn

        return loss_total

# Delete Model

In [51]:
torch.cuda.empty_cache()
del model

NameError: ignored

# Utility Funs

In [45]:
CHECKPOINT_PATH = "/content/drive/MyDrive/checkpoints"
def create_dir_if_not_exists(dirpath: str):
    """Create the specified directory with all intermediate directories if necessary."""
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

def save_checkpoint(
    epoch: int,
    loss: float,
    model: nn.Module,
    model_name: str,
    checkpoint_path: str = CHECKPOINT_PATH,
):
    create_dir_if_not_exists(checkpoint_path)
    torch.save(
        {"epoch": epoch, "loss": loss, "model_state_dict": model.state_dict()},
        os.path.join(checkpoint_path, model_name),
    )


def load_checkpoint(filepath: str):
    state_dict = torch.load(filepath)
    epoch, loss, model_state_dict = (
        state_dict["epoch"],
        state_dict["loss"],
        state_dict["model_state_dict"],
    )
    return epoch, loss, model_state_dict

# Training

In [52]:
print(f"Cuda is available: {torch.cuda.is_available()}")
model = BaseNet(backbone="mobilenetv3_small")
model.cuda()
learner = MultiTaskLearner(model)

optimizer = torch.optim.Adam(model.parameters(), config.lr, weight_decay=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs*len(train_loader))
scaler = torch.cuda.amp.GradScaler()

Cuda is available: True


In [53]:
for epoch in range(config.epochs):
    model.train()
    total_loss = 0

    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        # with torch.cuda.amp.autocast():     
        loss = learner(x, y)

        # Update # correct & loss as we go
        total_loss += float(loss)

        # Compute training metrics
        train_loss = float(total_loss / (i + 1))
        cur_lr = float(optimizer.param_groups[0]['lr'])


        # tqdm lets you add some details so you can monitor training as you train.
        batch_bar.set_postfix(
            loss="{:.04f}".format(train_loss),
            lr="{:.04f}".format(cur_lr))
        
        # Another couple things you need for FP16. 
        # scaler.scale(loss).backward() # This is a replacement for loss.backward()
        loss.backward()
        # scaler.step(optimizer) # This is a replacement for optimizer.step()
        optimizer.step()
        # scaler.update() # This is something added just for FP16

        scheduler.step() # We told scheduler T_max that we'd call step() (len(train_loader) * epochs) many times.

        batch_bar.update() # Update tqdm bar

    batch_bar.close() # You need this to close the tqdm bar

    train_loss = total_loss / len(train_loader)
    
    # Save the model every 3 epochs
    if epoch % 3 == 0:
        save_checkpoint(epoch, train_loss, model, "resnet")

    # You can add validation per-epoch here if you would like
    model.eval()
    batch_bar = tqdm(total=len(valid_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
    total_loss = 0
    for i, (x, y) in enumerate(valid_loader):

        x = x.cuda()
        y = y.cuda()

        with torch.no_grad():
            loss = learner(x, y)
        

        total_loss += float(loss)

        batch_bar.set_postfix(loss="{:.04f}".format(float(total_loss / (i + 1))))

        batch_bar.update()
        
    batch_bar.close()

    # scheduler.step(float(total_loss / (i + 1)))

    valid_loss = total_loss / len(valid_loader)

    print("Epoch {}/{}: Train Loss {:.04f}, Learning Rate {:.04f}, Valid Loss {:.04f}".format(
        epoch + 1, config.epochs, train_loss, cur_lr, valid_loss))




Epoch 1/150: Train Loss 387.1260, Learning Rate 0.0010, Valid Loss 374.7617




Epoch 2/150: Train Loss 230.4670, Learning Rate 0.0010, Valid Loss 388.3470




Epoch 3/150: Train Loss 213.6929, Learning Rate 0.0010, Valid Loss 368.1053




Epoch 4/150: Train Loss 198.3535, Learning Rate 0.0010, Valid Loss 355.7117




Epoch 5/150: Train Loss 199.0322, Learning Rate 0.0010, Valid Loss 367.6785




Epoch 6/150: Train Loss 190.0637, Learning Rate 0.0010, Valid Loss 367.5923




Epoch 7/150: Train Loss 195.5990, Learning Rate 0.0010, Valid Loss 358.9621




Epoch 8/150: Train Loss 189.6682, Learning Rate 0.0010, Valid Loss 351.7280




Epoch 9/150: Train Loss 189.1524, Learning Rate 0.0010, Valid Loss 347.5546




Epoch 10/150: Train Loss 181.2734, Learning Rate 0.0010, Valid Loss 361.2845




Epoch 11/150: Train Loss 186.9731, Learning Rate 0.0010, Valid Loss 357.1871




Epoch 12/150: Train Loss 180.8465, Learning Rate 0.0010, Valid Loss 349.2063




Epoch 13/150: Train Loss 176.3562, Learning Rate 0.0010, Valid Loss 331.1574




Epoch 14/150: Train Loss 179.4397, Learning Rate 0.0010, Valid Loss 334.5748




Epoch 15/150: Train Loss 174.2992, Learning Rate 0.0010, Valid Loss 316.1560




Epoch 16/150: Train Loss 173.7356, Learning Rate 0.0010, Valid Loss 329.7723




Epoch 17/150: Train Loss 177.8231, Learning Rate 0.0010, Valid Loss 302.5525




Epoch 18/150: Train Loss 170.1599, Learning Rate 0.0010, Valid Loss 317.9308




Epoch 19/150: Train Loss 168.6377, Learning Rate 0.0010, Valid Loss 319.1150




Epoch 20/150: Train Loss 169.3864, Learning Rate 0.0010, Valid Loss 304.5007




Epoch 21/150: Train Loss 168.3576, Learning Rate 0.0010, Valid Loss 256.1921




Epoch 22/150: Train Loss 164.2707, Learning Rate 0.0009, Valid Loss 266.8299




Epoch 23/150: Train Loss 161.2903, Learning Rate 0.0009, Valid Loss 242.4893




Epoch 24/150: Train Loss 167.6017, Learning Rate 0.0009, Valid Loss 238.5592




Epoch 25/150: Train Loss 162.5497, Learning Rate 0.0009, Valid Loss 286.7686




Epoch 26/150: Train Loss 161.1078, Learning Rate 0.0009, Valid Loss 254.5234




Epoch 27/150: Train Loss 160.3502, Learning Rate 0.0009, Valid Loss 269.5814




Epoch 28/150: Train Loss 155.9292, Learning Rate 0.0009, Valid Loss 273.8328




Epoch 29/150: Train Loss 160.2241, Learning Rate 0.0009, Valid Loss 282.3850




Epoch 30/150: Train Loss 159.3006, Learning Rate 0.0009, Valid Loss 244.0671




Epoch 31/150: Train Loss 162.6966, Learning Rate 0.0009, Valid Loss 249.0955




Epoch 32/150: Train Loss 160.3408, Learning Rate 0.0009, Valid Loss 269.8251




Epoch 33/150: Train Loss 157.8764, Learning Rate 0.0009, Valid Loss 264.8010




Epoch 34/150: Train Loss 155.2322, Learning Rate 0.0009, Valid Loss 317.0804




Epoch 35/150: Train Loss 149.9644, Learning Rate 0.0009, Valid Loss 261.7719




Epoch 36/150: Train Loss 155.8702, Learning Rate 0.0009, Valid Loss 262.2878




Epoch 37/150: Train Loss 154.2998, Learning Rate 0.0009, Valid Loss 277.7137




Epoch 38/150: Train Loss 153.5741, Learning Rate 0.0008, Valid Loss 250.6246




Epoch 39/150: Train Loss 147.4033, Learning Rate 0.0008, Valid Loss 336.4295


Train:   0%|          | 0/81 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._par

Epoch 40/150: Train Loss 152.6365, Learning Rate 0.0008, Valid Loss 232.4423




Epoch 41/150: Train Loss 154.8848, Learning Rate 0.0008, Valid Loss 249.7169




Epoch 42/150: Train Loss 152.4767, Learning Rate 0.0008, Valid Loss 261.7432




Epoch 43/150: Train Loss 148.9851, Learning Rate 0.0008, Valid Loss 258.7209




Epoch 44/150: Train Loss 146.6299, Learning Rate 0.0008, Valid Loss 247.6554




Epoch 45/150: Train Loss 144.8417, Learning Rate 0.0008, Valid Loss 236.1707




Epoch 46/150: Train Loss 145.9958, Learning Rate 0.0008, Valid Loss 232.7023




Epoch 47/150: Train Loss 147.7254, Learning Rate 0.0008, Valid Loss 241.7765




Epoch 48/150: Train Loss 149.7088, Learning Rate 0.0008, Valid Loss 264.8148




Epoch 49/150: Train Loss 147.3642, Learning Rate 0.0008, Valid Loss 246.6165


Val:   0%|          | 0/29 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers


Epoch 50/150: Train Loss 153.6247, Learning Rate 0.0008, Valid Loss 300.7258


Train:   0%|          | 0/81 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2509b8fe60>
Traceback (most recent call last):
    assert self._parent_pid == os.getpid(), 'can only test a child process'
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1358, in __del__
    self._shutdown_workers()
AssertionError: can only test a child process
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_worke

Epoch 51/150: Train Loss 146.5864, Learning Rate 0.0007, Valid Loss 245.3036




Epoch 52/150: Train Loss 145.6410, Learning Rate 0.0007, Valid Loss 254.4723




Epoch 53/150: Train Loss 146.2634, Learning Rate 0.0007, Valid Loss 295.3615




Epoch 54/150: Train Loss 139.8706, Learning Rate 0.0007, Valid Loss 274.4745




Epoch 55/150: Train Loss 144.8080, Learning Rate 0.0007, Valid Loss 268.1921




Epoch 56/150: Train Loss 142.7473, Learning Rate 0.0007, Valid Loss 237.3477




Epoch 57/150: Train Loss 143.7603, Learning Rate 0.0007, Valid Loss 231.2356




Epoch 58/150: Train Loss 144.0850, Learning Rate 0.0007, Valid Loss 256.2427




Epoch 59/150: Train Loss 141.3267, Learning Rate 0.0007, Valid Loss 251.8315




Epoch 60/150: Train Loss 140.2299, Learning Rate 0.0007, Valid Loss 232.3440




Epoch 61/150: Train Loss 143.9652, Learning Rate 0.0006, Valid Loss 243.6678




Epoch 62/150: Train Loss 141.1815, Learning Rate 0.0006, Valid Loss 252.5888




Epoch 63/150: Train Loss 138.5536, Learning Rate 0.0006, Valid Loss 241.3144




Epoch 64/150: Train Loss 142.2611, Learning Rate 0.0006, Valid Loss 244.3739




Epoch 65/150: Train Loss 132.7539, Learning Rate 0.0006, Valid Loss 234.0161




Epoch 66/150: Train Loss 136.8685, Learning Rate 0.0006, Valid Loss 234.3545




Epoch 67/150: Train Loss 141.8048, Learning Rate 0.0006, Valid Loss 254.2646




Epoch 68/150: Train Loss 136.9512, Learning Rate 0.0006, Valid Loss 236.4573




Epoch 69/150: Train Loss 139.9140, Learning Rate 0.0006, Valid Loss 310.3708




Epoch 70/150: Train Loss 134.6902, Learning Rate 0.0006, Valid Loss 253.4290




Epoch 71/150: Train Loss 139.2192, Learning Rate 0.0005, Valid Loss 237.9320




Epoch 72/150: Train Loss 134.5387, Learning Rate 0.0005, Valid Loss 240.7386




Epoch 73/150: Train Loss 133.2361, Learning Rate 0.0005, Valid Loss 240.9330




Epoch 74/150: Train Loss 140.1229, Learning Rate 0.0005, Valid Loss 243.7397




Epoch 75/150: Train Loss 138.8697, Learning Rate 0.0005, Valid Loss 259.3873




Epoch 76/150: Train Loss 141.2142, Learning Rate 0.0005, Valid Loss 227.3463




Epoch 77/150: Train Loss 135.7192, Learning Rate 0.0005, Valid Loss 237.9784




Epoch 78/150: Train Loss 138.8996, Learning Rate 0.0005, Valid Loss 248.1943




Epoch 79/150: Train Loss 135.2945, Learning Rate 0.0005, Valid Loss 261.6547




Epoch 80/150: Train Loss 132.8988, Learning Rate 0.0004, Valid Loss 238.5687




Epoch 81/150: Train Loss 136.3719, Learning Rate 0.0004, Valid Loss 246.4634




Epoch 82/150: Train Loss 133.5209, Learning Rate 0.0004, Valid Loss 230.1010




Epoch 83/150: Train Loss 132.4346, Learning Rate 0.0004, Valid Loss 241.5934




Epoch 84/150: Train Loss 131.0842, Learning Rate 0.0004, Valid Loss 240.4002




Epoch 85/150: Train Loss 129.6940, Learning Rate 0.0004, Valid Loss 258.8005




Epoch 86/150: Train Loss 129.3419, Learning Rate 0.0004, Valid Loss 276.7866




Epoch 87/150: Train Loss 134.4993, Learning Rate 0.0004, Valid Loss 235.7165




Epoch 88/150: Train Loss 129.7122, Learning Rate 0.0004, Valid Loss 238.0546




Epoch 89/150: Train Loss 130.1644, Learning Rate 0.0004, Valid Loss 277.7906




Epoch 90/150: Train Loss 128.8524, Learning Rate 0.0003, Valid Loss 236.5498




Epoch 91/150: Train Loss 129.5381, Learning Rate 0.0003, Valid Loss 232.0423




Epoch 92/150: Train Loss 130.0372, Learning Rate 0.0003, Valid Loss 241.3579




Epoch 93/150: Train Loss 129.3278, Learning Rate 0.0003, Valid Loss 236.9514




Epoch 94/150: Train Loss 132.5086, Learning Rate 0.0003, Valid Loss 235.4095




Epoch 95/150: Train Loss 132.5904, Learning Rate 0.0003, Valid Loss 244.4159




Epoch 96/150: Train Loss 131.0777, Learning Rate 0.0003, Valid Loss 232.6072




Epoch 97/150: Train Loss 130.6383, Learning Rate 0.0003, Valid Loss 238.1068




Epoch 98/150: Train Loss 130.9510, Learning Rate 0.0003, Valid Loss 238.0466




Epoch 99/150: Train Loss 128.5047, Learning Rate 0.0003, Valid Loss 250.9928




Epoch 100/150: Train Loss 126.5799, Learning Rate 0.0003, Valid Loss 240.3945




Epoch 101/150: Train Loss 127.9295, Learning Rate 0.0002, Valid Loss 240.3567




Epoch 102/150: Train Loss 127.3154, Learning Rate 0.0002, Valid Loss 245.1286




Epoch 103/150: Train Loss 129.7426, Learning Rate 0.0002, Valid Loss 239.0117




Epoch 104/150: Train Loss 127.2530, Learning Rate 0.0002, Valid Loss 239.7744




Epoch 105/150: Train Loss 123.2411, Learning Rate 0.0002, Valid Loss 232.4523




Epoch 106/150: Train Loss 124.8128, Learning Rate 0.0002, Valid Loss 244.7932




Epoch 107/150: Train Loss 128.6473, Learning Rate 0.0002, Valid Loss 237.2875




Epoch 108/150: Train Loss 125.2425, Learning Rate 0.0002, Valid Loss 230.3022




Epoch 109/150: Train Loss 123.1171, Learning Rate 0.0002, Valid Loss 232.9489




Epoch 110/150: Train Loss 123.6265, Learning Rate 0.0002, Valid Loss 232.4440




Epoch 111/150: Train Loss 123.0487, Learning Rate 0.0002, Valid Loss 250.8864




Epoch 112/150: Train Loss 126.4336, Learning Rate 0.0002, Valid Loss 237.9223




Epoch 113/150: Train Loss 123.1756, Learning Rate 0.0001, Valid Loss 254.2025




Epoch 114/150: Train Loss 122.0428, Learning Rate 0.0001, Valid Loss 237.0258




Epoch 115/150: Train Loss 122.9303, Learning Rate 0.0001, Valid Loss 233.5303




Epoch 116/150: Train Loss 123.6900, Learning Rate 0.0001, Valid Loss 234.2813




Epoch 117/150: Train Loss 122.4655, Learning Rate 0.0001, Valid Loss 233.8173




Epoch 118/150: Train Loss 121.8503, Learning Rate 0.0001, Valid Loss 233.2501




Epoch 119/150: Train Loss 119.3946, Learning Rate 0.0001, Valid Loss 235.6827




Epoch 120/150: Train Loss 122.7155, Learning Rate 0.0001, Valid Loss 237.2863




Epoch 121/150: Train Loss 124.5745, Learning Rate 0.0001, Valid Loss 235.3814




Epoch 122/150: Train Loss 122.1470, Learning Rate 0.0001, Valid Loss 240.5243




Epoch 123/150: Train Loss 122.5953, Learning Rate 0.0001, Valid Loss 236.5107




Epoch 124/150: Train Loss 124.5393, Learning Rate 0.0001, Valid Loss 237.4129




Epoch 125/150: Train Loss 123.4178, Learning Rate 0.0001, Valid Loss 233.4195




Epoch 126/150: Train Loss 121.3274, Learning Rate 0.0001, Valid Loss 235.9647




Epoch 127/150: Train Loss 121.0701, Learning Rate 0.0001, Valid Loss 236.0305




Epoch 128/150: Train Loss 123.5589, Learning Rate 0.0001, Valid Loss 234.6648




Epoch 129/150: Train Loss 123.8140, Learning Rate 0.0000, Valid Loss 236.9283




Epoch 130/150: Train Loss 120.2639, Learning Rate 0.0000, Valid Loss 240.4884




Epoch 131/150: Train Loss 120.0804, Learning Rate 0.0000, Valid Loss 236.3231




Epoch 132/150: Train Loss 121.5886, Learning Rate 0.0000, Valid Loss 239.2471




Epoch 133/150: Train Loss 117.9661, Learning Rate 0.0000, Valid Loss 239.7756




Epoch 134/150: Train Loss 120.8373, Learning Rate 0.0000, Valid Loss 238.1136




Epoch 135/150: Train Loss 120.7754, Learning Rate 0.0000, Valid Loss 240.7355




Epoch 136/150: Train Loss 120.7674, Learning Rate 0.0000, Valid Loss 239.0040




Epoch 137/150: Train Loss 118.7926, Learning Rate 0.0000, Valid Loss 238.8392




Epoch 138/150: Train Loss 122.6199, Learning Rate 0.0000, Valid Loss 239.4552




Epoch 139/150: Train Loss 117.9929, Learning Rate 0.0000, Valid Loss 239.7487




Epoch 140/150: Train Loss 116.7426, Learning Rate 0.0000, Valid Loss 240.1334




Epoch 141/150: Train Loss 117.0934, Learning Rate 0.0000, Valid Loss 239.4382




Epoch 142/150: Train Loss 120.8948, Learning Rate 0.0000, Valid Loss 240.1332




Epoch 143/150: Train Loss 121.7963, Learning Rate 0.0000, Valid Loss 238.9679




Epoch 144/150: Train Loss 117.6370, Learning Rate 0.0000, Valid Loss 239.2778




Epoch 145/150: Train Loss 121.1966, Learning Rate 0.0000, Valid Loss 239.6058




Epoch 146/150: Train Loss 118.5258, Learning Rate 0.0000, Valid Loss 239.3179




Epoch 147/150: Train Loss 119.3313, Learning Rate 0.0000, Valid Loss 239.6612




Epoch 148/150: Train Loss 120.1371, Learning Rate 0.0000, Valid Loss 239.5265




Epoch 149/150: Train Loss 120.8878, Learning Rate 0.0000, Valid Loss 239.5855


                                                                   

Epoch 150/150: Train Loss 119.8560, Learning Rate 0.0000, Valid Loss 240.0568




# Evaluate Stats

## Copy Ground Truths to Colab 

In [60]:
! cp /content/drive/MyDrive/11785/project/ground_truth.csv /content/data/metadata/

## Dataset for Evaluation

In [55]:
class EvalDataset(Dataset):

    def __init__(self, data_dir, transforms=Compose([CenterCrop((256, 256)), ToTensor()]), labels=labels_dict):
        self.data_dir = data_dir
        self.transforms = transforms
        self.labels = labels

        # # ['dish_1556572657', 'dish_1556573514', 'dish_1556575014', 'dish_1556575083', 'dish_1556575124', 'dish_1556575273', 'dish_1556575327']
        dirs = os.listdir(self.data_dir)

        self.dish_ids = []
        for dir in dirs:
            if "rgb.png" in os.listdir(os.path.join(self.data_dir,dir)):
                self.dish_ids.append(dir)

        self.dish_ids.sort()

        self.img_paths = list(
            map(
                lambda fname: os.path.join(self.data_dir, fname),
                self.dish_ids,
            )
        )

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        rgb_path = f"{self.img_paths[idx]}/rgb.png"
        dish_id = self.dish_ids[idx]
        transformed_img = self.transforms(Image.open(rgb_path))
        # Return the dish id for writing the csv file
        return transformed_img, dish_id

In [56]:
torch.cuda.empty_cache()
eval_train_data = EvalDataset(TRAIN_DIR, labels=labels_dict)
eval_val_data = EvalDataset(VALID_DIR, labels=labels_dict)

eval_train_loader = DataLoader(eval_train_data, batch_size=config.batch_size, shuffle=False, num_workers=2)
eval_val_loader = DataLoader(eval_val_data, batch_size=config.batch_size, shuffle=False, num_workers=2)

## (Optional) Load Checkpoint

In [None]:
model = BaseNet()
_, _, model_state_dict = load_checkpoint(f"{CHECKPOINT_PATH}/baseline")
model.load_state_dict(model_state_dict)
model.cuda()

BaseNet(
  (backbone): InceptionV2(
    (block1): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (block2): Sequential(
      (0): Sequential(
        (0): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (block3): Sequential(
      (0): InceptionV2ModuleA(
        (branch1): Sequential(
          (0): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

## Inference on training and testing data

In [57]:
prediction_filepath = "/content/data/metadata/outputs.csv"
# if os.path.exists(prediction_filepath):
#     os.remove(prediction_filepath)

model.eval()
batch_bar = tqdm(total=len(eval_train_loader), dynamic_ncols=True, position=0, leave=False, desc='Eval_Train')
results_all = None
# Inference on training data
for i, (x, y) in enumerate(eval_train_loader):

    x = x.cuda()
    dish_ids = np.array(list(y))
    dish_ids = dish_ids.reshape(dish_ids.shape[0],1)

    cal, mass, mn = model(x)

    results = torch.cat((cal,mass,mn), 1).detach().cpu().numpy()

    results = np.concatenate((dish_ids, results), 1)
    
    if results_all is None:
        results_all = results
    else:
        results_all = np.concatenate((results_all, results), 0)

    del cal, mass, mn
    torch.cuda.empty_cache()

    batch_bar.update()
    
batch_bar.close()
# Inference on validation data
batch_bar = tqdm(total=len(eval_val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
for i, (x, y) in enumerate(eval_val_loader):

    x = x.cuda()
    dish_ids = np.array(list(y))
    dish_ids = dish_ids.reshape(dish_ids.shape[0],1)

    cal, mass, mn = model(x)

    results = torch.cat((cal,mass,mn), 1).detach().cpu().numpy()

    results = np.concatenate((dish_ids, results), 1)
    
    if results_all is None:
        results_all = results
    else:
        results_all = np.concatenate((results_all, results), 0)

    del cal, mass, mn
    torch.cuda.empty_cache()

    batch_bar.update()
    
batch_bar.close()



In [58]:
# Write to csv
np.savetxt(prediction_filepath, results_all, delimiter=",", fmt='%s,%s,%s,%s,%s,%s')

In [61]:
r"""Script to compute statistics on nutrition predictions.

This script takes in a csv of nutrition predictions and computes absolute and
percentage mean average error values comparable to the metrics used to eval
models in the Nutrition5k paper. The input csv file of nutrition predictions
should be in the form of:
dish_id, calories, mass, carbs, protein
And the groundtruth values will be pulled from the metadata csv file provided
in the Nutrition5k dataset release where the first 5 fields are also:
dish_id, calories, mass, carbs, protein

Example Usage:
python compute_statistics.py path/to/groundtruth.csv path/to/predictions.csv \
path/to/output_statistics.json
"""

import json
from os import path
import statistics
import sys

DISH_ID_INDEX = 0
DATA_FIELDNAMES = ["dish_id", "calories", "mass", "fat", "carb", "protein"]


def ReadCsvData(filepath):
  if not path.exists(filepath):
    raise Exception("File %s not found" % path)
  parsed_data = {}
  with open(filepath, "r") as f_in:
    filelines = f_in.readlines()
    for line in filelines:
      data_values = line.strip().split(",")
      parsed_data[data_values[DISH_ID_INDEX]] = data_values
  return parsed_data

groundtruth_csv_path = "/content/data/metadata/ground_truth.csv"
predictions_csv_path = prediction_filepath
output_path = "/content/data/metadata/eval_results.json"

groundtruth_data = ReadCsvData(groundtruth_csv_path)
prediction_data = ReadCsvData(predictions_csv_path)

groundtruth_values = {}
err_values = {}
output_stats = {}

for field in DATA_FIELDNAMES[1:]:
  groundtruth_values[field] = []
  err_values[field] = []

for dish_id in prediction_data:
  for i in range(1, len(DATA_FIELDNAMES)):
    groundtruth_values[DATA_FIELDNAMES[i]].append(
        float(groundtruth_data[dish_id][i]))
    err_values[DATA_FIELDNAMES[i]].append(abs(
        float(prediction_data[dish_id][i])
        - float(groundtruth_data[dish_id][i])))

for field in DATA_FIELDNAMES[1:]:
  output_stats[field + "_MAE"] = statistics.mean(err_values[field])
  output_stats[field + "_MAE_%"] = (100 * statistics.mean(err_values[field]) /
                                    statistics.mean(groundtruth_values[field]))

with open(output_path, "w") as f_out:
  f_out.write(json.dumps(output_stats))
