# Midterm Expectation

- Baseline
- Experiment (i.e., something different; e.g., spreadsheet)
- Timeline (what we have done, and what we will be doing)

# Dependencies

In [None]:
import os

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import (
    CenterCrop,
    Compose,
    RandAugment,
    RandomHorizontalFlip,
    RandomRotation,
    ToTensor,
)
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import math

# Mount Google Drive

## Install gdfuse

In [None]:
!sudo add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!sudo apt-get update -qq 2>&1 > /dev/null
!sudo apt -y install -qq google-drive-ocamlfuse 2>&1 > /dev/null
!google-drive-ocamlfuse



debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: www-browser: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: links2: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: elinks: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: links: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: lynx: not found
/usr/bin/xdg-open: 851: /usr/bin/xdg-open: w3m: not found
xdg-open: no method available for opening 'https://accounts.google.com/o/oauth2/auth?client_id=564921029129.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fgd-ocaml-auth.appspot.

## Fetch API Key

In [None]:
!sudo apt-get install -qq w3m # to act as web browser 
!xdg-settings set default-web-browser w3m.desktop # to set default browser
%cd /content
!mkdir drive
%cd drive
!mkdir MyDrive
%cd ..
%cd ..
!google-drive-ocamlfuse /content/drive/MyDrive

/content
mkdir: cannot create directory ‘drive’: File exists
/content/drive
mkdir: cannot create directory ‘MyDrive’: File exists
/content
/
Access token retrieved correctly.


# Unzip Data

In [None]:
# Install gdown, used on the next line to fetch a file from drive.google.com by id.
! pip install gdown
# Download the archive into /content (the following lines expect it to be named data.zip).
! cd /content/ && gdown https://drive.google.com/uc?id=1zD7fUAt12L16ywPSjRsj4IqL-Lqg13zK
# Alternative source: copy the archive from the mounted Drive instead of downloading it.
# ! cp /content/drive/MyDrive/11785/project/data.zip /content/
# Unpack inside /content, then remove the archive once it has been extracted.
! cd /content && unzip data.zip
! rm /content/data.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data/imagery/realsense_overhead/train/dish_1562169840/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170163/
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/depth_raw.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170163/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170236/
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/depth_raw.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170236/rgb.png  
   creating: data/imagery/realsense_overhead/train/dish_1562170331/
  inflating: data/imagery/realsense_overhead/train/dish_1562170331/depth_color.png  
  inflating: data/imagery/realsense_overhead/train/dish_1562170331/depth_raw.pn

# Global Variables

In [None]:
# Root of the unzipped dataset, plus the two sub-trees the notebook reads from:
# the per-dish metadata CSVs and the overhead camera imagery.
DATA_PATH = "/content/data"
METADATA_DIR = DATA_PATH + "/metadata"
IMAGERY_DIR = "/".join((DATA_PATH, "imagery", "realsense_overhead"))

# Helper Functions for Data

In [None]:
def read_csv_variable_cols(filepath: str) -> pd.DataFrame:
    """Read a headerless CSV whose rows have differing numbers of fields.

    Approach taken from https://stackoverflow.com/a/57824142: pandas needs to
    know the widest row up front, so the file is scanned once to count fields
    and then parsed with integer column names ``0 .. widest - 1``. Shorter
    rows are padded with NaN by pandas.

    Only the first 6 columns are returned, because those hold the labels this
    notebook needs.

    Args:
        filepath: Path to the comma-separated file.

    Returns:
        A DataFrame with at most 6 integer-named columns. An empty file yields
        an empty DataFrame instead of an opaque ``max()`` error.
    """
    # First pass: stream the file line by line (no full read into memory) and
    # record the largest field count. Splitting on "," is a plain count; it
    # does not understand quoted commas.
    with open(filepath, "r") as csv_file:
        widest = max((len(line.split(",")) for line in csv_file), default=0)

    if widest == 0:
        # Nothing to parse; pandas would raise on an empty file.
        return pd.DataFrame()

    # Column names are simply 0, 1, 2, ..., widest - 1.
    column_names = list(range(widest))

    # Second pass: the real parse, then keep only the leading label columns.
    frame = pd.read_csv(
        filepath,
        header=None,
        delimiter=",",
        names=column_names,
        low_memory=False,
    )
    return frame.iloc[:, :6]

# Ingredient and Dish Metadata (Ground Truths)

## Data Format

### Training-testing data

- "imagery/realsense_overhead/dish_<id>" contains the images as the input data
- "dish_ids" contains training-testing splits

### Labels (metadata)

All labels need to be preprocessed. For each dish, we need to extract the following:
- total calorie
- mass (optional according to the paper)
- the amount of the three macronutrients (fat, carb, protein)

It doesn't seem that bad - we don't need to process the ingredients because they are purely there for constructing the labels. For our multi-task learning, we only need to have the above labels. The three tasks are:

1. Calorie
2. Macronutrients (fat, carb, protein)
3. Mass (optional)

This way we can keep the labels together with the image data and return both the input and the expected output on each access.

## Dish Metadata

In [None]:
# Metadata for dishes has variable numbers of columns per row.
# Can do similar stuff to dish_metadata_cafe2.csv
# The first 6 columns: [dish_id, total_calories, total_mass, total_fat, total_carb, total_protein]
# The dish metadata files have a variable number of columns per row, but the
# first six are fixed. Map their positional names to readable ones, in order.
_dish_column_names = {
    0: "dish_id",
    1: "total_calories",
    2: "total_mass",
    3: "total_fat",
    4: "total_carb",
    5: "total_protein",
}

# Load both cafe files the same way: read the six leading columns, then rename.
dish_metadata_1 = read_csv_variable_cols(f"{METADATA_DIR}/dish_metadata_cafe1.csv").rename(
    columns=_dish_column_names
)
dish_metadata_2 = read_csv_variable_cols(f"{METADATA_DIR}/dish_metadata_cafe2.csv").rename(
    columns=_dish_column_names
)

# Stack the two cafes into a single table with a fresh 0..n-1 index.
dish_metadata = pd.concat([dish_metadata_1, dish_metadata_2], ignore_index=True)

# dish_id -> {"total_calories": ..., "total_mass": ..., "total_fat": ...,
#             "total_carb": ..., "total_protein": ...}
labels_dict = dish_metadata.set_index("dish_id").to_dict("index")

# Hyperparameters

In [None]:
# Training hyperparameters: number of epochs, mini-batch size, and learning rate.
config = dict(epochs=150, batch_size=32, lr=2e-4)

class Config:
    """Attribute-style view of a hyperparameter dict (``cfg.lr`` instead of ``cfg['lr']``)."""

    def __init__(self, config):
        # Copy every key of the mapping onto the instance as an attribute.
        for name in config:
            setattr(self, name, config[name])

# Replace the raw dict with an attribute-style object (config.epochs, config.batch_size, config.lr).
config = Config(config)

# Default image pipeline: 256x256 center crop, RandAugment (3 is passed as its
# first positional argument), then conversion to a tensor.
# NOTE(review): RGBDataset uses this object as its default `transforms`, so any
# dataset built without an explicit pipeline also gets the random augmentation.
transforms = Compose([CenterCrop((256, 256)), RandAugment(3), ToTensor()])

# Datasets and DataLoaders

In [None]:
class RGBDataset(Dataset):
    """Dataset of overhead RGB dish images, optionally paired with labels.

    From section 4.2 of the paper: "The input resolution to the network is a
    256x256 image, where images were downsized and center cropped in order to
    retain the most salient dish region."

    The baseline should only need the RGB image (per section 4.2).

    Args:
        data_dir: Directory whose sub-directories are named by dish id and
            contain an ``rgb.png`` file.
        transforms: Callable applied to the PIL image; its result is returned
            as the sample input.
        labels: Mapping ``dish_id -> {label_name: value}``. The values are
            packed into the label tensor in the mapping's iteration order.
        train: When True ``__getitem__`` returns ``(image, label)``; when False
            it returns only the image.
    """

    def __init__(self, data_dir, transforms=transforms, labels=labels_dict, train=True):
        self.data_dir = data_dir
        self.transforms = transforms
        self.labels = labels
        self.train = train

        # Keep only the entries that really contain an rgb.png. Testing the
        # file path directly (instead of listing every entry as a directory)
        # means stray files in data_dir are skipped rather than crashing
        # os.listdir. Sorting gives a stable index -> dish mapping.
        self.dish_ids = sorted(
            entry
            for entry in os.listdir(self.data_dir)
            if os.path.isfile(os.path.join(self.data_dir, entry, "rgb.png"))
        )

        # One directory path per dish, aligned index-for-index with dish_ids.
        self.img_paths = [
            os.path.join(self.data_dir, dish_id) for dish_id in self.dish_ids
        ]

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        rgb_path = f"{self.img_paths[idx]}/rgb.png"
        dish_id = self.dish_ids[idx]

        # Close the file handle deterministically and force 3 channels so a
        # PNG stored with an alpha channel or palette still yields an RGB input.
        with Image.open(rgb_path) as img:
            transformed_img = self.transforms(img.convert("RGB"))

        if not self.train:
            return transformed_img

        # Raises KeyError if the dish has no metadata entry.
        label = torch.tensor(list(self.labels[dish_id].values()))
        return transformed_img, label

In [None]:
TRAIN_DIR = f"{IMAGERY_DIR}/train"
VALID_DIR = f"{IMAGERY_DIR}/test"
# TEST_DIR = IMAGERY_DIR

# Validation must be deterministic: same crop and tensor conversion as
# training, but WITHOUT RandAugment. The dataset's default pipeline includes
# RandAugment, which would otherwise randomly distort the validation images
# and make the validation loss noisy and not comparable between epochs.
valid_transforms = Compose([CenterCrop((256, 256)), ToTensor()])

train_dataset = RGBDataset(TRAIN_DIR, labels=labels_dict)
valid_dataset = RGBDataset(VALID_DIR, transforms=valid_transforms, labels=labels_dict)
# test_dataset = RGBDataset(TEST_DIR, train=False)

# Only the training loader is shuffled; validation keeps the sorted dish order.
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2)
valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)
# test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)

# Number of optimizer steps per epoch.
num_training_batches = len(train_loader)

# InceptionV2

In [None]:
def ConvBNReLU(in_channels,out_channels,kernel_size,stride=1,padding=0):
    """Build a Conv2d -> BatchNorm2d -> ReLU6 stack.

    Despite the name, the activation is ``nn.ReLU6`` (applied in place).
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)

def ConvBNReLUFactorization(in_channels,out_channels,kernel_sizes,paddings):
    """Build a stride-1 Conv2d -> BatchNorm2d -> ReLU6 stack for asymmetric kernels.

    ``kernel_sizes`` and ``paddings`` are passed straight to ``nn.Conv2d``, so
    callers use (height, width) pairs such as [1, 3] / [0, 1].
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_sizes, stride=1, padding=paddings)
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)

class InceptionV2ModuleA(nn.Module):
    """Four-branch module using only square kernels; spatial size is preserved.

    Output channels = out_channels1 + out_channels2 + out_channels3 + out_channels4.
    """

    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super().__init__()

        # Branch 1: 1x1 conv.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by one 3x3 conv.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels2reduce, kernel_size=1),
            ConvBNReLU(out_channels2reduce, out_channels2, kernel_size=3, padding=1),
        )

        # Branch 3: 1x1 reduction followed by two stacked 3x3 convs.
        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels3reduce, kernel_size=1),
            ConvBNReLU(out_channels3reduce, out_channels3, kernel_size=3, padding=1),
            ConvBNReLU(out_channels3, out_channels3, kernel_size=3, padding=1),
        )

        # Branch 4: stride-1 max pool followed by a 1x1 projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, kernel_size=1),
        )

    def forward(self, x):
        # Run the branches in order and concatenate along the channel axis.
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

class InceptionV2ModuleB(nn.Module):
    """Four-branch module built from factorized 1x3 / 3x1 convs; spatial size is preserved.

    Output channels = out_channels1 + out_channels2 + out_channels3 + out_channels4.
    """

    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super().__init__()

        # Branch 1: 1x1 conv.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, kernel_size=1)

        # Branch 2: 1x1 reduction, then one 1x3 + 3x1 pair.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels2reduce, kernel_size=1),
            ConvBNReLUFactorization(out_channels2reduce, out_channels2reduce, kernel_sizes=[1, 3], paddings=[0, 1]),
            ConvBNReLUFactorization(out_channels2reduce, out_channels2, kernel_sizes=[3, 1], paddings=[1, 0]),
        )

        # Branch 3: 1x1 reduction, then two 1x3 + 3x1 pairs; only the very
        # last conv widens to out_channels3.
        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels3reduce, kernel_size=1),
            ConvBNReLUFactorization(out_channels3reduce, out_channels3reduce, kernel_sizes=[1, 3], paddings=[0, 1]),
            ConvBNReLUFactorization(out_channels3reduce, out_channels3reduce, kernel_sizes=[3, 1], paddings=[1, 0]),
            ConvBNReLUFactorization(out_channels3reduce, out_channels3reduce, kernel_sizes=[1, 3], paddings=[0, 1]),
            ConvBNReLUFactorization(out_channels3reduce, out_channels3, kernel_sizes=[3, 1], paddings=[1, 0]),
        )

        # Branch 4: stride-1 max pool followed by a 1x1 projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, kernel_size=1),
        )

    def forward(self, x):
        # Run the branches in order and concatenate along the channel axis.
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)

class InceptionV2ModuleC(nn.Module):
    """Four-branch module whose branches 2 and 3 fan out into parallel 1x3 and 3x1 convs.

    Because the two parallel convs in branches 2 and 3 are concatenated, the
    output has out_channels1 + 2 * out_channels2 + 2 * out_channels3 +
    out_channels4 channels. Spatial size is preserved.
    """

    def __init__(self, in_channels,out_channels1,out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super().__init__()

        # Branch 1: 1x1 conv.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, kernel_size=1)

        # Branch 2: 1x1 reduction feeding two parallel asymmetric convs.
        self.branch2_conv1 = ConvBNReLU(in_channels, out_channels2reduce, kernel_size=1)
        self.branch2_conv2a = ConvBNReLUFactorization(out_channels2reduce, out_channels2, kernel_sizes=[1, 3], paddings=[0, 1])
        self.branch2_conv2b = ConvBNReLUFactorization(out_channels2reduce, out_channels2, kernel_sizes=[3, 1], paddings=[1, 0])

        # Branch 3: 1x1 reduction and a 3x3 conv feeding two parallel asymmetric convs.
        self.branch3_conv1 = ConvBNReLU(in_channels, out_channels3reduce, kernel_size=1)
        self.branch3_conv2 = ConvBNReLU(out_channels3reduce, out_channels3, kernel_size=3, stride=1, padding=1)
        self.branch3_conv3a = ConvBNReLUFactorization(out_channels3, out_channels3, kernel_sizes=[3, 1], paddings=[1, 0])
        self.branch3_conv3b = ConvBNReLUFactorization(out_channels3, out_channels3, kernel_sizes=[1, 3], paddings=[0, 1])

        # Branch 4: stride-1 max pool followed by a 1x1 projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, kernel_size=1),
        )

    def forward(self, x):
        first = self.branch1(x)

        # Branch 2: shared stem, then the (a, b) pair side by side.
        stem2 = self.branch2_conv1(x)
        second = torch.cat([self.branch2_conv2a(stem2), self.branch2_conv2b(stem2)], dim=1)

        # Branch 3: shared two-conv stem, then the (a, b) pair side by side.
        stem3 = self.branch3_conv1(x)
        stem3 = self.branch3_conv2(stem3)
        third = torch.cat([self.branch3_conv3a(stem3), self.branch3_conv3b(stem3)], dim=1)

        fourth = self.branch4(x)
        return torch.cat([first, second, third, fourth], dim=1)

class InceptionV3ModuleD(nn.Module):
    """Down-sampling module: every branch ends with a stride-2 op.

    Output channels = out_channels1 + out_channels2 + in_channels (the pooling
    branch keeps the input width).
    """

    def __init__(self, in_channels,out_channels1reduce,out_channels1,out_channels2reduce, out_channels2):
        super().__init__()

        # Branch 1: 1x1 reduction, then a stride-2 3x3 conv.
        self.branch1 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels1reduce, kernel_size=1),
            ConvBNReLU(out_channels1reduce, out_channels1, kernel_size=3, stride=2, padding=1),
        )

        # Branch 2: 1x1 reduction, a stride-1 3x3 conv, then a stride-2 3x3 conv.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels2reduce, kernel_size=1),
            ConvBNReLU(out_channels2reduce, out_channels2, kernel_size=3, stride=1, padding=1),
            ConvBNReLU(out_channels2, out_channels2, kernel_size=3, stride=2, padding=1),
        )

        # Branch 3: stride-2 max pool, no convolution.
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        # Run the branches in order and concatenate along the channel axis.
        branches = (self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], dim=1)

class InceptionAux(nn.Module):
    """Auxiliary head: average pool, 1x1 conv, 5x5 conv, dropout, linear layer.

    The linear layer expects exactly 768 flattened features, so the 5x5 conv
    must reduce its input to a 1x1 map.
    """

    def __init__(self, in_channels,out_channels):
        super().__init__()

        self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.auxiliary_conv1 = ConvBNReLU(in_channels, 128, kernel_size=1)
        self.auxiliary_conv2 = nn.Conv2d(128, 768, kernel_size=5, stride=1)
        self.auxiliary_dropout = nn.Dropout(p=0.7)
        self.auxiliary_linear1 = nn.Linear(768, out_channels)

    def forward(self, x):
        pooled = self.auxiliary_avgpool(x)
        features = self.auxiliary_conv2(self.auxiliary_conv1(pooled))
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.auxiliary_linear1(self.auxiliary_dropout(flat))

class InceptionV2(nn.Module):
    """Inception-style feature extractor without a classification layer.

    forward() returns the flattened, dropped-out 2x2 average-pooled feature
    map of block5: 1024 channels * 2 * 2 = 4096 features per sample.

    NOTE(review): `num_classes` is accepted but never used, and `stage` is only
    stored; no classifier or auxiliary head is built here.
    """

    def __init__(self, num_classes=1000, stage='train'):
        super(InceptionV2, self).__init__()
        self.stage = stage

        # Stem part 1: 7x7 stride-2 conv followed by a stride-2 max pool (3 -> 64 channels).
        self.block1 = nn.Sequential(
            ConvBNReLU(in_channels=3, out_channels=64, kernel_size=7,stride=2,padding=3),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
        )

        # Stem part 2: 3x3 conv followed by a stride-2 max pool (64 -> 192 channels).
        self.block2 = nn.Sequential(
            ConvBNReLU(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2,padding=1),
        )

        # Channel flow: 192 -> 256 (64+64+96+32) -> 320 (64+96+96+64) -> 576 (160+96+320).
        self.block3 = nn.Sequential(
            InceptionV2ModuleA(in_channels=192,out_channels1=64,out_channels2reduce=64, out_channels2=64, out_channels3reduce=64, out_channels3=96, out_channels4=32),
            InceptionV2ModuleA(in_channels=256, out_channels1=64, out_channels2reduce=64, out_channels2=96,out_channels3reduce=64, out_channels3=96, out_channels4=64),
            InceptionV3ModuleD(in_channels=320, out_channels1reduce=128, out_channels1=160, out_channels2reduce=64,out_channels2=96),
        )

        # Every ModuleB keeps 576 channels; the final ModuleD yields 192+256+576 = 1024.
        self.block4 = nn.Sequential(
            InceptionV2ModuleB(in_channels=576, out_channels1=224, out_channels2reduce=64, out_channels2=96,out_channels3reduce=96, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=192, out_channels2reduce=96, out_channels2=128,out_channels3reduce=96, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=160, out_channels2reduce=128, out_channels2=160,out_channels3reduce=128, out_channels3=128, out_channels4=128),
            InceptionV2ModuleB(in_channels=576, out_channels1=96, out_channels2reduce=128, out_channels2=192,out_channels3reduce=160, out_channels3=160, out_channels4=128),
            InceptionV3ModuleD(in_channels=576, out_channels1reduce=128, out_channels1=192, out_channels2reduce=192,out_channels2=256),
        )

        # ModuleC doubles branches 2 and 3: 352 + 2*160 + 2*112 + 128 = 1024 channels.
        self.block5 = nn.Sequential(
            InceptionV2ModuleC(in_channels=1024, out_channels1=352, out_channels2reduce=192, out_channels2=160,out_channels3reduce=160, out_channels3=112, out_channels4=128),
            InceptionV2ModuleC(in_channels=1024, out_channels1=352, out_channels2reduce=192, out_channels2=160,
                               out_channels3reduce=192, out_channels3=112, out_channels4=128)
        )

        # Pool to a fixed 2x2 map regardless of the spatial size reaching this point.
        self.avg_pool = nn.AdaptiveAvgPool2d((2,2))
        self.dropout = nn.Dropout(p=0.5)
    

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.avg_pool(x)
        x = self.dropout(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        return x

# Resnet50

In [None]:
import torch.utils.model_zoo as model_zoo
# Download locations of pretrained ResNet weight files, keyed by architecture name.
# Only the 'resnet50' entry is used by the code visible in this notebook.
model_urls = {
    'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth',
    'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth',
    'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth',
}

class Bottleneck(nn.Module):
  """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convs plus a skip connection.

  The block outputs ``planes * 4`` channels. The stride is applied by the
  first 1x1 conv; the 3x3 conv always uses stride 1.

  Args:
    inplanes: Input channel count.
    planes: Width of the two inner convs.
    stride: Stride of the first conv.
    downsample: Optional module applied to the input so the skip path matches
      the main path's shape; when None the input is added unchanged.
  """
  expansion = 4

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super().__init__()
    widened = planes * 4

    # 1x1 conv; this is where the stride is applied.
    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)

    # 3x3 conv, always stride 1.
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    # 1x1 conv that expands to the block's output width.
    self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm2d(widened)

    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    # Skip path: identity unless a projection was supplied.
    residual = x if self.downsample is None else self.downsample(x)

    out += residual
    return self.relu(out)
    
    
class ResNet(nn.Module):
  """ResNet backbone with a linear classification layer.

  Args:
    block: Residual block class; must expose an ``expansion`` attribute and
      accept ``(inplanes, planes, stride, downsample)``.
    layers: Number of blocks in each of the four stages.
    num_classes: Output size of the final linear layer.

  The head is a fixed 7x7 average pool followed by flattening, so the linear
  layer only lines up when that pool reduces the final feature map to 1x1.
  """

  def __init__(self, block, layers, num_classes=1000):
    super().__init__()
    # Channel count entering the next stage; _make_layer advances it.
    self.inplanes = 64

    # Stem: 7x7 stride-2 conv, batch norm, ReLU, then a ceil-mode max pool.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

    # Four residual stages; every stage after the first halves the resolution.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.avgpool = nn.AvgPool2d(7)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Weight init: conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
    # batch norm starts as the identity (weight 1, bias 0).
    for module in self.modules():
      if isinstance(module, nn.Conv2d):
        kernel_h, kernel_w = module.kernel_size
        fan = kernel_h * kernel_w * module.out_channels
        module.weight.data.normal_(0, math.sqrt(2. / fan))
      elif isinstance(module, nn.BatchNorm2d):
        module.weight.data.fill_(1)
        module.bias.data.zero_()

  def _make_layer(self, block, planes, blocks, stride=1):
    """Build one stage of `blocks` residual blocks and update self.inplanes."""
    out_planes = planes * block.expansion

    # The first block needs a projection on the skip path whenever it changes
    # the resolution or the channel count.
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
      downsample = nn.Sequential(
        nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_planes),
      )

    stage = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = out_planes
    # Remaining blocks keep the shape, so they use the defaults.
    for _ in range(1, blocks):
      stage.append(block(self.inplanes, planes))

    return nn.Sequential(*stage)

  def forward(self, x):
    # Stem.
    x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

    # Residual stages, in order.
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
      x = stage(x)

    # Head: pool, flatten everything except the batch dimension, classify.
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    return self.fc(x)

def resnet50(pretrained=True):
  """Build a ResNet-50 (Bottleneck blocks, stage depths [3, 4, 6, 3]).

  Args:
    pretrained (bool): If True, download the 'resnet50' weights listed in
      model_urls (described as pre-trained on ImageNet) and load them.

  Returns:
    The constructed ResNet model.
  """
  net = ResNet(Bottleneck, [3, 4, 6, 3])
  if not pretrained:
    return net

  weights = model_zoo.load_url(model_urls['resnet50'])
  net.load_state_dict(weights)
  return net


# Baseline Model

In [None]:
class BaseNet(nn.Module):
    """Backbone plus a shared fully connected trunk and three regression heads.

    forward() returns a tuple ``(calories, mass, macronutrients)`` with last
    dimensions 1, 1 and 3 respectively.

    NOTE(review): no activation is applied between any of the nn.Linear layers
    below, so the trunk and each head compose into a single affine map of the
    backbone output. Confirm whether non-linearities were intended.
    """

    def __init__(self):
        super().__init__()
        # self.backbone = InceptionV2([1, 1, 3])
        # resnet50() defaults to pretrained=True and ends in a 1000-way linear layer.
        self.backbone = resnet50()
        # Lifts the 1000-d ResNet output to the 4096-d width used by the trunk.
        self.fc = nn.Linear(1000, 4096) 
        self.fc1 = nn.Linear(4096, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        # Head 1: total calories (1 value).
        self.fc_calories = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 1)
        )
        # Head 2: total mass (1 value).
        self.fc_mass = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 1)
        )
        # Head 3: three values, compared against label columns 2+ (fat, carb, protein) by the loss.
        self.fc_mc = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.Linear(4096, 3)
        )

    def forward(self, x):
        x = self.backbone(x)
        # Required for the ResNet backbone (1000-d output). Comment this line out
        # only when the backbone is InceptionV2, whose output is already 4096-d.
        x = self.fc(x)
        x = self.fc2(self.fc1(x))

        x_cal = self.fc_calories(x)
        x_mass = self.fc_mass(x)
        x_mn = self.fc_mc(x)

        return x_cal, x_mass, x_mn

# Multi-task Loss

In [None]:
class MultiTaskLearner(nn.Module):
    """Wrap a three-output model and return the sum of its per-task L1 losses.

    Args:
        model: Module whose forward returns ``(calories, mass, macronutrients)``.
    """

    def __init__(self, model: nn.Module):
        super().__init__()
        self.model = model
        self.criterion = nn.L1Loss()

    def forward(self, x, y):
        # Label columns, in order:
        # [total_calories, total_mass, total_fat, total_carb, total_protein]
        # (presumably one row per sample in the batch).
        predictions = self.model(x)

        # Slices keep the column dimension so shapes line up with the heads:
        # column 0 -> calories, column 1 -> mass, columns 2+ -> macronutrients.
        targets = (y[:, 0:1], y[:, 1:2], y[:, 2:])

        task_losses = [
            self.criterion(pred, target)
            for pred, target in zip(predictions, targets)
        ]

        # Unweighted sum: calories + mass + macronutrients.
        return task_losses[0] + task_losses[1] + task_losses[2]

# Delete Model

In [None]:
# del model

# Utility Functions

In [None]:
CHECKPOINT_PATH = "/content/drive/MyDrive/checkpoints"
def create_dir_if_not_exists(dirpath: str):
    """Create the specified directory with all intermediate directories if necessary.

    Safe to call repeatedly: an already existing directory is left untouched.
    ``exist_ok=True`` replaces the earlier check-then-create sequence, which
    could fail if the directory appeared between the check and the creation.

    Args:
        dirpath: Directory path to create.

    Raises:
        FileExistsError: If ``dirpath`` exists but is not a directory.
    """
    os.makedirs(dirpath, exist_ok=True)

def save_checkpoint(
    epoch: int,
    loss: float,
    model: nn.Module,
    model_name: str,
    checkpoint_path: str = CHECKPOINT_PATH,
):
    """Write the epoch, loss and model weights to ``checkpoint_path/model_name``.

    The checkpoint directory is created first if it is missing. The file is a
    dict with the keys "epoch", "loss" and "model_state_dict".
    """
    create_dir_if_not_exists(checkpoint_path)

    destination = os.path.join(checkpoint_path, model_name)
    payload = {
        "epoch": epoch,
        "loss": loss,
        "model_state_dict": model.state_dict(),
    }
    torch.save(payload, destination)


def load_checkpoint(filepath: str, map_location=None):
    """Load a checkpoint file and unpack its three entries.

    Args:
        filepath: Path of a file holding a dict with the keys "epoch", "loss"
            and "model_state_dict".
        map_location: Forwarded to ``torch.load``. Pass e.g. ``"cpu"`` to open
            a checkpoint saved from a GPU on a machine without CUDA. The
            default ``None`` keeps the previous behavior.

    Returns:
        Tuple ``(epoch, loss, model_state_dict)``.

    Raises:
        KeyError: If one of the expected keys is missing from the file.
    """
    # The file holds the whole checkpoint dict, not just a state dict.
    checkpoint = torch.load(filepath, map_location=map_location)
    return (
        checkpoint["epoch"],
        checkpoint["loss"],
        checkpoint["model_state_dict"],
    )

# Training

In [None]:
print(f"Cuda is available: {torch.cuda.is_available()}")
model = BaseNet()
# Moves the model to the GPU unconditionally; the availability check above is only printed.
model.cuda()
# Wraps the model so that calling learner(x, y) returns the summed multi-task loss.
learner = MultiTaskLearner(model)

# NOTE(review): weight_decay=0.9 is a very strong L2 penalty for Adam — confirm
# it is intended and not another optimizer's decay/smoothing constant.
optimizer = torch.optim.Adam(model.parameters(), config.lr, weight_decay=0.9)
# T_max counts scheduler steps; the training loop steps the scheduler once per
# batch, so one cosine period spans the whole run (epochs * batches per epoch).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.epochs*len(train_loader))
# Only needed for mixed precision; the scaler calls in the training loop are commented out.
scaler = torch.cuda.amp.GradScaler()

Cuda is available: True


Downloading: "https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
# One iteration per epoch: a full training pass, an optional checkpoint, then a
# full validation pass and a one-line summary.
for epoch in range(config.epochs):
    model.train()
    # Sum of per-batch losses for this epoch's training pass.
    total_loss = 0

    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        # with torch.cuda.amp.autocast():     
        # learner runs the model and returns the summed L1 loss of the three tasks.
        loss = learner(x, y)

        # Accumulate the loss as a Python float
        total_loss += float(loss)

        # Compute training metrics: running mean loss so far in this epoch, and
        # the learning rate used for this step (read before scheduler.step()).
        train_loss = float(total_loss / (i + 1))
        cur_lr = float(optimizer.param_groups[0]['lr'])


        # tqdm lets you add some details so you can monitor training as you train.
        # The 4-decimal format rounds the learning rate (2e-4 shows as 0.0002).
        batch_bar.set_postfix(
            loss="{:.04f}".format(train_loss),
            lr="{:.04f}".format(cur_lr))
        
        # Full-precision update; the commented lines are the FP16 (GradScaler) equivalents.
        # scaler.scale(loss).backward() # This is a replacement for loss.backward()
        loss.backward()
        # scaler.step(optimizer) # This is a replacement for optimizer.step()
        optimizer.step()
        # scaler.update() # This is something added just for FP16

        scheduler.step() # We told scheduler T_max that we'd call step() (len(train_loader) * epochs) many times.

        batch_bar.update() # Update tqdm bar

    batch_bar.close() # You need this to close the tqdm bar

    # Mean training loss over the epoch's batches.
    train_loss = total_loss / len(train_loader)
    
    # Save the model every 3 epochs (epochs 0, 3, 6, ...). The file name is
    # fixed, so each save overwrites the previous checkpoint.
    if epoch % 3 == 0:
        save_checkpoint(epoch, train_loss, model, "resnet")

    # Validation pass: eval mode and no gradient tracking.
    model.eval()
    batch_bar = tqdm(total=len(valid_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
    # Reused as the validation accumulator; train_loss was already captured above.
    total_loss = 0
    for i, (x, y) in enumerate(valid_loader):

        x = x.cuda()
        y = y.cuda()

        with torch.no_grad():
            loss = learner(x, y)
        

        total_loss += float(loss)

        # Show the running mean validation loss.
        batch_bar.set_postfix(loss="{:.04f}".format(float(total_loss / (i + 1))))

        batch_bar.update()
        
    batch_bar.close()

    # scheduler.step(float(total_loss / (i + 1)))

    # Mean validation loss over the validation batches.
    valid_loss = total_loss / len(valid_loader)

    # cur_lr is the rate used for the last training batch of this epoch.
    print("Epoch {}/{}: Train Loss {:.04f}, Learning Rate {:.04f}, Valid Loss {:.04f}".format(
        epoch + 1, config.epochs, train_loss, cur_lr, valid_loss))




Epoch 1/150: Train Loss 230.9911, Learning Rate 0.0002, Valid Loss 267.3615




Epoch 2/150: Train Loss 197.0106, Learning Rate 0.0002, Valid Loss 232.3654




Epoch 3/150: Train Loss 183.9895, Learning Rate 0.0002, Valid Loss 302.6214




Epoch 4/150: Train Loss 183.9020, Learning Rate 0.0002, Valid Loss 220.8743




Epoch 5/150: Train Loss 180.3823, Learning Rate 0.0002, Valid Loss 254.8966




Epoch 6/150: Train Loss 174.8689, Learning Rate 0.0002, Valid Loss 290.2088




Epoch 7/150: Train Loss 171.9443, Learning Rate 0.0002, Valid Loss 234.2319




Epoch 8/150: Train Loss 170.2052, Learning Rate 0.0002, Valid Loss 232.9958




Epoch 9/150: Train Loss 182.8373, Learning Rate 0.0002, Valid Loss 234.6232




Epoch 10/150: Train Loss 165.6132, Learning Rate 0.0002, Valid Loss 248.9599




Epoch 11/150: Train Loss 169.9649, Learning Rate 0.0002, Valid Loss 259.3542




Epoch 12/150: Train Loss 174.9320, Learning Rate 0.0002, Valid Loss 249.6022




Epoch 13/150: Train Loss 167.9245, Learning Rate 0.0002, Valid Loss 252.4718




Epoch 14/150: Train Loss 168.5438, Learning Rate 0.0002, Valid Loss 267.8746




Epoch 15/150: Train Loss 160.9468, Learning Rate 0.0002, Valid Loss 256.7955




Epoch 16/150: Train Loss 165.3280, Learning Rate 0.0002, Valid Loss 252.7907




Epoch 17/150: Train Loss 159.7607, Learning Rate 0.0002, Valid Loss 252.5854




Epoch 18/150: Train Loss 167.1574, Learning Rate 0.0002, Valid Loss 262.3470




Epoch 19/150: Train Loss 164.7644, Learning Rate 0.0002, Valid Loss 228.3393




Epoch 20/150: Train Loss 157.9560, Learning Rate 0.0002, Valid Loss 242.9783




Epoch 21/150: Train Loss 163.0881, Learning Rate 0.0002, Valid Loss 230.9085




Epoch 22/150: Train Loss 164.2510, Learning Rate 0.0002, Valid Loss 283.9764




Epoch 23/150: Train Loss 161.2473, Learning Rate 0.0002, Valid Loss 283.4307




Epoch 24/150: Train Loss 161.7909, Learning Rate 0.0002, Valid Loss 237.5370




Epoch 25/150: Train Loss 164.4771, Learning Rate 0.0002, Valid Loss 256.8057




Epoch 26/150: Train Loss 163.5829, Learning Rate 0.0002, Valid Loss 254.6728




Epoch 27/150: Train Loss 161.6485, Learning Rate 0.0002, Valid Loss 226.6219




Epoch 28/150: Train Loss 161.1210, Learning Rate 0.0002, Valid Loss 227.1609




Epoch 29/150: Train Loss 153.1240, Learning Rate 0.0002, Valid Loss 243.0905




Epoch 30/150: Train Loss 159.8038, Learning Rate 0.0002, Valid Loss 223.6079




Epoch 31/150: Train Loss 158.9529, Learning Rate 0.0002, Valid Loss 243.4766




Epoch 32/150: Train Loss 157.1140, Learning Rate 0.0002, Valid Loss 237.9377




Epoch 33/150: Train Loss 154.0840, Learning Rate 0.0002, Valid Loss 292.1510




Epoch 34/150: Train Loss 156.9677, Learning Rate 0.0002, Valid Loss 300.5338




Epoch 35/150: Train Loss 154.6350, Learning Rate 0.0002, Valid Loss 225.2026




Epoch 36/150: Train Loss 152.7859, Learning Rate 0.0002, Valid Loss 237.5347




Epoch 37/150: Train Loss 154.4066, Learning Rate 0.0002, Valid Loss 251.6000




Epoch 38/150: Train Loss 154.1020, Learning Rate 0.0002, Valid Loss 231.4852




Epoch 39/150: Train Loss 154.9478, Learning Rate 0.0002, Valid Loss 258.5725




Epoch 40/150: Train Loss 154.2849, Learning Rate 0.0002, Valid Loss 230.0621




Epoch 41/150: Train Loss 155.3602, Learning Rate 0.0002, Valid Loss 243.3940




Epoch 42/150: Train Loss 155.1968, Learning Rate 0.0002, Valid Loss 232.3082




Epoch 43/150: Train Loss 151.9193, Learning Rate 0.0002, Valid Loss 235.8598




Epoch 44/150: Train Loss 156.6006, Learning Rate 0.0002, Valid Loss 251.4183




Epoch 45/150: Train Loss 151.6343, Learning Rate 0.0002, Valid Loss 229.4778




Epoch 46/150: Train Loss 146.5252, Learning Rate 0.0002, Valid Loss 230.6295




Epoch 47/150: Train Loss 150.9247, Learning Rate 0.0002, Valid Loss 227.0545




Epoch 48/150: Train Loss 151.5304, Learning Rate 0.0002, Valid Loss 230.7712




Epoch 49/150: Train Loss 145.7365, Learning Rate 0.0002, Valid Loss 238.8406




Epoch 50/150: Train Loss 146.7412, Learning Rate 0.0002, Valid Loss 223.9466




Epoch 51/150: Train Loss 144.5255, Learning Rate 0.0001, Valid Loss 266.9212




Epoch 52/150: Train Loss 145.2141, Learning Rate 0.0001, Valid Loss 226.4516




Epoch 53/150: Train Loss 145.4890, Learning Rate 0.0001, Valid Loss 251.4551




Epoch 54/150: Train Loss 143.5033, Learning Rate 0.0001, Valid Loss 231.9900




Epoch 55/150: Train Loss 145.0761, Learning Rate 0.0001, Valid Loss 247.4452




Epoch 56/150: Train Loss 146.2323, Learning Rate 0.0001, Valid Loss 235.9306




Epoch 57/150: Train Loss 141.8619, Learning Rate 0.0001, Valid Loss 229.3408




Epoch 58/150: Train Loss 142.2260, Learning Rate 0.0001, Valid Loss 217.0134




Epoch 59/150: Train Loss 140.8274, Learning Rate 0.0001, Valid Loss 240.3511




Epoch 60/150: Train Loss 141.2434, Learning Rate 0.0001, Valid Loss 232.1583




Epoch 61/150: Train Loss 140.5805, Learning Rate 0.0001, Valid Loss 219.2928




Epoch 62/150: Train Loss 142.6016, Learning Rate 0.0001, Valid Loss 233.8781




Epoch 63/150: Train Loss 141.5187, Learning Rate 0.0001, Valid Loss 228.1347




Epoch 64/150: Train Loss 139.2133, Learning Rate 0.0001, Valid Loss 279.1253




Epoch 65/150: Train Loss 138.1123, Learning Rate 0.0001, Valid Loss 233.7928




Epoch 66/150: Train Loss 137.5298, Learning Rate 0.0001, Valid Loss 279.2568




Epoch 67/150: Train Loss 139.5207, Learning Rate 0.0001, Valid Loss 248.6274




Epoch 68/150: Train Loss 137.9794, Learning Rate 0.0001, Valid Loss 238.4924




Epoch 69/150: Train Loss 136.8266, Learning Rate 0.0001, Valid Loss 221.5792




Epoch 70/150: Train Loss 143.3527, Learning Rate 0.0001, Valid Loss 247.9907




Epoch 71/150: Train Loss 135.0902, Learning Rate 0.0001, Valid Loss 224.8608




Epoch 72/150: Train Loss 133.4396, Learning Rate 0.0001, Valid Loss 226.1660




Epoch 73/150: Train Loss 135.5503, Learning Rate 0.0001, Valid Loss 231.0822




Epoch 74/150: Train Loss 133.0745, Learning Rate 0.0001, Valid Loss 241.0770




Epoch 75/150: Train Loss 134.6598, Learning Rate 0.0001, Valid Loss 237.9841




Epoch 76/150: Train Loss 134.2241, Learning Rate 0.0001, Valid Loss 223.3595




Epoch 77/150: Train Loss 133.1289, Learning Rate 0.0001, Valid Loss 235.2765




Epoch 78/150: Train Loss 129.4684, Learning Rate 0.0001, Valid Loss 233.0101




Epoch 79/150: Train Loss 131.3256, Learning Rate 0.0001, Valid Loss 221.6321




Epoch 80/150: Train Loss 131.4972, Learning Rate 0.0001, Valid Loss 224.1305




Epoch 81/150: Train Loss 130.1471, Learning Rate 0.0001, Valid Loss 220.9061




Epoch 82/150: Train Loss 129.7159, Learning Rate 0.0001, Valid Loss 241.1458




Epoch 83/150: Train Loss 132.1270, Learning Rate 0.0001, Valid Loss 237.2882




Epoch 84/150: Train Loss 130.8902, Learning Rate 0.0001, Valid Loss 223.2593




Epoch 85/150: Train Loss 127.1615, Learning Rate 0.0001, Valid Loss 222.3885




Epoch 86/150: Train Loss 128.0240, Learning Rate 0.0001, Valid Loss 225.6939




Epoch 87/150: Train Loss 126.6997, Learning Rate 0.0001, Valid Loss 232.6302




Epoch 88/150: Train Loss 128.8379, Learning Rate 0.0001, Valid Loss 233.2363




Epoch 89/150: Train Loss 127.1255, Learning Rate 0.0001, Valid Loss 226.2584




Epoch 90/150: Train Loss 126.8757, Learning Rate 0.0001, Valid Loss 229.5872




Epoch 91/150: Train Loss 127.1251, Learning Rate 0.0001, Valid Loss 235.2618




Epoch 92/150: Train Loss 123.6253, Learning Rate 0.0001, Valid Loss 225.8306




Epoch 93/150: Train Loss 126.8825, Learning Rate 0.0001, Valid Loss 224.8110




Epoch 94/150: Train Loss 123.6821, Learning Rate 0.0001, Valid Loss 226.6678




Epoch 95/150: Train Loss 124.5899, Learning Rate 0.0001, Valid Loss 228.8562




Epoch 96/150: Train Loss 123.8170, Learning Rate 0.0001, Valid Loss 228.3872




Epoch 97/150: Train Loss 123.3654, Learning Rate 0.0001, Valid Loss 228.7225




Epoch 98/150: Train Loss 122.2288, Learning Rate 0.0001, Valid Loss 222.3127




Epoch 99/150: Train Loss 122.2187, Learning Rate 0.0001, Valid Loss 221.7977




Epoch 100/150: Train Loss 125.2590, Learning Rate 0.0001, Valid Loss 224.3455




Epoch 101/150: Train Loss 121.4597, Learning Rate 0.0000, Valid Loss 223.4989




Epoch 102/150: Train Loss 120.9234, Learning Rate 0.0000, Valid Loss 221.6972




Epoch 103/150: Train Loss 119.5605, Learning Rate 0.0000, Valid Loss 222.9157




Epoch 104/150: Train Loss 122.8804, Learning Rate 0.0000, Valid Loss 221.6312




Epoch 105/150: Train Loss 120.6096, Learning Rate 0.0000, Valid Loss 221.6504




Epoch 106/150: Train Loss 118.6675, Learning Rate 0.0000, Valid Loss 217.8613




Epoch 107/150: Train Loss 117.8143, Learning Rate 0.0000, Valid Loss 223.0696




Epoch 108/150: Train Loss 118.2166, Learning Rate 0.0000, Valid Loss 217.5453




Epoch 109/150: Train Loss 118.0278, Learning Rate 0.0000, Valid Loss 224.6118




Epoch 110/150: Train Loss 118.9817, Learning Rate 0.0000, Valid Loss 235.8903




Epoch 111/150: Train Loss 118.7269, Learning Rate 0.0000, Valid Loss 225.4904




Epoch 112/150: Train Loss 116.3049, Learning Rate 0.0000, Valid Loss 224.1650




Epoch 113/150: Train Loss 117.8008, Learning Rate 0.0000, Valid Loss 225.4331




Epoch 114/150: Train Loss 118.6756, Learning Rate 0.0000, Valid Loss 219.5083




Epoch 115/150: Train Loss 117.7412, Learning Rate 0.0000, Valid Loss 225.5912




Epoch 116/150: Train Loss 116.4883, Learning Rate 0.0000, Valid Loss 218.6566




Epoch 117/150: Train Loss 116.5723, Learning Rate 0.0000, Valid Loss 231.9915




Epoch 118/150: Train Loss 116.5945, Learning Rate 0.0000, Valid Loss 228.0800




Epoch 119/150: Train Loss 115.8361, Learning Rate 0.0000, Valid Loss 224.4755




Epoch 120/150: Train Loss 116.4750, Learning Rate 0.0000, Valid Loss 223.2650




Epoch 121/150: Train Loss 115.6773, Learning Rate 0.0000, Valid Loss 219.9412




Epoch 122/150: Train Loss 113.4233, Learning Rate 0.0000, Valid Loss 221.2865




Epoch 123/150: Train Loss 115.8571, Learning Rate 0.0000, Valid Loss 224.4613




Epoch 124/150: Train Loss 114.4311, Learning Rate 0.0000, Valid Loss 223.7237




Epoch 125/150: Train Loss 112.5712, Learning Rate 0.0000, Valid Loss 221.5572




Epoch 126/150: Train Loss 113.5950, Learning Rate 0.0000, Valid Loss 226.4818




Epoch 127/150: Train Loss 113.4227, Learning Rate 0.0000, Valid Loss 225.0097




Epoch 128/150: Train Loss 112.9285, Learning Rate 0.0000, Valid Loss 223.1662




Epoch 129/150: Train Loss 112.8373, Learning Rate 0.0000, Valid Loss 220.3987




Epoch 130/150: Train Loss 113.1518, Learning Rate 0.0000, Valid Loss 223.8042




Epoch 131/150: Train Loss 111.3479, Learning Rate 0.0000, Valid Loss 222.0513




Epoch 132/150: Train Loss 112.0740, Learning Rate 0.0000, Valid Loss 220.7919




Epoch 133/150: Train Loss 111.0847, Learning Rate 0.0000, Valid Loss 221.5398




Epoch 134/150: Train Loss 111.6192, Learning Rate 0.0000, Valid Loss 221.7751




Epoch 135/150: Train Loss 112.6584, Learning Rate 0.0000, Valid Loss 224.9978


Val:   0%|          | 0/29 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
Traceback (most recent call last):
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent

Epoch 136/150: Train Loss 111.8942, Learning Rate 0.0000, Valid Loss 222.6962


Train:   0%|          | 0/81 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_worke

Epoch 137/150: Train Loss 112.4285, Learning Rate 0.0000, Valid Loss 220.6817




Epoch 138/150: Train Loss 111.1878, Learning Rate 0.0000, Valid Loss 222.7153




Epoch 139/150: Train Loss 110.6225, Learning Rate 0.0000, Valid Loss 221.3926




Epoch 140/150: Train Loss 111.0053, Learning Rate 0.0000, Valid Loss 225.8055




Epoch 141/150: Train Loss 110.6362, Learning Rate 0.0000, Valid Loss 224.0102


Val:   0%|          | 0/29 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers


Epoch 142/150: Train Loss 111.3578, Learning Rate 0.0000, Valid Loss 223.4220


Train:   0%|          | 0/81 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
AssertionError: can only test a child process
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_worke

Epoch 143/150: Train Loss 110.5490, Learning Rate 0.0000, Valid Loss 223.4480




Epoch 144/150: Train Loss 109.4646, Learning Rate 0.0000, Valid Loss 223.8614




Epoch 145/150: Train Loss 109.7422, Learning Rate 0.0000, Valid Loss 226.3234




Epoch 146/150: Train Loss 110.4707, Learning Rate 0.0000, Valid Loss 223.3915




Epoch 147/150: Train Loss 110.7383, Learning Rate 0.0000, Valid Loss 225.6852


Val:   0%|          | 0/29 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers


Epoch 148/150: Train Loss 110.8244, Learning Rate 0.0000, Valid Loss 223.9793


Train:   0%|          | 0/81 [00:00<?, ?it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f15795cf560>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_worke

Epoch 149/150: Train Loss 110.8650, Learning Rate 0.0000, Valid Loss 221.2720


                                                                   

Epoch 150/150: Train Loss 110.3283, Learning Rate 0.0000, Valid Loss 225.4630




# Evaluate Stats

## Copy Ground Truths to Colab 

In [None]:
# Copy the ground-truth CSV from the mounted Drive folder into the local metadata directory.
! cp /content/drive/MyDrive/11785/project/ground_truth.csv /content/data/metadata/

## Dataset for Evaluation

In [None]:
class EvalDataset(Dataset):
    """Dataset that yields ``(image_tensor, dish_id)`` pairs for inference.

    Every entry of ``data_dir`` that contains an ``rgb.png`` file is treated
    as one dish, and the entry name is used as the dish id. Dishes are served
    in sorted dish-id order.

    Args:
        data_dir: Directory holding one sub-directory per dish.
        transforms: Callable applied to the opened PIL image. Defaults to a
            256x256 center crop followed by ``ToTensor``.
        labels: Label lookup stored as ``self.labels``; this class does not
            read it.
    """

    def __init__(self, data_dir, transforms=Compose([CenterCrop((256, 256)), ToTensor()]), labels=labels_dict):
        self.data_dir = data_dir
        self.transforms = transforms
        self.labels = labels

        # Example ids: ['dish_1556572657', 'dish_1556573514', 'dish_1556575014', ...]
        # Test for the image file directly rather than listing each entry:
        # a stray non-directory entry would make os.listdir raise, whereas
        # here it is simply skipped.
        self.dish_ids = sorted(
            entry
            for entry in os.listdir(self.data_dir)
            if os.path.isfile(os.path.join(self.data_dir, entry, "rgb.png"))
        )

        self.img_paths = [
            os.path.join(self.data_dir, dish_id) for dish_id in self.dish_ids
        ]

    def __len__(self):
        """Return the number of dishes that have an ``rgb.png`` image."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return the transformed image and the dish id at position ``idx``."""
        rgb_path = f"{self.img_paths[idx]}/rgb.png"
        dish_id = self.dish_ids[idx]
        # The context manager closes the file handle as soon as the
        # transforms have consumed the image.
        with Image.open(rgb_path) as img:
            transformed_img = self.transforms(img)
        # Return the dish id for writing the csv file
        return transformed_img, dish_id

In [None]:
# Release cached GPU memory before building the evaluation pipeline.
torch.cuda.empty_cache()

# Evaluation views of the two splits (EvalDataset's default transforms, no augmentation).
eval_train_data = EvalDataset(TRAIN_DIR, labels=labels_dict)
eval_val_data = EvalDataset(VALID_DIR, labels=labels_dict)

# Both loaders share the same settings; shuffle stays off so the output rows
# follow the datasets' sorted dish-id order.
eval_train_loader, eval_val_loader = (
    DataLoader(split, batch_size=config.batch_size, shuffle=False, num_workers=2)
    for split in (eval_train_data, eval_val_data)
)

## (Optional) Load Checkpoint

In [None]:
# Rebuild the network and restore its weights from the saved "baseline" checkpoint.
model = BaseNet()
# load_checkpoint returns three values; only the third (the model state dict) is used here.
_, _, model_state_dict = load_checkpoint(f"{CHECKPOINT_PATH}/baseline")
model.load_state_dict(model_state_dict)
# Move the model to the GPU. As the cell's last expression, this also displays the module summary.
model.cuda()

BaseNet(
  (backbone): InceptionV2(
    (block1): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (block2): Sequential(
      (0): Sequential(
        (0): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (block3): Sequential(
      (0): InceptionV2ModuleA(
        (branch1): Sequential(
          (0): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      

## Inference on training and validation data

In [None]:
prediction_filepath = "/content/data/metadata/outputs.csv"


def _predict_split(loader, desc):
    """Run the model over every batch of ``loader`` and collect the predictions.

    Args:
        loader: DataLoader yielding ``(image_batch, dish_ids)`` pairs.
        desc: Label shown on the progress bar.

    Returns:
        A list with one 2-D array per batch. Each row holds the dish id
        followed by the concatenated model outputs; because the row mixes
        ids and numbers, NumPy stores every value as a string.
    """
    batch_rows = []
    batch_bar = tqdm(total=len(loader), dynamic_ncols=True, position=0, leave=False, desc=desc)
    # Inference only: disabling autograd avoids building a graph for each
    # forward pass, so no per-batch cache flushing is needed to keep GPU
    # memory in check.
    with torch.no_grad():
        for x, y in loader:
            dish_ids = np.array(list(y)).reshape(-1, 1)
            cal, mass, mn = model(x.cuda())
            outputs = torch.cat((cal, mass, mn), 1).cpu().numpy()
            batch_rows.append(np.concatenate((dish_ids, outputs), 1))
            batch_bar.update()
    batch_bar.close()
    return batch_rows


model.eval()

# Training rows first, then validation rows. Concatenate once at the end
# instead of re-copying the accumulated array after every batch.
all_rows = _predict_split(eval_train_loader, 'Eval_Train') + _predict_split(eval_val_loader, 'Val')
results_all = np.concatenate(all_rows, 0) if all_rows else None



In [None]:
# Save the predictions as CSV: each row is written through six comma-separated
# string fields (the dish id column followed by the predicted values).
np.savetxt(
    prediction_filepath,
    results_all,
    fmt='%s,%s,%s,%s,%s,%s',
    delimiter=",",
)

In [None]:
r"""Script to compute statistics on nutrition predictions.

This script takes in a csv of nutrition predictions and computes absolute and
percentage mean absolute error values comparable to the metrics used to eval
models in the Nutrition5k paper. The input csv file of nutrition predictions
should be in the form of:
dish_id, calories, mass, fat, carbs, protein
And the groundtruth values will be pulled from the metadata csv file provided
in the Nutrition5k dataset release where the first 6 fields are also:
dish_id, calories, mass, fat, carbs, protein

Example Usage:
python compute_statistics.py path/to/groundtruth.csv path/to/predictions.csv \
path/to/output_statistics.json
"""

import json
from os import path
import statistics
import sys

# Column that holds the dish identifier in both CSV files.
DISH_ID_INDEX = 0
# Column order shared by the prediction and ground-truth rows.
DATA_FIELDNAMES = ["dish_id", "calories", "mass", "fat", "carb", "protein"]


def ReadCsvData(filepath):
  """Parse a comma-separated file into a dict keyed by dish id.

  Args:
    filepath: Path of the CSV file to read.

  Returns:
    A dict mapping each row's dish id (column ``DISH_ID_INDEX``) to the full
    list of that row's string fields. Blank lines are ignored; if a dish id
    appears more than once, the last row wins.

  Raises:
    FileNotFoundError: If ``filepath`` does not exist.
  """
  if not path.exists(filepath):
    # Report the missing file itself (not the os.path module) in the message.
    raise FileNotFoundError("File %s not found" % filepath)
  parsed_data = {}
  with open(filepath, "r") as f_in:
    for line in f_in:
      line = line.strip()
      if not line:
        # A blank (e.g. trailing) line would otherwise create a bogus '' key.
        continue
      data_values = line.split(",")
      parsed_data[data_values[DISH_ID_INDEX]] = data_values
  return parsed_data

# Input and output locations for the evaluation.
groundtruth_csv_path = "/content/data/metadata/ground_truth.csv"
predictions_csv_path = prediction_filepath
output_path = "/content/data/metadata/eval_results.json"

groundtruth_data = ReadCsvData(groundtruth_csv_path)
prediction_data = ReadCsvData(predictions_csv_path)

# One list of ground-truth values and one list of absolute errors per
# nutrition field (every field except the dish id).
groundtruth_values = {field: [] for field in DATA_FIELDNAMES[1:]}
err_values = {field: [] for field in DATA_FIELDNAMES[1:]}
output_stats = {}

# Only dishes that have a prediction are scored; each one is looked up in the
# ground-truth table by dish id.
for dish_id, predicted_row in prediction_data.items():
  truth_row = groundtruth_data[dish_id]
  for column, field in enumerate(DATA_FIELDNAMES[1:], start=1):
    truth = float(truth_row[column])
    groundtruth_values[field].append(truth)
    err_values[field].append(abs(float(predicted_row[column]) - truth))

# Absolute MAE, plus MAE as a percentage of the mean ground-truth value.
for field in DATA_FIELDNAMES[1:]:
  mean_abs_err = statistics.mean(err_values[field])
  output_stats[field + "_MAE"] = mean_abs_err
  output_stats[field + "_MAE_%"] = (
      100 * mean_abs_err / statistics.mean(groundtruth_values[field]))

with open(output_path, "w") as f_out:
  json.dump(output_stats, f_out)
