In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd
import numpy as np
import torch
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import random

In [2]:
import cv2
import matplotlib.pyplot as plt 

In [3]:
PATH = Path("/home/ubuntu/data/sandwich/")

In [4]:
list(PATH.iterdir())

[PosixPath('/home/ubuntu/data/sandwich/train-315'),
 PosixPath('/home/ubuntu/data/sandwich/gw-train-315'),
 PosixPath('/home/ubuntu/data/sandwich/gw-he-train-315'),
 PosixPath('/home/ubuntu/data/sandwich/.DS_Store'),
 PosixPath('/home/ubuntu/data/sandwich/test'),
 PosixPath('/home/ubuntu/data/sandwich/gw-he-test-315'),
 PosixPath('/home/ubuntu/data/sandwich/ce-test-315'),
 PosixPath('/home/ubuntu/data/sandwich/train'),
 PosixPath('/home/ubuntu/data/sandwich/he-train-315'),
 PosixPath('/home/ubuntu/data/sandwich/gw-test-315'),
 PosixPath('/home/ubuntu/data/sandwich/test-315'),
 PosixPath('/home/ubuntu/data/sandwich/ce-train-315'),
 PosixPath('/home/ubuntu/data/sandwich/he-test-315'),
 PosixPath('/home/ubuntu/data/sandwich/.ipynb_checkpoints')]

In [5]:
def read_image(path):
    """Read the image at `path` with OpenCV and return it in RGB channel order.

    Args:
        path: location of the image file; converted with `str()` before
            being handed to `cv2.imread`.

    Returns:
        The decoded image with its channels converted from BGR to RGB.

    Raises:
        OSError: if `cv2.imread` could not read the file (it signals failure
            by returning None instead of raising).
    """
    im = cv2.imread(str(path))
    if im is None:
        # Fail here with the offending path rather than inside cvtColor.
        raise OSError("Could not read image: {0}".format(path))
    return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

# Data augmentation

In [6]:
import math
def crop(im, r, c, target_r, target_c):
    """Return the `target_r` x `target_c` window of `im` whose top-left corner is (r, c)."""
    row_end = r + target_r
    col_end = c + target_c
    return im[r:row_end, c:col_end]


# Random crop: the result is smaller than the input by 2*r_pix rows and 2*c_pix columns
def random_crop(x, r_pix=8):
    """Return a randomly positioned crop of `x`.

    The crop has shape (r - 2*r_pix, c - 2*c_pix) in its first two axes, where
    (r, c) are the first two dimensions of `x` and `c_pix` is `r_pix` scaled by
    the c/r ratio. The top-left corner is drawn with the `random` module, at a
    row offset below 2*r_pix and a column offset below 2*c_pix.

    Args:
        x: array with at least two dimensions (rows, columns, ...).
        r_pix: row margin in pixels.

    Returns:
        The cropped window of `x`, as produced by `crop`.
    """
    r, c, *_ = x.shape
    c_pix = round(r_pix*c/r)
    # Draw the row fraction first, then the column fraction (same order as before,
    # so seeded runs keep producing the same crops).
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(2*rand_r*r_pix).astype(int)
    start_c = np.floor(2*rand_c*c_pix).astype(int)
    return crop(x, start_r, start_c, r-2*r_pix, c-2*c_pix)

def center_crop(x, r_pix=8):
    """Return the crop of `x` that trims `r_pix` rows from the top and bottom.

    The column margin `c_pix` is `r_pix` scaled by the columns/rows ratio, so
    the result has shape (r - 2*r_pix, c - 2*c_pix) in its first two axes.
    """
    n_rows, n_cols, *_ = x.shape
    c_pix = round(r_pix*n_cols/n_rows)
    out_rows = n_rows - 2*r_pix
    out_cols = n_cols - 2*c_pix
    return crop(x, r_pix, c_pix, out_rows, out_cols)


def rotate_cv(im, deg, mode=cv2.BORDER_REFLECT, interpolation=cv2.INTER_AREA):
    """Rotate `im` by `deg` degrees about its centre, keeping the original size.

    `mode` is passed to `cv2.warpAffine` as the border mode and
    `interpolation` is added to `cv2.WARP_FILL_OUTLIERS` to form its flags.
    """
    height, width, *_ = im.shape
    centre = (width/2, height/2)
    # Scale factor 1: pure rotation, no zoom.
    rotation = cv2.getRotationMatrix2D(centre, deg, 1)
    warp_flags = cv2.WARP_FILL_OUTLIERS+interpolation
    return cv2.warpAffine(im, rotation, (width, height),
                          borderMode=mode, flags=warp_flags)

# Train-validation split

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
def get_files(path):
    """List the files found one level below `path`, i.e. inside its sub-directories.

    Entries whose name starts with a dot (e.g. `.DS_Store`,
    `.ipynb_checkpoints`) are skipped at both levels, and only regular files
    are returned. Directories and files are visited in sorted order so the
    result is deterministic; `Path.iterdir` itself yields entries in arbitrary
    order, which would make a seeded train/validation split irreproducible.

    Args:
        path: `pathlib.Path` of a directory containing one sub-directory per class.

    Returns:
        A list of `Path` objects, ordered by sub-directory and then by file name.
    """
    class_dirs = sorted(d for d in path.iterdir()
                        if d.is_dir() and not d.name.startswith("."))
    files = [f for d in class_dirs for f in sorted(d.iterdir())
             if f.is_file() and not f.name.startswith(".")]
    return files

In [9]:
path=PATH/"train-315"

In [10]:
files = get_files(path)

In [11]:
# files

In [12]:
# Each file's label is the name of its parent directory (second-to-last path component).
file_labels = [p.parts[-2] for p in files]

In [13]:
# Hold out 20% of the files for validation; random_state fixes the shuffle.
# No `stratify` argument is passed, so class proportions may differ between the two splits.
train_files, valid_files, y_train, y_valid = train_test_split(files, file_labels, test_size=0.2, random_state=42)

In [14]:
# from collections import Counter

In [15]:
# Counter(y_train), Counter(y_valid)

# Dataset

In [16]:
def normalize(im):
    """Standardize an image with the ImageNet per-channel statistics.

    Subtracts the three-value mean from the last axis of `im` and divides by
    the three-value standard deviation (NumPy broadcasting).
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    centered = im - mean
    return centered/std

In [17]:
paths = [d for d in list(path.iterdir()) if d.is_dir()]

In [18]:
# Class names are the sub-directory names. Their position in this list becomes the
# integer class index (SandwichDataset enumerates `labels`), so the order of `paths` matters.
labels = [p.parts[-1] for p in paths]

In [19]:
labels

['hot_dog',
 'pulled_pork_sandwich',
 'grilled_cheese_sandwich',
 'lobster_roll_sandwich',
 'hamburger',
 'club_sandwich']

In [20]:
class SandwichDataset(Dataset):
    """Image dataset whose labels come from each file's parent directory name.

    Args:
        files: sequence of `pathlib.Path` objects pointing at image files.
        labels: class names; a name's position in this sequence is its
            integer class index.
        transforms: when truthy, apply random augmentation (rotation,
            horizontal flip, random crop); otherwise take a centre crop.
    """

    def __init__(self, files, labels, transforms=False):
        self.files = files
        # class name -> integer index, in the order given by `labels`
        self.label2ind = {v:k for k,v in enumerate(labels)}
        self.transforms = transforms

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load, augment/crop and normalize the image at position `idx`.

        Returns:
            A tuple `(x, y)`: `x` is the normalized image with the channel
            axis moved to the front, `y` is the integer class index.

        Raises:
            KeyError: if the file's parent directory name is not in `labels`.
            OSError: if `cv2.imread` cannot read the file.
        """
        path = self.files[idx]
        y = self.label2ind[path.parts[-2]]
        x = cv2.imread(str(path))
        if x is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise OSError("Could not read image: {0}".format(path))
        # Convert to float before the colour swap, then scale to [0, 1].
        x = cv2.cvtColor(x.astype(np.float32), cv2.COLOR_BGR2RGB)/255
        if self.transforms:
            # Random rotation angle in [-10, 10) degrees.
            rdeg = (np.random.random()-.50)*20
            x = rotate_cv(x, rdeg)
            # Flip left-right about half of the time; copy() gives a fresh array
            # instead of the reversed view returned by fliplr.
            if np.random.random() > 0.5: x = np.fliplr(x).copy()
            x = random_crop(x)
        else:
            x = center_crop(x)
        x = normalize(x)
        # Move the channel axis (2) to the front: (rows, cols, ch) -> (ch, rows, cols).
        return np.rollaxis(x, 2), y

In [21]:
train_ds = SandwichDataset(files=train_files, labels=labels, transforms=True)

In [22]:
valid_ds = SandwichDataset(files=valid_files, labels=labels, transforms=False)

In [23]:
# len(train_ds), len(valid_ds)

In [24]:
x, y = train_ds[0]

In [25]:
# x.shape, y

In [26]:
batch_size = 50

In [27]:
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

In [28]:
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)

In [29]:
x, y = next(iter(train_dl))

In [30]:
x = x.cuda().float()
y = y.cuda()

In [31]:
# x.shape, y.shape

(torch.Size([50, 3, 299, 299]), torch.Size([50]))

In [32]:
# y.shape[0]

50

# Initialize pre-trained model with frozen hidden layers

In [33]:
import pretrainedmodels

In [34]:
# model_name = 'inceptionresnetv2'

In [35]:
# model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')

In [36]:
downloaded_model_path = Path("/home/ubuntu/models/sandwich/downloaded_inceptionresnetv2.pth")

In [37]:
# torch.save(model, downloaded_model_path)

In [38]:
# Loads the object stored at downloaded_model_path — presumably the whole model written by the
# commented-out `torch.save(model, ...)` cell. torch.load unpickles, so only load trusted files.
inception_resnet = torch.load(downloaded_model_path)

In [39]:
layers = list(inception_resnet.children())

In [40]:
print(len(layers))

17


In [73]:
layers[12]

Sequential(
  (0): Block8(
    (branch0): BasicConv2d(
      (conv): Conv2d(2080, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (branch1): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(2080, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU()
      )
      (1): BasicConv2d(
        (conv): Conv2d(192, 224, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1), bias=False)
        (bn): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU()
      )
      (2): BasicConv2d(
        (conv): Conv2d(224, 256, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0), bias=False)
        (bn): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU()
    

In [41]:
inception_resnet.last_linear

Linear(in_features=1536, out_features=1000, bias=True)

In [42]:
class InceptionResnet(nn.Module):
    """Pre-trained network with frozen weights and a new trainable classification head.

    Args:
        num_classes: number of outputs of the replacement `last_linear` layer
            (defaults to the 6 sandwich classes used in this notebook).
        checkpoint_path: file passed to `torch.load`; it must contain a whole
            model object exposing a `last_linear` layer.

    NOTE(review): only `requires_grad` is switched off. Calling `.train()` on
    this module still puts the loaded model's BatchNorm layers in training
    mode, so their running statistics keep being updated — confirm that this
    is intended for a "frozen" backbone.
    """

    def __init__(self, num_classes=6,
                 checkpoint_path=Path("/home/ubuntu/models/sandwich/downloaded_inceptionresnetv2.pth")):
        super(InceptionResnet, self).__init__()
        # torch.load unpickles the file: only use checkpoints from a trusted source.
        self.model = torch.load(checkpoint_path)
        # Freeze every loaded parameter ...
        for param in self.model.parameters():
            param.requires_grad = False
        # ... then swap in a fresh head, which is trainable by default. Its input
        # width is read from the layer it replaces instead of being hard-coded.
        in_features = self.model.last_linear.in_features
        self.model.last_linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Run `x` through the wrapped model and return its output."""
        return self.model(x)

In [43]:
model = InceptionResnet().cuda()

In [44]:
out = model(x)

In [45]:
# out.shape

torch.Size([100, 6])

In [46]:
# _, pred = torch.max(out, 1)

In [47]:
# pred

tensor([0, 2, 5, 0, 0, 4, 0, 0, 0, 5, 4, 4, 2, 0, 0, 0, 2, 2, 2, 5, 0, 2, 2, 0,
        2, 2, 1, 2, 0, 0, 0, 0, 4, 0, 1, 0, 3, 0, 4, 3, 3, 0, 1, 0, 4, 0, 4, 4,
        2, 0, 3, 2, 0, 3, 5, 4, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 5, 0, 0, 1,
        1, 0, 0, 4, 2, 0, 2, 4, 0, 0, 5, 0, 0, 2, 0, 0, 2, 0, 2, 0, 3, 0, 2, 4,
        0, 2, 0, 0], device='cuda:0')

In [48]:
# y

tensor([4, 0, 2, 3, 3, 5, 4, 5, 3, 5, 4, 2, 5, 4, 3, 0, 2, 2, 3, 2, 4, 2, 0, 2,
        4, 3, 4, 4, 5, 5, 2, 3, 1, 0, 5, 1, 1, 5, 1, 0, 2, 1, 1, 2, 0, 1, 5, 3,
        2, 0, 0, 3, 0, 5, 1, 0, 0, 0, 4, 2, 0, 4, 5, 0, 0, 5, 3, 4, 0, 1, 2, 2,
        5, 4, 1, 0, 5, 1, 4, 3, 4, 5, 1, 3, 1, 3, 1, 3, 3, 1, 3, 5, 5, 2, 2, 3,
        2, 2, 4, 5], device='cuda:0')

In [49]:
# pred.eq(y).sum()

tensor(18, device='cuda:0')

In [50]:
F.cross_entropy(out, y).item()

1.7973129749298096

# Compute validation loss and accuracy

In [51]:
def val_metrics(model, valid_dl):
    """Evaluate `model` on `valid_dl` and return `(mean_loss, accuracy)`.

    The model is switched to eval mode (and left in it). Batches are moved to
    the device of the model's parameters, and the forward passes run under
    `torch.no_grad()` so no autograd graph is built during evaluation.

    Args:
        model: module mapping a float batch to class scores.
        valid_dl: iterable of `(x, y)` batches, `y` holding integer class indices.

    Returns:
        Tuple of the sample-weighted mean cross-entropy loss and the fraction
        of correctly classified samples.

    Raises:
        ZeroDivisionError: if `valid_dl` yields no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        # Parameter-free model: fall back to the GPU when there is one.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    total = 0
    sum_loss = 0
    correct = 0
    with torch.no_grad():
        for x, y in valid_dl:
            batch = y.shape[0]
            x = x.to(device).float()
            y = y.to(device)
            out = model(x)
            _, pred = torch.max(out, 1)
            correct += pred.eq(y).sum().item()
            loss = F.cross_entropy(out, y)
            # cross_entropy averages over the batch; re-weight by the batch size
            # so a smaller final batch does not skew the mean.
            sum_loss += batch*(loss.item())
            total += batch
    return sum_loss/total, correct/total

In [52]:
val_metrics(model, valid_dl)

(1.8247488074832492, 0.10333333333333333)

# Train model with fixed learning rate

In [53]:
def get_optimizer(model, lr=0.01, wd=0.0):
    """Build an Adam optimizer over the parameters of `model` that require gradients.

    `lr` is the learning rate and `wd` is passed to Adam as `weight_decay`.
    """
    trainable = (p for p in model.parameters() if p.requires_grad)
    return torch.optim.Adam(trainable, lr=lr, weight_decay=wd)

def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group of `optimizer` to `lr`."""
    for group in optimizer.param_groups:
        group["lr"] = lr

In [54]:
def train(model, epochs=5, learning_rate=0.01, save_dir=None, save_threshold=0.68):
    """Train `model` on the global `train_dl`, validating on the global `valid_dl` each epoch.

    An Adam optimizer (no weight decay) is built over the trainable parameters.
    After each epoch the validation loss/accuracy are computed; they are
    printed on every second epoch. Whenever the validation accuracy beats the
    best value seen so far and exceeds `save_threshold`, the model's state
    dict is written to `save_dir` and the file path is printed.

    Args:
        model: module to train; batches are moved to the device its parameters live on.
        epochs: number of passes over `train_dl`.
        learning_rate: Adam learning rate.
        save_dir: directory for checkpoints. When None, the notebook-level
            `model_path` is used if it has been defined; if it has not,
            checkpointing is skipped with a message instead of failing with a
            NameError after an epoch of training.
        save_threshold: minimum validation accuracy required to write a checkpoint.
    """
    optimizer = get_optimizer(model, lr=learning_rate, wd=0)
    device = next(model.parameters()).device
    if save_dir is None:
        # `model_path` is created in a later cell; look it up lazily so a
        # fresh-kernel run does not depend on cell execution order.
        save_dir = globals().get("model_path")
    prev_val_acc = 0.0
    for i in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y in train_dl:
            batch = y.shape[0]
            x = x.to(device).float()
            y = y.to(device)
            out = model(x)
            loss = F.cross_entropy(out, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            # loss is a batch mean; weight it by the batch size for the epoch average
            sum_loss += batch*(loss.item())
        val_loss, val_acc = val_metrics(model, valid_dl)
        if i % 2 == 0:
            print("train loss %.3f val loss %.3f val accuracy %.3f" % (sum_loss/total, val_loss, val_acc))
        if val_acc > prev_val_acc:
            prev_val_acc = val_acc
            if val_acc > save_threshold:
                if save_dir is None:
                    print("checkpoint not saved: pass save_dir or define model_path")
                else:
                    # The file name says "loss", but the number is 100 * validation accuracy.
                    path = "{0}/ft_inceptionresnet_loss_{1:.0f}.pth".format(save_dir, 100*val_acc)
                    # Save directly so this does not rely on `save_model`,
                    # which is only defined in a later cell.
                    torch.save(model.state_dict(), path)
                    print(path)

In [55]:
model = InceptionResnet().cuda()

In [56]:
train(model, epochs=10, learning_rate=0.01)

train loss 1.648 val loss 1.019 val accuracy 0.648
train loss 1.085 val loss 0.916 val accuracy 0.658
train loss 1.132 val loss 1.052 val accuracy 0.611
train loss 1.173 val loss 0.970 val accuracy 0.648
train loss 1.142 val loss 0.953 val accuracy 0.653


In [57]:
def save_model(m, p):
    """Write the state dict of module `m` to the file `p`."""
    weights = m.state_dict()
    torch.save(weights, p)
    
def load_model(m, p):
    """Load the state dict stored in file `p` into module `m` (in place)."""
    weights = torch.load(p)
    m.load_state_dict(weights)

In [58]:
model_path = Path("/home/ubuntu/models/sandwich/")

In [59]:
save_model(model, model_path/"initial_inceptionresnet.pth")

In [60]:
model = InceptionResnet().cuda()

In [61]:
load_model(model, model_path/"initial_inceptionresnet.pth")

In [62]:
val_metrics(model, valid_dl)

(0.9449603292677138, 0.6588888888888889)