In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_pickle("data/dataset.pkl")
df.head()

Unnamed: 0,image,points
0,data/dactar/1300_img.png,"[177.16228095049055, 184.83182101741053, 244.7..."
1,data/dactar/1766_img.png,"[409.7109787838158, 362.74050003943523, 536.55..."
2,data/dactar/130_img.png,"[386.66894459252546, 342.1963080587535, 503.59..."
3,data/dactar/1785_img.png,"[197.75577726950075, 171.98786970581278, 250.1..."
4,data/dactar/617_img.png,"[180.14505945291972, 298.14972872833135, 288.5..."


In [26]:
from torch.utils.data import Dataset
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from PIL import Image
import random
import cv2
import progressbar
import time

### Utilities

In [27]:
def to_2d_tensor(inp):
    inp = torch.Tensor(inp)
    if len(inp.size()) < 2:
        inp = inp.unsqueeze(0)
    return inp

def xywh_to_x1y1x2y2(boxes):
    boxes = to_2d_tensor(boxes)
    boxes[:, 2] += boxes[:, 0] - 1
    boxes[:, 3] += boxes[:, 1] - 1
    return boxes

def x1y1x2y2_to_xywh(boxes):
    boxes = to_2d_tensor(boxes)
    boxes[:, 2] -= boxes[:, 0] - 1
    boxes[:, 3] -= boxes[:, 1] - 1
    return boxes

def crop_boxes(boxes, im_sizes):
    boxes = to_2d_tensor(boxes)
    im_sizes = to_2d_tensor(im_sizes)
    boxes = xywh_to_x1y1x2y2(boxes)
    zero = torch.Tensor([0])
    boxes[:, 0] = torch.max(torch.min(boxes[:, 0], im_sizes[:, 0]), zero)
    boxes[:, 1] = torch.max(torch.min(boxes[:, 1], im_sizes[:, 1]), zero)
    boxes[:, 2] = torch.max(torch.min(boxes[:, 2], im_sizes[:, 0]), zero)
    boxes[:, 3] = torch.max(torch.min(boxes[:, 3], im_sizes[:, 1]), zero)
    boxes = x1y1x2y2_to_xywh(boxes)
    return boxes

def box_transform(boxes, im_sizes):
    # box in (x, y, w, h) format
    boxes = to_2d_tensor(boxes)
    im_sizes = to_2d_tensor(im_sizes)
    boxes[:, 0] = 2 * boxes[:, 0] / im_sizes[:, 0] - 1
    boxes[:, 1] = 2 * boxes[:, 1] / im_sizes[:, 1] - 1
    boxes[:, 2] = 2 * boxes[:, 2] / im_sizes[:, 0]
    boxes[:, 3] = 2 * boxes[:, 3] / im_sizes[:, 1]
    return boxes

def box_transform_inv(boxes, im_sizes):
    # box in (x, y, w, h) format
    boxes = to_2d_tensor(boxes)
    im_sizes = to_2d_tensor(im_sizes)
    boxes[:, 0] = (boxes[:, 0] + 1) / 2 * im_sizes[:, 0]
    boxes[:, 1] = (boxes[:, 1] + 1) / 2 * im_sizes[:, 1]
    boxes[:, 2] = boxes[:, 2] / 2 * im_sizes[:, 0]
    boxes[:, 3] = boxes[:, 3] / 2 * im_sizes[:, 1]
    return boxes

def compute_IoU(boxes1, boxes2):
    boxes1 = to_2d_tensor(boxes1)
    boxes1 = xywh_to_x1y1x2y2(boxes1)
    boxes2 = to_2d_tensor(boxes2)
    boxes2 = xywh_to_x1y1x2y2(boxes2)
    
    intersec = boxes1.clone()
    intersec[:, 0] = torch.max(boxes1[:, 0], boxes2[:, 0])
    intersec[:, 1] = torch.max(boxes1[:, 1], boxes2[:, 1])
    intersec[:, 2] = torch.min(boxes1[:, 2], boxes2[:, 2])
    intersec[:, 3] = torch.min(boxes1[:, 3], boxes2[:, 3])
    
    def compute_area(boxes):
        # in (x1, y1, x2, y2) format
        dx = boxes[:, 2] - boxes[:, 0]
        dx[dx < 0] = 0
        dy = boxes[:, 3] - boxes[:, 1]
        dy[dy < 0] = 0
        return dx * dy
    
    a1 = compute_area(boxes1)
    a2 = compute_area(boxes2)
    ia = compute_area(intersec)
    assert((a1 + a2 - ia <= 0).sum() == 0)
    
    return ia / (a1 + a2 - ia)    

def compute_acc(preds, targets, im_sizes, theta=0.75):
    preds = box_transform_inv(preds.clone(), im_sizes)
    preds = crop_boxes(preds, im_sizes)
    targets = box_transform_inv(targets.clone(), im_sizes)
    IoU = compute_IoU(preds, targets)
    corr = (IoU >= theta).sum()
    return corr / preds.size(0)

class AverageMeter(object):
    def __init__(self):
        self.reset()
        
    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.cnt = 0
        
    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

### Image Transform

In [28]:
class ResizeAspect(object):
    def __init__(self, h, w):
        self.rescale_factor=None
        self.shift_w=None
        self.shift_h=None
        self.hw = (h, w)
        
    def do_image(self,img):
        h, w = self.hw
        img_h, img_w = img.shape[0], img.shape[1]
        rescale_factor = min(w/img_w, h/img_h)
        new_w = int(img_w * rescale_factor)
        new_h = int(img_h * rescale_factor)
        resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC)

        canvas = np.full((h, w, 3), 128, dtype=np.uint8)
        shift_h = (h-new_h)//2
        shift_w = (w-new_w)//2
        canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image
        img = canvas.copy()
        self.rescale_factor=rescale_factor
        self.shift_h = shift_h
        self.shift_w = shift_w
        return img
    
    def do_box(self, box):
#         if self.rescale_factor is None:
#             print('The image is not scaled, do_image first!!')
#             return
        box = box.reshape(-1,2)
        box *=self.rescale_factor
        box[: ,0] += self.shift_w
        box[: ,1] += self.shift_h
        box = box.reshape(-1)
        return box
    
    def undo_box(self, box):
        box = box.reshape(-1,2)
        box[: ,0] -= self.shift_w
        box[: ,1] -= self.shift_h
        box /=self.rescale_factor
        box = box.reshape(-1)
        return box
    
    
# class ImageData(object):
    
#     def __init__(self, img_dir, box):
#         self.img_dir = img_dir
#         self.box = box
#         self.img = None
#         self._load_img()
        
#     def _load_img(self):
#         self.img = Image.open(self.img_dir).convert('RGB')
#         self.img = np.array(img)

In [29]:
class FinalTransform:
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])
    
    def transform_inv(self,img):
        inp = img.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        return inp

### Dataset Loader

In [40]:
class LicenseDataset(Dataset):
    def __init__(self, df):
        
        self.imgs = list(df.image)
        self.boxes = df.points.tolist()
#         self.labeled = df.labeled.tolist()
        self.final_transform = FinalTransform()
        self.transform = self.final_transform.transform
#         self.transform = transforms.Compose([
# #                 transforms.Resize((224, 224)),
#             transforms.ToTensor(),
#             transforms.Normalize(mean=[0.485, 0.456, 0.406],
#                                  std=[0.229, 0.224, 0.225])
#         ])
        
    def transform_inv(self,img):
        inp = img.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        return inp
            
    def __getitem__(self, index):
        path= self.imgs[index]
        box = self.boxes[index]
        
        img = Image.open(path).convert('RGB')
        resizer = ResizeAspect(h=224, w=224)
        img = resizer.do_image(np.array(img))
        img = self.final_transform.transform(img)
        box = resizer.do_box(box)
        box = np.array(box, dtype=np.float32)
        
        return img, box #, resizer
    
    def __len__(self):
        return len(self.imgs)

In [44]:
def train_test_split(df, test_size=0.1):
    if isinstance(test_size, float):
        test_size = int(test_size * len(df))

    indices = df.index.tolist()
    test_indices = random.sample(population=indices, k=test_size)

    test_df = df.loc[test_indices]
    train_df = df.drop(test_indices)
    return train_df, test_df

In [45]:
train_df, test_df = train_test_split(df, test_size=0.1)

In [46]:
train = LicenseDataset(train_df)
test = LicenseDataset(test_df)

In [47]:
# ind = random.choice(range(len(train_df)))
# img, box, img_size = train[ind]

# box = np.append(box, box[:2]).reshape(-1,2)
# img = train.final_transform.transform_inv(img)

# plt.imshow(img)
# plt.plot(box[:,0], box[:,1], lw=3, c='g')

In [48]:
train_loader = torch.utils.data.DataLoader(
                train, batch_size=32,shuffle=True,
                num_workers=2, pin_memory=True)
test_loader = torch.utils.data.DataLoader(
                test, batch_size=32,shuffle=False,
                num_workers=2, pin_memory=True)

### Defining Model

In [49]:
model = models.resnet18(pretrained=True)
'''
output of our model is :
x1, y1,
x2, y2,
x3, y3,
x4, y4,
conf -> only when no bounding box images are taken
'''
num_feature = model.fc.in_features
num_output = 8#9
model.fc = nn.Linear(num_feature, num_output)
# model.eval()
# model.train()

In [50]:
criterion = nn.SmoothL1Loss().cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [51]:
best_model_state = model.state_dict()
best_epoch = -1
best_acc = 0.0

epoch_loss = {True: [], False: []}
epoch_acc = {True: [], False: []}
epochs = 20

In [52]:
for epoch in range(epochs):
    accs = AverageMeter()
    losses = AverageMeter()
    for train_mode in (True, False):
        if train_mode:
            scheduler.step()
            data_loader = train_loader
        else:
            data_loader = test_loader
        model.train(mode=train_mode)
            
        end = time.time()
        bar = progressbar.ProgressBar()
        for ims, boxes in bar(data_loader):
            
            inputs = ims
            targets = boxes
            
            optimizer.zero_grad()
            
            # forward
            outputs = model(inputs)
            loss = criterion(outputs, targets)
#             acc = compute_acc(outputs.data.cpu(), targets.data.cpu(), im_sizes)
            
            nsample = inputs.size(0)
#             accs.update(acc, nsample)
            losses.update(loss.data[0], nsample)
            
            if train_mode:
                loss.backward()
                optimizer.step()
        
#         if not train_mode and accs.avg > best_acc:
#             best_acc = accs.avg
#             best_epoch = epoch
#             best_model_state = model.state_dict()
            
        elapsed_time = time.time() - end
        print('[{}]\tEpoch: {}/{}\tLoss: {:.4f}\tAcc: {:.2%}\tTime: {:.3f}'.format(
            phase, epoch+1, epochs, losses.avg, accs.avg, elapsed_time))
        epoch_loss[phase].append(losses.avg)
        epoch_acc[phase].append(accs.avg)
        
    print('[Info] best test acc: {:.2%} at {}th epoch'.format(best_acc, best_epoch))
    torch.save(best_model_state, 'best_model_state.path.tar')

  0% |                                                                        |

RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #2 'target'