<a href="https://colab.research.google.com/github/tsakailab/iip/blob/main/sandbox/object_detection_sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 66 bytes


In [2]:
# List Kaggle competitions through the CLI (presumably a quick check that the
# API token installed above is being picked up — confirm).
!kaggle competitions list

ref                                            deadline             category            reward  teamCount  userHasEntered  
---------------------------------------------  -------------------  ---------------  ---------  ---------  --------------  
contradictory-my-dear-watson                   2030-07-01 23:59:00  Getting Started     Prizes         88           False  
gan-getting-started                            2030-07-01 23:59:00  Getting Started     Prizes         76           False  
store-sales-time-series-forecasting            2030-06-30 23:59:00  Getting Started  Knowledge        908           False  
tpu-getting-started                            2030-06-03 23:59:00  Getting Started  Knowledge        146           False  
digit-recognizer                               2030-01-01 00:00:00  Getting Started  Knowledge       1673           False  
titanic                                        2030-01-01 00:00:00  Getting Started  Knowledge      14584            True  
house-pr

In [3]:
# Download the "andrewmvd/road-sign-detection" dataset with the Kaggle CLI and
# unzip it (--unzip) into the current working directory.
!kaggle datasets download "andrewmvd/road-sign-detection" --unzip

Downloading road-sign-detection.zip to /content
 95% 208M/218M [00:05<00:00, 45.5MB/s]
100% 218M/218M [00:05<00:00, 42.5MB/s]


In [4]:
#library imports
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

import cv2
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

In [5]:
# Notebook configuration: (re)load the autoreload extension, reload all
# modules before executing each cell (mode 2), and render matplotlib figures
# inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [6]:
# Dataset locations, relative to the working directory.
# NOTE(review): presumably these are the folders created by unzipping the
# Kaggle download above — confirm.
images_path = Path('./images')
anno_path = Path('./annotations')

In [7]:
def filelist(root, file_type):
    """Collect the paths of all files below ``root`` whose name ends with ``file_type``.

    The directory tree is walked recursively. Each returned path is the walked
    directory joined with the file name, so paths are relative whenever
    ``root`` is relative.
    """
    matches = []
    for current_dir, _subdirs, names in os.walk(root):
        for name in names:
            if name.endswith(file_type):
                matches.append(os.path.join(current_dir, name))
    return matches

def generate_train_df(anno_path, images_dir=None):
    """Parse the annotation XML files under ``anno_path`` into a DataFrame.

    One row is produced per XML file. Only the first ``<object>`` element of
    each file is read (``find`` returns the first match), so images that carry
    several annotated objects contribute a single class and bounding box.

    Args:
        anno_path: Directory that is searched recursively for ``.xml`` files.
        images_dir: Directory that is prefixed to the file name stored in each
            annotation. Defaults to the notebook-level ``images_path``.

    Returns:
        pandas.DataFrame with the columns ``filename`` (a ``Path``), ``width``,
        ``height``, ``class`` (the class name as text) and ``xmin``, ``ymin``,
        ``xmax``, ``ymax``. All size and box fields are integers.
    """
    if images_dir is None:
        # Fall back to the notebook-level image folder, as the original did.
        images_dir = images_path

    records = []
    # A separate loop variable keeps the ``anno_path`` argument intact.
    for xml_file in filelist(anno_path, '.xml'):
        root = ET.parse(xml_file).getroot()
        records.append({
            'filename': Path(str(images_dir) + '/' + root.find("./filename").text),
            # Sizes are converted to int so they match the box coordinates.
            'width': int(root.find("./size/width").text),
            'height': int(root.find("./size/height").text),
            'class': root.find("./object/name").text,
            'xmin': int(root.find("./object/bndbox/xmin").text),
            'ymin': int(root.find("./object/bndbox/ymin").text),
            'xmax': int(root.find("./object/bndbox/xmax").text),
            'ymax': int(root.find("./object/bndbox/ymax").text),
        })
    return pd.DataFrame(records)

In [8]:
# Parse every annotation XML found under anno_path into one DataFrame
# (one row per annotation file).
df_train = generate_train_df(anno_path)

In [9]:
# Encode the textual class names as integer targets.
class_dict = {
    'speedlimit': 0,
    'stop': 1,
    'crosswalk': 2,
    'trafficlight': 3,
}
# A name missing from class_dict raises KeyError rather than being mapped silently.
df_train['class'] = df_train['class'].map(lambda label: class_dict[label])

In [12]:
# Show the table shape and row count (one per line), then preview the first rows.
print(df_train.shape, len(df_train), sep='\n')
df_train.head()

(877, 8)
877


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,images/road574.png,300,400,0,86,202,206,327
1,images/road481.png,300,400,0,90,230,144,284
2,images/road779.png,300,400,0,68,142,195,269
3,images/road649.png,300,400,0,127,155,154,183
4,images/road226.png,300,400,0,134,175,156,197


In [13]:
# reset_index() without drop=True keeps the previous index as a new "index"
# column.
# NOTE(review): the index already runs 0..n-1 here, so the extra column looks
# redundant, and re-running this cell adds yet another column — consider
# reset_index(drop=True).
df_train = df_train.reset_index()

In [14]:
# Preview the first rows to inspect the columns after the index reset.
df_train.head()

Unnamed: 0,index,filename,width,height,class,xmin,ymin,xmax,ymax
0,0,images/road574.png,300,400,0,86,202,206,327
1,1,images/road481.png,300,400,0,90,230,144,284
2,2,images/road779.png,300,400,0,68,142,195,269
3,3,images/road649.png,300,400,0,127,155,154,183
4,4,images/road226.png,300,400,0,134,175,156,197


In [15]:
# Features and targets are the same DataFrame: every row already carries the
# image path, the class label and the bounding box, so the split below only
# has to partition rows.
X = df_train
Y = df_train

In [16]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# Renumber both splits from 0 so rows can be looked up by integer label
# (the previous index is kept as an extra column).
X_train, X_val = (part.reset_index() for part in (X_train, X_val))

In [17]:
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt


# Data-loading hyper-parameters.
# The colour statistics have four entries: three for the RGB channels plus a
# fourth (mean 0, std 1) that leaves the extra mask channel appended by
# RoadDataset unchanged during normalisation.
# NOTE(review): the RGB values look like the usual ImageNet statistics — confirm.
# NOTE(review): 'image_size' and 'num_workers' are not read anywhere in the
# visible notebook; the resize below uses its own hard-coded size.
hparam = {'batch_size': 64, 'image_size': (224, 224),
          'shuffle': True, 'num_workers': 2,
          'mean_color': [0.485, 0.456, 0.406, 0], 'std_color': [0.229, 0.224, 0.225, 1]}

# Applied to the 4-channel (RGB + box mask) tensor built in
# RoadDataset.__getitem__, so the resize and the random flip act on the image
# and on its bounding-box mask together.
transform = transforms.Compose([
    transforms.Resize((400, 300)),  # target size given as (height, width)
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(hparam['mean_color'], hparam['std_color']),
    
])

class RoadDataset(Dataset):
    """Road-sign images with one class label and one bounding box per image.

    The bounding box is drawn as a binary mask and appended to the image as a
    fourth channel before ``transform`` runs. Geometric transforms (resize,
    flip) therefore move the box together with the pixels, and the transformed
    box is read back from the mask afterwards.

    Args:
        dataframe: Table with the columns ``filename``, ``class``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax``. Rows are addressed by position.
        transform: Optional callable applied to the 4-channel
            ``(RGB + mask)`` tensor.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.preprocess = transforms.Compose([
            transforms.ToTensor(),
        ])

    def __len__(self):
        """Number of rows (samples) in the underlying table."""
        return len(self.dataframe)

    @staticmethod
    def _mask_to_bbox(mask):
        """Return ``[xmin, ymin, xmax, ymax]`` of the non-zero region of a 2-D mask.

        The maxima are the last non-zero column/row, i.e. inclusive indices.
        An all-zero mask (for example a degenerate input box) yields four
        zeros instead of raising on an empty reduction.
        """
        coords = mask.nonzero()  # computed once; rows are (y, x) index pairs
        if coords.shape[0] == 0:
            return torch.zeros(4, dtype=torch.long, device=mask.device)
        ys, xs = coords[:, 0], coords[:, 1]
        return torch.stack((xs.min(), ys.min(), xs.max(), ys.max()))

    def __getitem__(self, idx):
        """Return ``(image, class_label, bbox)`` for the sample at position ``idx``.

        ``image`` is the 3-channel tensor after ``transform``; ``bbox`` is an
        integer tensor ``[xmin, ymin, xmax, ymax]`` in transformed pixel
        coordinates.
        """
        frame = self.dataframe
        # Positional lookup keeps working even if the index was not reset.
        path = frame["filename"].iloc[idx]
        y_class = frame["class"].iloc[idx]
        xmin, ymin, xmax, ymax = (
            int(frame[col].iloc[idx]) for col in ("xmin", "ymin", "xmax", "ymax")
        )

        # The context manager closes the file handle. convert("RGB") guarantees
        # three channels for palette, grey-scale and RGBA files alike.
        with Image.open(path) as img:
            rgb = self.preprocess(img.convert("RGB"))

        mask = torch.zeros((rgb.shape[1], rgb.shape[2]))
        mask[ymin:ymax, xmin:xmax] = 1
        out_img = torch.cat((rgb, mask.unsqueeze(0)), 0)
        if self.transform:
            out_img = self.transform(out_img)

        bbox = self._mask_to_bbox(out_img[-1])
        return out_img[0:3], y_class, bbox

# Training samples keep the random augmentation defined in `transform`.
train_ds = RoadDataset(X_train, transform=transform)

# Validation metrics should be deterministic, so the validation set reuses the
# same pipeline minus the random horizontal flip. Deriving it from `transform`
# keeps resize and normalisation in sync with training.
val_transform = transforms.Compose([
    step for step in transform.transforms
    if not isinstance(step, transforms.RandomHorizontalFlip)
])
val_ds = RoadDataset(X_val, transform=val_transform)

In [18]:
# Sanity check: print the size of the full table and of each split.
for sized in (df_train, train_ds, val_ds):
    print(len(sized))

877
701
176


In [19]:
# Read the loader settings from `hparam` instead of repeating the literals,
# so the configuration has a single source of truth (values are unchanged:
# batch size 64, shuffled training batches).
batch_size = hparam['batch_size']
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=hparam['shuffle'])
# Validation batches are never shuffled.
valid_dl = DataLoader(val_ds, batch_size=batch_size)

In [20]:
class BB_model(nn.Module):
    """Two-headed network on a pretrained ResNet34 trunk.

    The first eight child modules of ResNet34 are used as the feature
    extractor (presumably everything before the global pooling and the final
    fully connected layer — confirm). Two heads share the pooled 512-feature
    vector: ``classifier`` produces 4 class scores and ``bb`` produces 4
    bounding-box values.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet34(pretrained=True)
        stages = list(backbone.children())[:8]
        # The trunk is kept as two sequential groups: children 0-5 and 6-7.
        self.features1 = nn.Sequential(*stages[:6])
        self.features2 = nn.Sequential(*stages[6:])
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        """Return ``(class_scores, bbox_values)`` for a batch of images."""
        feats = self.features2(self.features1(x))
        # Global average pooling to 1x1, then flatten to (batch, features).
        pooled = F.adaptive_avg_pool2d(F.relu(feats), (1, 1))
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat), self.bb(flat)

In [21]:
def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``.

    The optimizer is modified in place; other entries of each parameter group
    are left untouched. Returns ``None``.
    """
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr

In [22]:
def train_epocs(model, optimizer, train_dl, val_dl, epochs=10,C=1000):
    """Train ``model`` for ``epochs`` epochs and print metrics after each one.

    The loss of a batch is the summed cross-entropy of the class scores plus
    the summed L1 error of the bounding-box outputs divided by ``C``.

    NOTE: a later cell of this notebook defines ``train_epocs`` again; when the
    cells run in order, that later definition is the one in use.

    Args:
        model: Module returning ``(class_scores, bbox_values)``.
        optimizer: Optimizer updating the model parameters.
        train_dl: Iterable of ``(x, y_class, y_bb)`` training batches.
        val_dl: Iterable of validation batches, passed to ``val_metrics``.
        epochs: Number of passes over ``train_dl``.
        C: Divisor that scales down the bounding-box loss.

    Returns:
        Mean training loss per sample of the last epoch. The result is NaN if
        no epoch ran or the last epoch saw no samples.
    """
    # Batches follow the model's device rather than a hard-coded .cuda().
    device = next(model.parameters()).device
    train_loss = float("nan")
    for _ in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y_class, y_bb in train_dl:
            batch = y_class.shape[0]
            x = x.to(device).float()
            y_class = y_class.to(device)
            y_bb = y_bb.to(device).float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb/C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += loss.item()
        train_loss = sum_loss/total if total else float("nan")
        # Validate on the loader that was passed in, not on a notebook global.
        val_loss, val_acc = val_metrics(model, val_dl, C)
        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
    return train_loss


In [23]:
def val_metrics(model, valid_dl, C=1000):
    """Evaluate ``model`` on ``valid_dl`` without tracking gradients.

    The model is switched to evaluation mode and left in that mode. The loss
    of a batch is the summed cross-entropy of the class scores plus the summed
    L1 error of the bounding-box outputs divided by ``C``.

    Args:
        model: Module returning ``(class_scores, bbox_values)``.
        valid_dl: Iterable of ``(x, y_class, y_bb)`` batches.
        C: Divisor that scales down the bounding-box loss.

    Returns:
        ``(mean_loss_per_sample, accuracy)``. Both are NaN when ``valid_dl``
        yields no samples, since the metrics are undefined in that case.
    """
    # Batches follow the model's device rather than a hard-coded .cuda().
    device = next(model.parameters()).device
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    with torch.no_grad():
        for x, y_class, y_bb in valid_dl:
            batch = y_class.shape[0]
            x = x.to(device).float()
            y_class = y_class.to(device)
            y_bb = y_bb.to(device).float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb/C
            # The index of the highest score is the predicted class.
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch
    if total == 0:
        return float("nan"), float("nan")
    return sum_loss/total, correct/total

In [24]:
# Build the network on the GPU and hand only the trainable parameters
# (requires_grad=True) to Adam, starting at a learning rate of 0.006.
model = BB_model().cuda()
parameters = (p for p in model.parameters() if p.requires_grad)
optimizer = torch.optim.Adam(parameters, lr=0.006)

In [25]:

        # return out_img[0:3], y_class, bbox
        
def train_epocs(model, optimizer, train_dl, val_dl, epochs=10,C=1000):
    """Train ``model`` for ``epochs`` epochs and print metrics after each one.

    This definition replaces the ``train_epocs`` defined in an earlier cell of
    this notebook. The loss of a batch is the summed cross-entropy of the class
    scores plus the summed L1 error of the bounding-box outputs divided by
    ``C``.

    Args:
        model: Module returning ``(class_scores, bbox_values)``.
        optimizer: Optimizer updating the model parameters.
        train_dl: Iterable of ``(x, y_class, y_bb)`` training batches.
        val_dl: Iterable of validation batches, passed to ``val_metrics``.
        epochs: Number of passes over ``train_dl``.
        C: Divisor that scales down the bounding-box loss.

    Returns:
        Mean training loss per sample of the last epoch. The result is NaN if
        no epoch ran or the last epoch saw no samples.
    """
    # Batches follow the model's device rather than a hard-coded .cuda().
    device = next(model.parameters()).device
    train_loss = float("nan")
    for _ in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y_class, y_bb in train_dl:
            batch = y_class.shape[0]
            x = x.to(device).float()
            y_class = y_class.to(device)
            y_bb = y_bb.to(device).float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb/C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += loss.item()
        train_loss = sum_loss/total if total else float("nan")
        # Validate on the loader that was passed in, not on a notebook global.
        val_loss, val_acc = val_metrics(model, val_dl, C)
        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
    return train_loss

# First training phase: 15 epochs at the learning rate the optimizer was
# created with (0.006).
train_epocs(model, optimizer, train_dl, valid_dl, epochs=15)

train_loss 2.610 val_loss 9283.244 val_acc 0.080
train_loss 1.819 val_loss 307.220 val_acc 0.324
train_loss 1.557 val_loss 43.103 val_acc 0.341
train_loss 1.304 val_loss 7.262 val_acc 0.699
train_loss 1.234 val_loss 1.573 val_acc 0.744
train_loss 1.122 val_loss 2.110 val_acc 0.761
train_loss 1.094 val_loss 3.063 val_acc 0.494
train_loss 0.994 val_loss 1.040 val_acc 0.773
train_loss 0.943 val_loss 0.770 val_acc 0.795
train_loss 0.864 val_loss 0.753 val_acc 0.778
train_loss 0.859 val_loss 0.818 val_acc 0.790
train_loss 0.875 val_loss 0.714 val_acc 0.818
train_loss 0.863 val_loss 0.839 val_acc 0.824
train_loss 0.853 val_loss 0.885 val_acc 0.784
train_loss 0.878 val_loss 0.870 val_acc 0.750


0.8784324167118263

In [26]:
# Second training phase: lower the learning rate to 0.001 and continue
# training the same model for 10 more epochs.
update_optimizer(optimizer, 0.001)
train_epocs(model, optimizer, train_dl, valid_dl, epochs=10)

train_loss 0.838 val_loss 0.681 val_acc 0.807
train_loss 0.789 val_loss 0.675 val_acc 0.801
train_loss 0.799 val_loss 0.676 val_acc 0.812
train_loss 0.762 val_loss 0.661 val_acc 0.801
train_loss 0.763 val_loss 0.655 val_acc 0.790
train_loss 0.752 val_loss 0.667 val_acc 0.807
train_loss 0.753 val_loss 0.659 val_acc 0.812
train_loss 0.738 val_loss 0.637 val_acc 0.812
train_loss 0.754 val_loss 0.636 val_acc 0.818
train_loss 0.738 val_loss 0.639 val_acc 0.812


0.7376027127645495

In [27]:
# Display the validation split. This cell only shows the table; no image is
# selected here.
X_val

Unnamed: 0,level_0,index,filename,width,height,class,xmin,ymin,xmax,ymax
0,710,710,images/road221.png,300,400,0,131,185,154,208
1,734,734,images/road766.png,300,400,0,134,188,174,227
2,244,244,images/road107.png,320,400,0,90,37,233,232
3,231,231,images/road58.png,400,301,1,58,24,331,295
4,291,291,images/road788.png,300,400,0,106,147,149,190
...,...,...,...,...,...,...,...,...,...,...
171,500,500,images/road444.png,300,400,0,146,122,190,166
172,215,215,images/road249.png,300,400,0,145,239,156,251
173,350,350,images/road158.png,300,400,2,168,112,225,170
174,81,81,images/road634.png,300,400,1,123,83,168,128


In [None]:
# Resize a test image to int(1.49*300) pixels wide by 300 pixels high and save
# it as a JPEG.
# NOTE(review): `read_image` is not defined or imported anywhere in the visible
# notebook, and the ./road_signs/... paths differ from the ./images folder used
# above — confirm where both come from before running this cell.
im = read_image('./road_signs/images_resized/road789.png')
im = cv2.resize(im, (int(1.49*300), 300))  # cv2.resize takes dsize as (width, height)
# assumes `im` is in RGB order here, hence the conversion to BGR before writing — TODO confirm
cv2.imwrite('./road_signs/road_signs_test/road789.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

In [None]:
# test Dataset
# RoadDataset takes a single DataFrame with the columns
# filename/class/xmin/ymin/xmax/ymax (plus an optional transform), not three
# separate Series. The true label and box of the test image are unknown, so
# placeholders are used. The 1x1 box keeps the mask non-empty so that a
# bounding box can still be derived from it.
test_df = pd.DataFrame([{
    'filename': './road_signs/road_signs_test/road789.jpg',
    'class': 0,
    'xmin': 0, 'ymin': 0, 'xmax': 1, 'ymax': 1,
}])
# Same resize/normalisation as in training, without the random flip, so the
# prediction input is deterministic.
test_transform = transforms.Compose([
    step for step in transform.transforms
    if not isinstance(step, transforms.RandomHorizontalFlip)
])
test_ds = RoadDataset(test_df, transform=test_transform)
x, y_class, y_bb = test_ds[0]

In [None]:
# Add a leading batch dimension (x[None,]) so the single image can be fed to
# the model, then show the resulting shape.
xx = torch.FloatTensor(x[None,])
xx.shape

In [None]:
# prediction
# The model returns a pair: class scores and bounding-box outputs.
# NOTE(review): this runs outside torch.no_grad(), so the outputs presumably
# still carry autograd history (a later cell calls detach()) — confirm.
out_class, out_bb = model(xx.cuda())
out_class, out_bb

In [None]:
# predicted class
# torch.max over dim 1 returns the highest score per sample together with its
# index; the index is the predicted class id.
torch.max(out_class, 1)

In [None]:
# predicted bounding box
bb_hat = out_bb.detach().cpu().numpy()
bb_hat = bb_hat.astype(int)  # cast the box outputs to integers
# NOTE(review): `show_corner_bb` is not defined or imported anywhere in the
# visible notebook — confirm it exists and that it expects the box in the
# [xmin, ymin, xmax, ymax] order produced by RoadDataset.
show_corner_bb(im, bb_hat[0])