In [117]:
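# Mount Google Drive (Colab only). Uncomment if Drive is not already mounted:
# the unzip step reads /content/drive/MyDrive/dataset.zip.
# from google.colab import drive
# drive.mount('/content/drive')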

In [118]:
# Clone the YOLOv7 object-detection repository and make it the working directory.
# Every relative path used by the later cells (dataset/, data/, cfg/, runs/,
# train.py, detect.py) is resolved from inside yolov7/.
# NOTE(review): run once per session - after the %cd, re-running this cell would
# clone a second copy into yolov7/yolov7.
! git clone https://github.com/WongKinYiu/yolov7.git
%cd yolov7

In [119]:
# Extract the dataset archive from Google Drive into the current directory.
# Requires Drive to be mounted at /content/drive (Colab).
# Later cells expect the archive to unpack to ./dataset/ (images/, train.csv, ...).
# -qq silences unzip's per-file output.
! unzip -qq /content/drive/MyDrive/dataset.zip

In [120]:
# Structure the folders for Yolo
! mkdir dataset/images/train
! mkdir dataset/images/val
! mkdir dataset/images/test
! mv dataset/images/*.jpg dataset/images/train/
! mkdir -p dataset/labels/train
! mkdir -p dataset/labels/val

In [123]:
# Dataset description read by train.py (--data): where the train/val images
# live, how many classes there are, and the class names in class-id order.
with open("data/dataset.yaml", "w") as dataset_yaml:
    dataset_yaml.write("""train: ./dataset/images/train/
val: ./dataset/images/val/

# number of classes
nc: 11

# class names
names: [ 'GRAFFITI', 'FADED_SIGNAGE', 'POTHOLES', 'GARBAGE',
         'CONSTRUCTION_ROAD', 'BROKEN_SIGNAGE', 'BAD_STREETLIGHT',
         'BAD_BILLBOARD', 'SAND_ON_ROAD','CLUTTER_SIDEWALK', 'UNKEPT_FACADE' ]""")

# Model definition passed to train.py (--cfg). Per the original author it is the
# stock yolov7.yaml with only the number of classes (nc) changed to 11.
with open("cfg/training/yolov7_custom.yaml", "w") as model_yaml:
    model_yaml.write("""# parameters
nc: 11  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [12,16, 19,36, 40,28]  # P3/8
  - [36,75, 76,55, 72,146]  # P4/16
  - [142,110, 192,243, 459,401]  # P5/32

# yolov7 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [32, 3, 1]],  # 0
  
   [-1, 1, Conv, [64, 3, 2]],  # 1-P1/2      
   [-1, 1, Conv, [64, 3, 1]],
   
   [-1, 1, Conv, [128, 3, 2]],  # 3-P2/4  
   [-1, 1, Conv, [64, 1, 1]],
   [-2, 1, Conv, [64, 1, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [[-1, -3, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [256, 1, 1]],  # 11
         
   [-1, 1, MP, []],
   [-1, 1, Conv, [128, 1, 1]],
   [-3, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [128, 3, 2]],
   [[-1, -3], 1, Concat, [1]],  # 16-P3/8  
   [-1, 1, Conv, [128, 1, 1]],
   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [[-1, -3, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [512, 1, 1]],  # 24
         
   [-1, 1, MP, []],
   [-1, 1, Conv, [256, 1, 1]],
   [-3, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, -3], 1, Concat, [1]],  # 29-P4/16  
   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [[-1, -3, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [1024, 1, 1]],  # 37
         
   [-1, 1, MP, []],
   [-1, 1, Conv, [512, 1, 1]],
   [-3, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [512, 3, 2]],
   [[-1, -3], 1, Concat, [1]],  # 42-P5/32  
   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [[-1, -3, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [1024, 1, 1]],  # 50
  ]

# yolov7 head
head:
  [[-1, 1, SPPCSPC, [512]], # 51
  
   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [37, 1, Conv, [256, 1, 1]], # route backbone P4
   [[-1, -2], 1, Concat, [1]],
   
   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [256, 1, 1]], # 63
   
   [-1, 1, Conv, [128, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [24, 1, Conv, [128, 1, 1]], # route backbone P3
   [[-1, -2], 1, Concat, [1]],
   
   [-1, 1, Conv, [128, 1, 1]],
   [-2, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [-1, 1, Conv, [64, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [128, 1, 1]], # 75
      
   [-1, 1, MP, []],
   [-1, 1, Conv, [128, 1, 1]],
   [-3, 1, Conv, [128, 1, 1]],
   [-1, 1, Conv, [128, 3, 2]],
   [[-1, -3, 63], 1, Concat, [1]],
   
   [-1, 1, Conv, [256, 1, 1]],
   [-2, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [-1, 1, Conv, [128, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [256, 1, 1]], # 88
      
   [-1, 1, MP, []],
   [-1, 1, Conv, [256, 1, 1]],
   [-3, 1, Conv, [256, 1, 1]],
   [-1, 1, Conv, [256, 3, 2]],
   [[-1, -3, 51], 1, Concat, [1]],
   
   [-1, 1, Conv, [512, 1, 1]],
   [-2, 1, Conv, [512, 1, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [-1, 1, Conv, [256, 3, 1]],
   [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]],
   [-1, 1, Conv, [512, 1, 1]], # 101
   
   [75, 1, RepConv, [256, 3, 1]],
   [88, 1, RepConv, [512, 3, 1]],
   [101, 1, RepConv, [1024, 3, 1]],

   [[102,103,104], 1, IDetect, [nc, anchors]],   # Detect(P3, P4, P5)
  ]""")

In [152]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt

# Competition files: the annotated training boxes, the test set listing and
# the submission template.
df = pd.read_csv('dataset/train.csv')
test = pd.read_csv('dataset/test.csv')
submission = pd.read_csv('dataset/sample_submission.csv')

# Class ids and box corners are cast to integers, one column at a time
for column in ('class', 'xmax', 'xmin', 'ymax', 'ymin'):
    df[column] = df[column].astype('int')

In [125]:
# Sequential (unshuffled) split of the annotation rows into train and val.
# Fraction of rows that goes to train:
ratio = 0.90

total_rows = len(df)
train_size = int(total_rows * ratio)

# The first train_size rows form train, the remaining rows form val
train, val = df.iloc[:train_size], df.iloc[train_size:]

# Alternative (not used): a random split with scikit-learn instead of the sequential split above.
# from sklearn.model_selection import train_test_split

# train, val = train_test_split(df, test_size=0.1, random_state=101)

In [126]:
# A box's "max" coordinate may be smaller than its "min" coordinate in the data,
# so each pair is put in order before use.
def sorting(l1, l2):
    """Return the two values as a ``(larger, smaller)`` tuple.

    When the values are equal, ``(l2, l1)`` is returned.
    """
    return (l1, l2) if l1 > l2 else (l2, l1)

In [127]:
from google.colab.patches import cv2_imshow
from matplotlib.patches import Rectangle
import matplotlib.image as mpimg
import cv2

# Convert every annotation in df into a YOLO label line
#   "<class> <x_center> <y_center> <width> <height>"
# with all four box values normalised by the image size, and append it to
# dataset/labels/train/<image name>.txt.
imagePaths = df['image_path'].unique()

# Double the value of (xmin,ymin,xmax,ymax) to adjust from 960x540 to 1920x1080
adjust = 2

# Image size
width = 1920
height = 1080

for imagePath in imagePaths:
  data = df.loc[df['image_path'] == imagePath]

  # One label line per annotated box of this image
  label_lines = []

  for i in data.index:

    # xmax may be smaller than xmin (same for y), so order each pair first
    xmax, xmin = sorting(data['xmax'][i], data['xmin'][i])
    ymax, ymin = sorting(data['ymax'][i], data['ymin'][i])

    # Scale to image pixels and clamp: no negative or out-of-image coordinates
    xmin = max(0, xmin * adjust)
    ymin = max(0, ymin * adjust)
    xmax = min(width, xmax * adjust)
    ymax = min(height, ymax * adjust)

    # Normalised box centre and size for YOLO.
    # The y centre is the midpoint of ymin and ymax (previously ymin was
    # added to itself, which put every box centre on its top edge).
    x = ((xmin + xmax) / 2) / width
    y = ((ymin + ymax) / 2) / height
    w = (xmax - xmin) / width
    h = (ymax - ymin) / height

    label_lines.append(f"{data['class'][i]} {x} {y} {w} {h}\n")

  if not label_lines:
    continue

  # Label file is named after the image: the text before the first '.'
  image_path = imagePath.split('.')[0]

  # Append mode: re-running this cell adds the same lines again, so clear
  # dataset/labels/ before regenerating the labels.
  with open(f"dataset/labels/train/{image_path}.txt", 'a') as writefile:
    writefile.writelines(label_lines)

In [128]:
import os

# Names of the images that have at least one row in the val split
val_images = val['image_path'].unique()

# 1) validation images: images/train -> images/val
for image_name in val_images:
    os.replace(f"dataset/images/train/{image_name}", f"dataset/images/val/{image_name}")

# 2) their label files: labels/train -> labels/val
#    (label files are named after the text before the first '.' of the image name)
for image_name in val_images:
    label_stem = image_name.split('.')[0]
    os.replace(f"dataset/labels/train/{label_stem}.txt", f"dataset/labels/val/{label_stem}.txt")

# 3) test images: images/train -> images/test
for image_name in test['image_path'].unique():
    os.replace(f"dataset/images/train/{image_name}", f"dataset/images/test/{image_name}")

In [138]:
# Train YOLOv7 on the custom dataset: 50 epochs, batch size 62, 640x640 input,
# GPU 0, using data/dataset.yaml and cfg/training/yolov7_custom.yaml.
# --weights '' passes no checkpoint (presumably training from scratch - confirm).
# NOTE(review): the captured output below reports workers=16 and
# save_dir='runs/train/yolov7_custom6', so it was produced by a different or
# repeated invocation; the run directory on a fresh session may differ.
! python train.py --workers 8 --device 0 --epochs 50 --batch-size 62 --data data/dataset.yaml --img 640 640 --cfg cfg/training/yolov7_custom.yaml --weights '' --name yolov7_custom --hyp data/hyp.scratch.p5.yaml

YOLOR 🚀 v0.1-121-g2fdc7f1 torch 1.13.1+cu116 CUDA:0 (A100-SXM4-40GB, 40536.1875MB)

Namespace(adam=False, artifact_alias='latest', batch_size=62, bbox_interval=-1, bucket='', cache_images=False, cfg='cfg/training/yolov7_custom.yaml', data='data/dataset.yaml', device='0', entity=None, epochs=50, evolve=False, exist_ok=False, freeze=[0], global_rank=-1, hyp='data/hyp.scratch.p5.yaml', image_weights=False, img_size=[640, 640], label_smoothing=0.0, linear_lr=False, local_rank=-1, multi_scale=False, name='yolov7_custom', noautoanchor=False, nosave=False, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/yolov7_custom6', save_period=-1, single_cls=False, sync_bn=False, total_batch_size=62, upload_dataset=False, v5_metric=False, weights='', workers=16, world_size=1)
[34m[1mtensorboard: [0mStart with 'tensorboard --logdir runs/train', view at http://localhost:6006/
2023-01-21 12:23:53.974208: I tensorflow/core/util/util.cc:169] oneDNN custom oper

In [140]:
# Inference: run the trained weights over the test images at 640px.
# --save-txt / --save-conf store the detections (with confidence) as text files
# that the next cell reads from runs/detect/exp/labels/; --exist-ok reuses
# runs/detect/exp instead of creating a new run directory.
# NOTE(review): the weights path is pinned to the run directory
# 'yolov7_custom6'; confirm it matches the save_dir printed by the training
# cell before running, otherwise stale or missing weights are used.
!python detect.py --source 'dataset/images/test' --weights 'runs/train/yolov7_custom6/weights/best.pt' --img 640 --save-txt --save-conf --exist-ok

Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=True, img_size=640, iou_thres=0.45, name='exp', no_trace=False, nosave=False, project='runs/detect', save_conf=True, save_txt=True, source='dataset/images/test', update=False, view_img=False, weights=['runs/train/yolov7_custom6/weights/best.pt'])
YOLOR 🚀 v0.1-121-g2fdc7f1 torch 1.13.1+cu116 CUDA:0 (A100-SXM4-40GB, 40536.1875MB)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse
Model Summary: 314 layers, 36535712 parameters, 6194944 gradients
 Convert model to Traced-model... 
 traced_script_module saved! 
 model is traced! 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
1 GARBAGE, Done. (11.5ms) Inference, (1.7ms) NMS
 The image with the result is saved in: runs/detect/exp/006e04a34f54d116f6aff2bb31cb320c.jpg
1 GRAFFITI, Done. (13.5ms) Inference, (1.1ms) NMS
 The image with the result is saved in: runs/detec

In [183]:
import pybboxes as pbx

# Column layout of the label files read below (one detection per line)
cols = ['class', 'x', 'y', 'w', 'h', 'conf-score']

# Class names indexed by class id (same order as in data/dataset.yaml)
classes = ['GRAFFITI', 'FADED_SIGNAGE', 'POTHOLES', 'GARBAGE', 'CONSTRUCTION_ROAD', 'BROKEN_SIGNAGE', 'BAD_STREETLIGHT',
           'BAD_BILLBOARD', 'SAND_ON_ROAD','CLUTTER_SIDEWALK', 'UNKEPT_FACADE']

for filePath in test['image_path'].unique():
  filePath = filePath.split('.')[0]
  path = f"runs/detect/exp/labels/{filePath}.txt"
  # Images without a label file are skipped
  if not os.path.exists(path):
    continue
  df_results = pd.read_csv(path, sep=" ", header=None)
  df_results.columns = cols
  for _, item in df_results.iterrows():
    detectedClass = int(item['class'])
    # Convert back to VOC format. pybboxes returns VOC boxes as
    # (xmin, ymin, xmax, ymax), so unpack in exactly that order.
    xmin, ymin, xmax, ymax = pbx.convert_bbox((item['x'], item['y'], item['w'], item['h']), from_type="yolo", to_type="voc", image_size=(960, 540))
    box = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}

    # The first three values stay positional, as before. The coordinates are
    # matched to the template's remaining columns by name so each value lands
    # in the right column regardless of their order.
    # NOTE(review): assumes those columns are named xmin/ymin/xmax/ymax; a
    # KeyError here means sample_submission.csv uses different names.
    row = [detectedClass, f"{filePath}.jpg", classes[detectedClass]]
    row += [box[column] for column in submission.columns[3:]]
    submission.loc[len(submission)] = row