In [14]:
from __future__ import division

import os
import time

import cv2
import numpy as np

# IMAGE WITH YOLOv3

In [15]:
LABELSPATH = '../../darknet/data/coco.names'
CONFIGPATH = '../../darknet/cfg/yolov3.cfg'
WEIGHTSPATH = '../../darknet/yolov3.weights'

CONFIDENCE_THS = 0.5
NMS_THS = 0.3

In [16]:
LABELS = open(LABELSPATH).read().strip().split("\n")

np.random.seed(42)
COLORS = np.random.randint(0,255,size=(len(LABELS),3),dtype="uint8")

net = cv2.dnn.readNetFromDarknet(CONFIGPATH,WEIGHTSPATH)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [17]:
def test_on_image(net,image):
    (H,W) = image.shape[:2]

    output_ln = net.getLayerNames()
    output_ln = [output_ln[i[0]-1] for i in net.getUnconnectedOutLayers()]

    blob = cv2.dnn.blobFromImage(image,1/255.0,(416,416),swapRB=True,crop=False)
    net.setInput(blob)
    start = time.time()
    layer_outputs = net.forward(output_ln)
    end = time.time()

    # output storages
    boxes = []
    confidences = []
    classIDs = []

    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter out weak predictions
            if confidence > CONFIDENCE_THS:
                # scale the bounding box coordinates back relative to the size of the image
                box = detection[0:4] * np.array([W,H,W,H])
                (centerX,centerY,width,height) = box.astype("int")

                # derive the top left corner of the bounding box
                x = int(centerX-(width/2))
                y = int(centerY-(height/2))

                # update our lists
                boxes.append([x,y,int(width),int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)
    
    idxs = cv2.dnn.NMSBoxes(boxes,confidences,CONFIDENCE_THS,NMS_THS)

    return idxs,boxes,confidences,classIDs,end-start

In [18]:
def draw_outputs(idxs,boxes,confidences,classIDs,image):
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x,y) = (boxes[i][0],boxes[i][1])
            (w,h) = (boxes[i][2],boxes[i][3])

            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(image,(x,y),(x+w,y+h),color,2)
            text = "{}:{:.4f}".format(LABELS[classIDs[i]],confidences[i])
            print("[INFO] YOLO predicts {} with {:.6f} confidence".format(LABELS[classIDs[i]],confidences[i]))
            cv2.putText(image,text,(x,y-5),cv2.FONT_HERSHEY_SIMPLEX,0.5,color,2)
    
    return image

In [19]:
imagefile = '../images/dog-cycle-car.png'
image = cv2.imread(imagefile)
idxs,boxes,confidences,classIDs,duration = test_on_image(net,image)
print("[INFO] YOLO took {:.6f} secs".format(duration))
output_image = draw_outputs(idxs,boxes,confidences,classIDs,image)

[INFO] YOLO took 0.280167 secs
[INFO] YOLO predicts dog with 0.999602 confidence
[INFO] YOLO predicts bicycle with 0.993697 confidence
[INFO] YOLO predicts truck with 0.883534 confidence


[ WARN:0] global /home/yeojinjung/opencv/opencv-4.5.0/modules/dnn/src/cuda4dnn/init.hpp (42) checkVersions CUDART version 11010 reported by cuDNN 8005 does not match with the version reported by CUDART 10020


In [20]:
cv2.imshow("",output_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.waitKey(1)

-1

# WEBCAM WITH YOLOv3

In [21]:
LABELSPATH = '../../darknet/data/coco.names'
CONFIGPATH = '../../darknet/cfg/yolov3.cfg'
WEIGHTSPATH = '../../darknet/yolov3.weights'

CONFIDENCE_THS = 0.5
NMS_THS = 0.3

In [22]:
LABELS = open(LABELSPATH).read().strip().split("\n")

np.random.seed(42)
COLORS = np.random.randint(0,255,size=(len(LABELS),3),dtype="uint8")

net = cv2.dnn.readNetFromDarknet(CONFIGPATH,WEIGHTSPATH)

In [33]:
def test_on_webcam():
    # camset = 'v4l2src device=/dev/video2 ! video/x-raw,width=1280,height=800,format=(string)YUY2 ! videoconvert ! videoscale ! video/x-raw,width=640,height=480,format=BGR,framerate=30/1 ! appsink'
    camset = "v4l2src ! video/x-raw,format=YUY2,width=640,height=480,framerate=30/1 ! appsink"    

    cam = cv2.VideoCapture(2)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH,640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT,480)

    # counter and storage to compute average prediction time
    cnt = 0
    sum_durations = 0

    if cam.read() == False:
        cam.open()
    if not cam.isOpened():
        raise IOError("cannot open webcam")

    while cam.isOpened():
        # start = time.time()
        ret,frame = cam.read()

        if not ret:
            raise IOError("cannot receive frame")
        
        idxs,boxes,confidences,classIDs,duration = test_on_image(net,frame)
        sum_durations += duration
        cnt += 1
        
        if len(idxs) > 0 :
            for i in idxs.flatten():
                (x,y) = (boxes[i][0],boxes[i][1])
                (w,h) = (boxes[i][2],boxes[i][3])

                # draw bounding box and label on the frame
                color = [int(c) for c in COLORS[classIDs[i]]]
                text = "{}: {:.4f}".format(LABELS[classIDs[i]],confidences[i])
                frame = cv2.rectangle(frame,(x,y),(x+w,y+h),color,2)
                frame = cv2.putText(frame,text,(x,y-5),cv2.FONT_HERSHEY_SIMPLEX,0.5,color,2)

            
        cv2.imshow('',frame)
        if cv2.waitKey(1) == ord('q'):
            break

    cam.release()
    cv2.destroyAllWindows()

    print("[INFO] YOLO took {:.6f} secs per frame in average".format(sum_durations/cnt))

In [34]:
test_on_webcam()



[INFO] YOLO took 0.222480 secs per frame in average


# WEBCAM WITH YOLOv4

## Major Improvements
- BoF (bag of freebies) improve the accuracy of the detector without increasing the inference time (they only increase the training cost)
- BoS (bag of specials) improves the accuracy of object detection while increasing the inference cost by a small amount

## Performance
- mAP: 43.5% on COCO dataset (10% increase from YOLOv3)
- real-time speed: 65 FPS on Tesla V100 (12% increase from YOLOv3)

In [None]:
LABELSPATH = '../../darknet-alex/data/coco.names'
CONFIGPATH = '../../darknet-alex/cfg/yolov4.cfg'
WEIGHTSPATH = '../../darknet-alex/yolov4.weights'

CONFIDENCE_THS = 0.5
NMS_THS = 0.3

In [None]:
test_on_webcam()

[INFO] YOLO took 0.272537 secs per frame in average


# WEBCAM WITH TINY-YOLOv4

In [None]:
LABELSPATH = '../../darknet-alex/data/coco.names'
CONFIGPATH = '../../darknet-alex/cfg/yolov4-tiny.cfg'
WEIGHTSPATH = '../../darknet-alex/yolov4-tiny.weights'

CONFIDENCE_THS = 0.5
NMS_THS = 0.3

In [None]:
test_on_webcam()

[INFO] YOLO took 0.260241 secs per frame in average


# WEBCAM WITH PP-YOLO
- based on YOLOv3 in PaddleDetection
- goal: relatively balanced effectiveness and efficiency that can be directly applied in actual application scenarios rather than proposing a new detection model

## Major Improvements
- replace Darknet53 backbone of YOLOv3 with ResNet backbone (significant increase in the FPS)
- increase training batch size from 64 to 192 (as mini-batch size of 24 on 8 GPUs)

## Performance
- mAP: 45.2% on COCO dataset (14.6% relative improvement to YOLOv4)
- real-time speed: 72.9 FPS on Tesla V100

In [None]:
# source codes not found (not reachable)

# YOLOv5

## Major Improvements
- PyTorch implementation rather than a fork from original Darknet
- CSP backbone and PA-NET neck
- mosaic data augmentation
- auto learning bounding box anchors

## Performances
- FPS: 140 (with Tesla P100)
- weights file: 27 MB (v4: 244 MB; 90% smaller than V4)

In [None]:
import torch

model = torch.hub.load('ultralytics/yolov5','yolov5m')
img = 'https://ultralytics.com/images/zidane.jpg'
results = model(img)

results.print()

Using cache found in /home/funzin/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2021-8-25 torch 1.9.0+cu102 CPU



Downloading https://github.com/ultralytics/yolov5/releases/download/v5.0/yolov5m.pt to /home/funzin/.cache/torch/hub/ultralytics_yolov5_master/yolov5m.pt...


100%|██████████| 41.1M/41.1M [00:03<00:00, 11.4MB/s]
Fusing layers... 





Model Summary: 308 layers, 21356877 parameters, 0 gradients
Adding AutoShape... 
image 1/1: 720x1280 2 persons, 1 tie
Speed: 1081.4ms pre-process, 246.4ms inference, 0.8ms NMS per image at shape (1, 3, 384, 640)


In [None]:
results.show()

In [None]:
os.system("python ../../yolov5/detect.py --source 0")

2

In [None]:
CONFIGPATH = '../../yolov5/models/yolov5s.yaml'
WEIGHTSPATH = '../../yolov5/yolov5s.pt'


## NOTE: Architecture
- backbone: extract feature map from image
    - v3: Darknet53
    - v4, v5: CSP-Darknet
- head: locate object based on the extracted feature map
    - initialize anchor box (default box) and create final bounding box
    - 3 scales: 8 pixel, 16, 32
    - 3 anchor boxes/scale

# YOLOv4 TRAINED WITH CUSTOM DATASETS FOR RPS

## DATASET
- custom dataset of hands with 3 classes: rock, paper, scissors
- manually annotated
- 35 images in total: 30 for training (including augmentation), 5 for validation

## INFERENCE RESULT
- very poor
- identifies a lot of small patches in a frame, making it unable to detect meaningful (larger-size) objects


In [None]:
LABELSPATH = '../../darknet-alex/data/rps.names'
CONFIGPATH = '../../darknet-alex/cfg/rps.cfg'
WEIGHTSPATH = '../../darknet-alex/backup/rps_last.weights'

CONFIDENCE_THS = 0.5
NMS_THS = 0.3

In [None]:
LABELS = open(LABELSPATH).read().strip().split("\n")

np.random.seed(42)
COLORS = np.random.randint(0,255,size=(len(LABELS),3),dtype="uint8")

net = cv2.dnn.readNetFromDarknet(CONFIGPATH,WEIGHTSPATH)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [None]:
test_on_webcam()

[INFO] YOLO took 0.257525 secs per frame in average


In [None]:
imagefile = '../images/scissors.jpg'
image = cv2.imread(imagefile)
idxs,boxes,confidences,classIDs,duration = test_on_image(net,image)
print("[INFO] YOLO took {:.6f} secs".format(duration))
output_image = draw_outputs(idxs,boxes,confidences,classIDs,image)

cv2.imshow("",output_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.waitKey(1)

[INFO] YOLO took 0.478014 secs
[INFO] YOLO predicts scissors with 0.620088 confidence


-1