# Yolo_Predictions

In [2]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader

*Read steps carefully*

In [3]:
# Read the dataset description written during data preparation and pull out
# the list of class names (position in the list == class id).
with open('../1_DataPreparation/data.yaml', 'r') as yaml_file:
    data_yaml = yaml.load(yaml_file, Loader=SafeLoader)

labels = data_yaml['names']
print(labels)


['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus']


In [4]:
# Load the exported ONNX network through OpenCV's DNN module.
onnx_weights = './Model/weights/best.onnx'
yolo = cv2.dnn.readNetFromONNX(onnx_weights)

# Run inference with the OpenCV backend on the CPU target.
backend, target = cv2.dnn.DNN_BACKEND_OPENCV, cv2.dnn.DNN_TARGET_CPU
yolo.setPreferableBackend(backend)
yolo.setPreferableTarget(target)


In [5]:
# Load image
IMAGE_PATH = './street_image.jpg'
img = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of an AttributeError on img.copy().
if img is None:
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')

# Work on a copy so `img` stays untouched for the side-by-side display later.
image = img.copy()
row,col,d = image.shape   # rows (height), columns (width), channels
print(row,col,d)

# get the YOLO prediction from the image
# step 1: convert image into square image
# Paste the image into the top-left corner of a black square canvas whose side
# is the longer image dimension, so resizing to the network input keeps the
# aspect ratio and box coordinates share the original image's origin.
max_rc = max(row,col)
print(max_rc)
input_image = np.zeros((max_rc,max_rc,3),dtype=np.uint8)
input_image[:row,:col] = image

# step 2: get prediction from square array
INPUT_WH_YOLO = 640   # this is the input size of yolo we trained
# Scale pixel values by 1/255, resize to the network input size and swap the
# first and last colour channels (swapRB=True); no cropping.
blob = cv2.dnn.blobFromImage(
    input_image,
    1/255,
    (INPUT_WH_YOLO,INPUT_WH_YOLO),
    swapRB=True,
    crop=False,
)
yolo.setInput(blob)

preds = yolo.forward() # detection or prediction from yolo


956 1920 3
1920


In [6]:
# Shape of the raw network output. The filtering step that follows takes
# preds[0] and reads every row as [cx, cy, w, h, confidence, class scores...].
print(preds.shape)

(1, 25200, 25)


In [12]:
# Non Maximum Suppression

# Thresholds, named so they can be tuned in one place.
CONF_THRESHOLD = 0.4          # minimum confidence (column 4) to keep a detection
CLASS_SCORE_THRESHOLD = 0.25  # minimum best-class probability to keep a detection
NMS_SCORE_THRESHOLD = 0.25    # score threshold handed to cv2.dnn.NMSBoxes
NMS_IOU_THRESHOLD = 0.45      # overlap threshold handed to cv2.dnn.NMSBoxes

# step 1: filter detections based on the confidence and class-probability thresholds

detections = preds[0]
boxes = []
confidences = []
classes = []

# Size of the square image that was fed to the network (input_image).
# ndarray.shape is (rows, cols, ...) i.e. (height, width, ...).
image_h,image_w = input_image.shape[:2]
# Predictions are expressed in network-input pixels; these factors map them
# back to input_image pixels.
x_factor = image_w/INPUT_WH_YOLO
y_factor = image_h/INPUT_WH_YOLO

# The loop variable is deliberately not called `row`, so the image height
# stored under that name by the image-loading cell is not overwritten.
for det in detections:
    confidence = det[4]
    if confidence > CONF_THRESHOLD:
        class_scores = det[5:]
        class_score = class_scores.max()   # maximum probability of the object
        class_id = class_scores.argmax()   # index of the class with maximum probability

        if class_score > CLASS_SCORE_THRESHOLD:
            cx,cy,w,h = det[0:4]
            # construct bounding box from the centre/size values:
            # left, top, width and height in input_image pixels
            left = int((cx-w/2)*x_factor)
            top = int((cy-h/2)*y_factor)
            width = int(w*x_factor)
            height = int(h*y_factor)

            box = np.array([left,top,width,height])

            # append values into the lists (kept index-aligned with each other)
            confidences.append(confidence)
            boxes.append(box)
            classes.append(class_id)

# clean: NMSBoxes wants plain Python lists of ints / floats
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# step 2: NMS
# When nothing survives, NMSBoxes may return an empty tuple (which has no
# .flatten()), so normalise the result through NumPy to always get a flat
# int32 array, possibly of length zero.
nms_result = cv2.dnn.NMSBoxes(
    boxes_np,
    confidences_np,
    NMS_SCORE_THRESHOLD,
    NMS_IOU_THRESHOLD,
)
index = np.array(nms_result,dtype=np.int32).flatten()



In [13]:
index 
# Each entry is a position in boxes_np / confidences_np / classes of a detection
# that passed the confidence and class-probability thresholds and survived NMS.
# len(index) is therefore the number of objects that will be drawn; compare it
# with what you can pick out by eye in street_image.jpg as a sanity check.

array([321, 136, 259, 292, 301, 203, 177, 113,  74, 154, 103, 105, 107,
       149, 192, 215, 318, 314, 135, 121, 276,   0, 129, 316,  72, 202,
        12, 335], dtype=int32)

In [17]:
# draw the bounding boxes that survived NMS onto `image`

GREEN, WHITE, BLACK = (0,255,0), (255,255,255), (0,0,0)

for keep_idx in index:
    # box geometry: top-left corner plus size
    left, top, box_w, box_h = boxes_np[keep_idx]
    right, bottom = left + box_w, top + box_h

    # label: class name and confidence as a whole-number percentage
    class_name = labels[classes[keep_idx]]
    bb_conf = int(confidences_np[keep_idx]*100)
    text = f'{class_name} {bb_conf}%'

    # object outline
    cv2.rectangle(image, (left, top), (right, bottom), GREEN, 2)
    # filled strip, 30 px tall, sitting on top of the box as a label background
    cv2.rectangle(image, (left, top-30), (right, top), WHITE, -1)
    # label text inside the strip
    cv2.putText(image, text, (left, top-10), cv2.FONT_HERSHEY_PLAIN, 0.7, BLACK, 1)


In [18]:
# Show the untouched image next to the annotated one, each in its own window.
for window_title, frame in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, frame)

# Block until any key is pressed, then close every OpenCV window.
cv2.waitKey(0)
cv2.destroyAllWindows()