In [2]:
import os
import time
import cv2
import numpy as np

In [3]:
# Path to the input image that YOLO will run object detection on.
image = "images/student_id.jpg"

# Base directory holding the YOLO files (labels, config, weights) loaded below.
yolo = "yolo-coco/"

# Minimum class probability; weaker detections are filtered out.
confidence_threshold = 0.5

# Threshold passed to non-maxima suppression.
threshold = 0.3

In [4]:
# Load the COCO class labels that our YOLO model was trained on (one label per line).
# os.path.join is used instead of os.path.sep.join so a base directory that already
# ends with a separator does not yield a doubled separator in the path.
labelsPath = os.path.join(yolo, "coco.names")

# The context manager closes the file handle as soon as the labels are read,
# instead of leaving it open until garbage collection.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Build one colour (3 uint8 components) per class label. Seeding the generator
# keeps the label -> colour mapping identical between runs.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

In [5]:
# Derive the paths to the YOLO weights and model configuration.
# os.path.join handles a trailing separator on `yolo` cleanly, whereas
# os.path.sep.join would produce a doubled separator in the path.
weightsPath = os.path.join(yolo, "yolov3.weights")
configPath = os.path.join(yolo, "yolov3.cfg")

# Load our YOLO object detector trained on COCO dataset (80 classes).
# Note the argument order: configuration file first, then weights.
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

[INFO] loading YOLO from disk...


In [6]:
# Load our input image and grab its spatial dimensions.
# `image` starts out as the path string from the configuration cell and is then
# rebound to the pixel array. Remember the path the first time through so this
# cell can be re-run (it also reloads a clean, un-annotated image each time).
if isinstance(image, str):
    image_path = image
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of an AttributeError on `.shape` below.
if image is None:
    raise FileNotFoundError("could not read image: {}".format(image_path))
(H, W) = image.shape[:2]

# Determine only the *output* layer names that we need from YOLO.
# getUnconnectedOutLayers() yields 1-based layer indices; depending on the
# OpenCV version they arrive either as an (N, 1) array or as a flat array,
# so flatten before indexing to support both.
layer_names = net.getLayerNames()
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
ln = [layer_names[int(i) - 1] for i in out_layer_ids]

# Construct a blob from the input image (scaled by 1/255, resized to 416x416,
# R and B channels swapped, no cropping) and then perform a forward pass of the
# YOLO object detector, giving us our bounding boxes and associated probabilities.
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)  # perform a forward pass through the YOLO network
end = time.time()

# Show timing information on YOLO.
print("[INFO] YOLO took {:.6f} secs".format(end - start))

[INFO] YOLO took 0.604439 secs


In [7]:
# Empty accumulators for the detections that survive the confidence filter:
#   boxes       - bounding boxes
#   confidences - detection confidences (a lower value means the object might
#                 not be what the network thinks it is; detections that do not
#                 meet the threshold (0.5) are filtered out)
#   classIDs    - class IDs
boxes, confidences, classIDs = [], [], []

In [8]:
# Inspect the first detection row of the first output layer to see the raw layout.
# The detection loop below reads entries 0-3 as the box (center x, center y,
# width, height) and entries 5 onwards as the per-class scores.
# NOTE(review): entry 4 is presumably the objectness score — confirm.
layerOutputs[0][0]

array([4.7612704e-02, 3.3864547e-02, 2.8027838e-01, 1.8578026e-01,
       8.5099011e-10, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e

In [9]:
# Start from empty accumulators so that re-running this cell does not append a
# second copy of every detection to lists left over from a previous run.
boxes, confidences, classIDs = [], [], []

# Box coordinates come out of the network relative to the image size; this
# scale vector maps (centerX, centerY, width, height) back to pixels.
box_scale = np.array([W, H, W, H])

# Loop over each of the layer outputs.
for output in layerOutputs:
    # Loop over each of the detections.
    for detection in output:
        # Entries 5 onwards hold the per-class scores; the best-scoring class
        # gives the class ID and the confidence (probability) of the detection.
        scores = detection[5:]
        classID = int(np.argmax(scores))
        confidence = float(scores[classID])

        # Filter out weak predictions.
        if confidence <= confidence_threshold:
            continue

        # Scale the bounding box coordinates back relative to the size of the image.
        # NOTE: YOLO actually returns the center (x, y)-coordinates of the
        # bounding box followed by the box's width and height.
        (centerX, centerY, width, height) = (detection[0:4] * box_scale).astype("int")

        # Use the center (x, y)-coordinates to derive the top-left corner of the box.
        x = int(centerX - (width / 2))
        y = int(centerY - (height / 2))

        # Store plain Python numbers: [x, y, width, height], confidence, class ID.
        boxes.append([x, y, int(width), int(height)])
        confidences.append(confidence)
        classIDs.append(classID)

In [10]:
# Apply non-maxima suppression.
# This suppresses significantly overlapping bounding boxes, keeping only the
# most confident ones, and ensures that we do not have any redundant or
# extraneous bounding boxes.
idxs = cv2.dnn.NMSBoxes(
    boxes,
    confidences,
    confidence_threshold,  # min probability to filter weak detections
    threshold,             # non-maxima suppression threshold
)

In [11]:
# Draw the boxes and class text on the image.
# NOTE: the drawing happens in place on `image`.
if len(idxs) > 0:  # ensure at least one detection exists
    # Normalise to a flat index array regardless of the container/shape that
    # NMSBoxes returned.
    for i in np.asarray(idxs).flatten():
        # Extract the bounding box coordinates.
        (x, y, w, h) = boxes[i]

        # Draw a bounding box rectangle and label on the image.
        color = [int(c) for c in COLORS[classIDs[i]]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
        # Put the label just above the box; when the box touches the top edge
        # there is no room above it, so place the label just inside the box
        # instead of drawing it off-canvas.
        text_y = y - 5 if y >= 20 else y + 15
        cv2.putText(image, text, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# Show the output image in a native window and block until a key is pressed.
cv2.imshow("Image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
# One more short waitKey call after destroyAllWindows, kept from the original
# cell (presumably so the GUI can process the close event — confirm).
cv2.waitKey(1)

-1