In [None]:
# 1. Extract a frame and convert it into a blob of size 320x320.
# 2. Load YOLO, select CPU or GPU, feed the blob to the network and read the results from its output layers.
# 3. Find the objects (x/y, w/h, confidence, class) and apply non-max suppression.
# 4. Show the detected objects after scaling x, y, w, h by the width ratio and height ratio.

In [2]:
import cv2
import numpy as np

# Minimum class probability for a detection to be considered: we are not
# going to bother with objects at or below 30% probability.
THRESHOLD = 0.3
# Overlap threshold handed to non-max suppression;
# the lower the value: the fewer bounding boxes will remain
SUPPRESSION_THRESHOLD = 0.3
# Side length (in pixels) of the square blob that is fed to the YOLO network.
YOLO_IMAGE_SIZE = 320

In [19]:
def find_objects(model_outputs):
    """Collect confident detections from the network outputs and run NMS on them.

    Each prediction row is read as: entries 0-3 are the box centre x, centre y,
    width and height (multiplied by ``YOLO_IMAGE_SIZE`` to get pixels in the
    network input), and entries from index 5 onward are per-class scores.
    Entry 4 is not used here (presumably the objectness score -- confirm).

    Args:
        model_outputs: iterable of 2-D arrays, one per output layer, whose rows
            are predictions in the layout described above.

    Returns:
        A 4-tuple ``(box_indexes_to_keep, bounding_box_locations, class_ids,
        confidence_values)``. The last three are parallel lists holding every
        detection above ``THRESHOLD``; boxes are ``[x, y, w, h]`` with ``x, y``
        the top-left corner. ``box_indexes_to_keep`` is a flat list of plain
        ``int`` positions into those lists that survived non-max suppression
        (empty when nothing was detected).
    """
    bounding_box_locations = []
    class_ids = []
    confidence_values = []

    for output in model_outputs:
        for prediction in output:
            class_probabilities = prediction[5:]
            class_id = np.argmax(class_probabilities)
            confidence = class_probabilities[class_id]

            if confidence > THRESHOLD:
                w, h = int(prediction[2] * YOLO_IMAGE_SIZE), int(prediction[3] * YOLO_IMAGE_SIZE)
                # the prediction holds the box centre; shift by half the size
                # to get the top-left corner
                x, y = int(prediction[0] * YOLO_IMAGE_SIZE - w / 2), int(prediction[1] * YOLO_IMAGE_SIZE - h / 2)
                bounding_box_locations.append([x, y, w, h])
                class_ids.append(int(class_id))
                confidence_values.append(float(confidence))

    # Nothing passed the threshold: there is nothing to suppress, so skip the
    # OpenCV call and hand back a consistent, empty index list.
    if not bounding_box_locations:
        return [], bounding_box_locations, class_ids, confidence_values

    kept = cv2.dnn.NMSBoxes(bounding_box_locations, confidence_values, THRESHOLD, SUPPRESSION_THRESHOLD)
    # The shape of the NMSBoxes result differs between OpenCV releases (flat
    # vs. one index per row); flatten it to plain ints so callers can index
    # the Python lists above directly.
    box_indexes_to_keep = [int(i) for i in np.array(kept).flatten()]

    return box_indexes_to_keep, bounding_box_locations, class_ids, confidence_values



In [17]:
def show_detected_objects(img, bounding_box_ids, all_bounding_boxes, class_ids, confidence_values, width_ratio,
                         height_ratio):
    """Draw the kept CAR / PERSON detections onto ``img`` in place.

    Args:
        img: image to draw on; it is modified by the OpenCV drawing calls.
        bounding_box_ids: indexes (into the three lists below) of the boxes
            to draw.
        all_bounding_boxes: list of ``[x, y, w, h]`` boxes, ``x, y`` being the
            top-left corner, expressed in network-input pixels.
        class_ids: class id of each box; only ids 2 (CAR) and 0 (PERSON) are
            drawn, every other id is skipped.
        confidence_values: confidence of each box in the range 0-1; shown as
            an integer percentage.
        width_ratio: horizontal factor that maps network-input pixels to
            ``img`` pixels.
        height_ratio: vertical factor that maps network-input pixels to
            ``img`` pixels.

    Returns:
        None.
    """
    # we are not going to draw every object, just PERSON and CAR
    labels_to_draw = {2: 'CAR', 0: 'PERSON'}
    # OpenCV deals with BGR (blue green red), so (255, 0, 0) is the blue color
    box_color = (255, 0, 0)

    for index in bounding_box_ids:
        label = labels_to_draw.get(int(class_ids[index]))
        if label is None:
            continue

        bounding_box = all_bounding_boxes[index]
        # we have to transform the location and size because the frame was
        # resized before it was given to the network
        x = int(int(bounding_box[0]) * width_ratio)
        y = int(int(bounding_box[1]) * height_ratio)
        w = int(int(bounding_box[2]) * width_ratio)
        h = int(int(bounding_box[3]) * height_ratio)

        cv2.rectangle(img, (x, y), (x+w, y+h), box_color, 2)
        class_with_confidence = label + str(int(confidence_values[index] * 100)) + '%'
        # Keep the caption inside the frame: for boxes touching the top edge
        # y - 10 would place the text above the visible image.
        label_y = max(y - 10, 10)
        cv2.putText(img, class_with_confidence, (x, label_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, box_color, 1)


In [29]:
# Run YOLOv3 on every frame of the video and display the annotated frames
# until the video ends or the user presses 'q'.
capture = cv2.VideoCapture('D:yolo_test.mp4')

# there are 80 (90) possible output classes
# 0: person - 2: car - 5: bus
# NOTE(review): this list is not referenced anywhere in this cell and its
# order does not match the ids in the comment above -- confirm it is needed.
classes = ['car', 'person', 'bus']

neural_network = cv2.dnn.readNetFromDarknet('D:yolov3.cfg', 'D:yolov3.weights')
# define whether we run the algorithm with CPU or with GPU
# WE ARE GOING TO USE CPU--
neural_network.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
neural_network.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# The output layers do not change between frames, so look their names up once
# instead of on every iteration. Asking the network for the names directly
# avoids the manual 1-based index arithmetic on getLayerNames().
output_names = neural_network.getUnconnectedOutLayersNames()

try:
    while True:
        # after reading a frame (so basically one image) we just have to do
        # the same operations as with single images !!!
        frame_grabbed, frame = capture.read()

        if not frame_grabbed:
            break

        original_width, original_height = frame.shape[1], frame.shape[0]

        # the image into a BLOB, values scaled to [0-1], BGR swapped to RGB.
        # swapRB must be passed by keyword: the fourth positional parameter of
        # blobFromImage is `mean`, so a positional True never enabled the swap.
        blob = cv2.dnn.blobFromImage(frame, 1 / 255, (YOLO_IMAGE_SIZE, YOLO_IMAGE_SIZE),
                                     swapRB=True, crop=False)
        neural_network.setInput(blob)

        outputs = neural_network.forward(output_names)
        predicted_objects, bbox_locations, class_label_ids, conf_values = find_objects(outputs)
        # the ratios map network-input coordinates back onto the original frame
        show_detected_objects(frame, predicted_objects, bbox_locations, class_label_ids, conf_values,
                              original_width / YOLO_IMAGE_SIZE, original_height / YOLO_IMAGE_SIZE)

        cv2.imshow('YOLO Algorithm', frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
finally:
    # always give the video handle and the windows back, even if a frame
    # raised an exception or the kernel was interrupted
    capture.release()
    cv2.destroyAllWindows()


In [None]:
#using SSD

In [4]:
import cv2
import numpy as np

# we are not going to bother with objects less than 50% probability
# NOTE: THRESHOLD and SUPPRESSION_THRESHOLD rebind the names defined for the
# YOLO section earlier in this notebook; functions that read them at call time
# (e.g. find_objects) will see these values once this cell has run.
THRESHOLD = 0.5
# Overlap threshold handed to non-max suppression;
# the lower the value: the fewer bounding boxes will remain
SUPPRESSION_THRESHOLD = 0.2
# Width and height (in pixels) given to the SSD model's setInputSize.
SSD_INPUT_SIZE = 320

In [5]:
# read the class labels
def construct_class_names(file_name='D:class_names'):
    """Return the class labels stored one per line in ``file_name``.

    Trailing newline characters at the end of the file are removed before the
    text is split, so they do not produce empty labels at the end of the list.
    Blank lines elsewhere are kept as empty strings.

    Args:
        file_name: path of the text file holding the labels.

    Returns:
        List of label strings in file order.
    """
    with open(file_name, 'rt') as labels_file:
        raw_text = labels_file.read()

    # strip only the newline characters at the very end, then cut on newlines
    trimmed_text = raw_text.rstrip('\n')
    return trimmed_text.split('\n')


# Load the labels once, from the function's default path, when this cell runs.
class_names = construct_class_names()

In [6]:
def show_detected_objects(img, boxes_to_keep, all_bounding_boxes, object_names, class_ids):
    """Draw every kept box and its upper-cased class name onto ``img`` in place.

    Args:
        img: image to draw on; it is modified by the OpenCV drawing calls.
        boxes_to_keep: indexes (into ``all_bounding_boxes`` and ``class_ids``)
            of the boxes to draw.
        all_bounding_boxes: boxes whose first four entries are read as
            ``x, y, w, h`` with ``x, y`` the top-left corner.
        object_names: list of class names; the name for a box is looked up at
            position ``class id - 1``.
        class_ids: class id of each box.

    Returns:
        None.
    """
    green = (0, 255, 0)

    for kept in boxes_to_keep:
        left, top, box_w, box_h = (all_bounding_boxes[kept][i] for i in range(4))
        cv2.rectangle(img, (left, top), (left + box_w, top + box_h), green, 2)

        # class ids are shifted down by one to index the names list
        caption = object_names[class_ids[kept] - 1].upper()
        cv2.putText(img, caption, (left, top - 10), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, green, 1)


In [7]:
# Run the SSD detector on every frame of the video and display the annotated
# frames until the video ends or the user presses 'q'.
capture = cv2.VideoCapture('D:objects.mp4')

neural_network = cv2.dnn_DetectionModel('D:ssd_weights.pb', 'D:ssd_mobilenet_coco_cfg.pbtxt')
# define whether we run the algorithm with CPU or with GPU
# TO USE CPU --
neural_network.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
neural_network.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
neural_network.setInputSize(SSD_INPUT_SIZE, SSD_INPUT_SIZE)
# scale and mean together map 0..255 pixel values to -1..1
neural_network.setInputScale(1.0/127.5)
neural_network.setInputMean((127.5, 127.5, 127.5))
neural_network.setInputSwapRB(True)

try:
    while True:
        is_grabbed, frame = capture.read()

        if not is_grabbed:
            break

        class_label_ids, confidences, bbox = neural_network.detect(frame)
        bbox = list(bbox)
        # Flatten ids and confidences so each box maps to one scalar whether
        # the model returned them flat or as one value per row.
        class_label_ids = np.array(class_label_ids, dtype=int).flatten()
        confidences = np.array(confidences).reshape(1, -1).tolist()[0]

        # these are the indexes of the bounding boxes we have to keep
        box_to_keep = cv2.dnn.NMSBoxes(bbox, confidences, THRESHOLD, SUPPRESSION_THRESHOLD)
        # The shape of the NMSBoxes result differs between OpenCV releases;
        # plain ints are always valid indexes into the Python list `bbox`.
        box_to_keep = [int(i) for i in np.array(box_to_keep).flatten()]

        show_detected_objects(frame, box_to_keep, bbox, class_names, class_label_ids)

        cv2.imshow('SSD Algorithm', frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
finally:
    # always give the video handle and the windows back, even if a frame
    # raised an exception or the kernel was interrupted
    capture.release()
    cv2.destroyAllWindows()
