In [None]:
import cv2
import numpy

In [None]:
# Network definition and pre-trained weights consumed by cv2.dnn.readNet.
config = 'res/yolov3.cfg'
weights = 'res/yolov3.weights'

# One class label per line; a label's position in the list is its class id.
with open('res/yolov3.txt', 'r') as file:
    classes = [label.strip() for label in file]

# One random colour per class: three values drawn uniformly from [0, 255).
colors = numpy.random.uniform(0, 255, size=(len(classes), 3))

In [None]:
# Detections whose best class score is not above this value are discarded.
confidence_threshold = 0.9
# Passed to cv2.dnn.NMSBoxes as its NMS threshold.
numbers_threshold = 0.4

# Build the detection network from the weights and config files.
yolo = cv2.dnn.readNet(weights, config)

In [None]:
def get_output_layers():
    """Return the names of the network's unconnected (output) layers.

    Returns:
        list of layer names, suitable for passing to ``yolo.forward``.
    """
    # Ask the network for the names directly instead of translating the 1-based
    # ids from getUnconnectedOutLayers() by hand: those ids come back as a flat
    # array on some OpenCV builds and as an N x 1 array on others, and the
    # latter breaks ``layer_names[i - 1]`` indexing.
    return list(yolo.getUnconnectedOutLayersNames())

In [None]:
def draw_predictions(image, indices, boxes, class_ids, confidences):
    """Draw the selected detections onto ``image`` in place and overlay a person count.

    Args:
        image: image array; modified in place by cv2.rectangle / cv2.putText.
        indices: indices into ``boxes`` / ``class_ids`` of the detections to
            draw (e.g. the result of ``cv2.dnn.NMSBoxes``); may be flat or N x 1.
        boxes: sequence of ``(x, y, w, h)`` boxes -- top-left corner plus size.
        class_ids: class index per box; indexes the module-level ``classes``
            and ``colors``.
        confidences: score per box. Accepted for interface compatibility;
            not used for drawing.
    """
    person_count = 0

    # NMSBoxes returns a flat array on some OpenCV builds and an N x 1 array on
    # others (and an empty tuple when nothing survives); flatten covers them all.
    for i in numpy.asarray(indices).reshape(-1):
        # Boxes are stored as (x, y, w, h); unpack in that same order so the
        # rectangle is not drawn with width and height swapped.
        x, y, w, h = map(int, boxes[i])
        class_id = class_ids[i]
        label = str(classes[class_id])
        if label == 'person':
            person_count += 1

        # Hand OpenCV the colour as plain Python floats rather than a numpy row.
        color = tuple(colors[class_id].tolist())
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        cv2.putText(image, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    cv2.putText(image, f'Count: {person_count}', (40, 70), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 2)

In [None]:
def detect(image):
    """Run the YOLO network on ``image`` and draw the surviving detections on it.

    Every network output row whose best class score exceeds
    ``confidence_threshold`` becomes a candidate box; candidates are filtered
    with ``cv2.dnn.NMSBoxes`` and the kept ones are drawn by
    ``draw_predictions``.

    Args:
        image: image array; it is drawn on in place.

    Returns:
        The same ``image`` object, annotated.
    """
    img_h, img_w = image.shape[:2]

    # Positional arguments after the image are the scale factor (0.00392 ~ 1/255),
    # the network input size, the mean to subtract and the swapRB flag.
    net_input = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    yolo.setInput(net_input)

    boxes = []
    confidences = []
    class_ids = []
    for layer_output in yolo.forward(get_output_layers()):
        for row in layer_output:
            # Entries from index 5 onward are treated as per-class scores.
            class_scores = row[5:]
            best_class = numpy.argmax(class_scores)
            best_score = class_scores[best_class]
            if not best_score > confidence_threshold:
                continue

            # The first four entries are scaled by the image size to get the
            # box centre and dimensions in pixels.
            box_cx = int(row[0] * img_w)
            box_cy = int(row[1] * img_h)
            box_w = int(row[2] * img_w)
            box_h = int(row[3] * img_h)

            # Convert centre-based coordinates to a top-left corner.
            left = box_cx - box_w / 2
            top = box_cy - box_h / 2

            class_ids.append(best_class)
            confidences.append(float(best_score))
            boxes.append((left, top, box_w, box_h))

    kept = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, numbers_threshold)
    draw_predictions(image, kept, boxes, class_ids, confidences)

    return image

In [None]:
from IPython.display import display, Image

def image_from_camera():
    """Capture a single frame from capture device 0.

    Returns:
        The frame returned by ``VideoCapture.read``.

    Raises:
        RuntimeError: if the device did not deliver a frame.
    """
    video = cv2.VideoCapture(0)
    try:
        ok, image = video.read()
    finally:
        # Release the device even if read() raises, so it is not left held.
        video.release()

    # read() signals failure through its first return value; surface that here
    # instead of handing None to the caller.
    if not ok or image is None:
        raise RuntimeError('Could not read a frame from camera 0')
    return image

def image_from_file(filepath):
    """Load an image from ``filepath`` with ``cv2.imread``.

    Args:
        filepath: path of the image file to read.

    Returns:
        The decoded image array.

    Raises:
        OSError: if ``cv2.imread`` could not produce an image from the path.
    """
    image = cv2.imread(filepath)
    # imread reports failure by returning None rather than raising; fail here
    # with the offending path instead of later with an unrelated AttributeError.
    if image is None:
        raise OSError(f'Could not read an image from {filepath!r}')
    return image

def show(image):
    """Render ``image`` inline in the notebook as a JPEG."""
    # Create the display slot first, then fill it with the encoded picture.
    handle = display(None, display_id=True)
    jpeg_buffer = cv2.imencode('.jpeg', image)[1]
    handle.update(Image(data=jpeg_buffer.tobytes()))

In [None]:
# Grab one frame from capture device 0, run detection on it and display the annotated result.
show(detect(image_from_camera()))

In [None]:
# Run detection on a sample image loaded from disk and display the annotated result.
show(detect(image_from_file('res/medium-people-6.jpg')))