In [6]:
## Check CUDA availability
# NOTE(review): this reports whether *PyTorch* can use a CUDA device. The detector
# in this notebook runs through cv2.dnn with the CUDA backend, and whether that
# works depends on how OpenCV itself was built -- confirm that separately.
import torch
print(torch.cuda.is_available())  # Prints True when PyTorch can use CUDA, False otherwise

True


In [7]:
# Import necessary libraries
import numpy as np
import cv2
import os
import imutils

In [8]:
# Detection thresholds read by pedestrian_detection().
# NMS_THRESHOLD is handed to cv2.dnn.NMSBoxes as its NMS (box-overlap) threshold.
NMS_THRESHOLD = 0.3
# MIN_CONFIDENCE is used twice: detections whose best class score is not strictly
# above it are dropped, and it is also passed to cv2.dnn.NMSBoxes as the score threshold.
MIN_CONFIDENCE = 0.2

In [9]:
# Function to detect pedestrians in an image
def pedestrian_detection(image, model, layer_name, personidz=0,
                         min_confidence=None, nms_threshold=None,
                         input_size=(416, 416)):
    """Detect objects of a single class in one image and return the NMS survivors.

    Args:
        image: Image array; ``image.shape[:2]`` is read as ``(height, width)``.
        model: Network object exposing ``setInput(blob)`` and ``forward(names)``
            (the notebook passes a ``cv2.dnn`` network).
        layer_name: Output layer name(s), forwarded unchanged to ``model.forward``.
        personidz: Class index to keep. A detection is kept only when this is
            its highest-scoring class.
        min_confidence: Minimum class score (exclusive) for a detection to be
            kept; also used as the NMS score threshold. ``None`` (default)
            falls back to the module-level ``MIN_CONFIDENCE``.
        nms_threshold: Threshold passed to ``cv2.dnn.NMSBoxes``. ``None``
            (default) falls back to the module-level ``NMS_THRESHOLD``.
        input_size: ``(width, height)`` handed to ``cv2.dnn.blobFromImage``.

    Returns:
        A list of ``(confidence, (x1, y1, x2, y2), (centerX, centerY))`` tuples,
        one per detection kept by NMS. Coordinates are Python ints in pixels of
        ``image``; they are not clipped to the image bounds. The list is empty
        when nothing is detected.
    """
    # Resolve thresholds at call time so edits to the notebook constants are honoured.
    if min_confidence is None:
        min_confidence = MIN_CONFIDENCE
    if nms_threshold is None:
        nms_threshold = NMS_THRESHOLD

    (H, W) = image.shape[:2]  # Get image dimensions

    # Create a blob from the image and run it through the network
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, input_size, swapRB=True, crop=False)
    model.setInput(blob)
    layer_outputs = model.forward(layer_name)

    # Each detection row is read as [cx, cy, w, h, <unused here>, class scores...],
    # with the box given relative to the image size.
    scale = np.array([W, H, W, H])

    boxes = []  # [x, y, width, height] with (x, y) the top-left corner
    centroids = []  # (centerX, centerY) per kept detection
    confidences = []  # best class score per kept detection

    for output in layer_outputs:
        output = np.asarray(output)
        if len(output) == 0:
            continue

        # Pick the best class per row in one vectorised step instead of a
        # Python-level loop over every candidate row.
        class_scores = output[:, 5:]
        class_ids = class_scores.argmax(axis=1)
        best_scores = class_scores[np.arange(len(output)), class_ids]
        keep = (class_ids == personidz) & (best_scores > min_confidence)
        if not keep.any():
            continue

        # Scale the surviving boxes to pixels (truncating to int).
        scaled = (output[keep, 0:4] * scale).astype("int")
        for (centerX, centerY, width, height), confidence in zip(scaled, best_scores[keep]):
            # Convert centre/size to the top-left corner expected by NMSBoxes
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, int(width), int(height)])
            # Plain ints keep the centroid consistent with the box coordinates.
            centroids.append((int(centerX), int(centerY)))
            confidences.append(float(confidence))

    # Nothing survived filtering: no need to run NMS at all.
    if not boxes:
        return []

    # Apply Non-Maximum Suppression to suppress weak and overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, nms_threshold)

    results = []
    # Normalise the return value: it may be an Nx1 array, a flat array, or an
    # empty tuple depending on the OpenCV build.
    for i in np.asarray(idxs).reshape(-1):
        (x, y, w, h) = boxes[int(i)]
        results.append((confidences[int(i)], (x, y, x + w, y + h), centroids[int(i)]))

    return results

In [10]:
# Load the COCO class labels that the model was trained on
labelsPath = "coco.names"
# Use a context manager so the file handle is closed as soon as the labels are read.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Load the YOLO model weights and configuration
weights_path = "yolov4-tiny.weights"
config_path = "yolov4-tiny.cfg"

model = cv2.dnn.readNetFromDarknet(config_path, weights_path)

# Use the CUDA backend only when this OpenCV build can actually see a CUDA device.
# getCudaEnabledDeviceCount() reports 0 for builds without CUDA support and -1 when
# the driver is missing or incompatible, so anything <= 0 means "run on the CPU".
try:
    cuda_device_count = cv2.cuda.getCudaEnabledDeviceCount()
except (AttributeError, cv2.error):
    cuda_device_count = 0

if cuda_device_count > 0:
    model.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    model.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
else:
    model.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    model.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)


In [11]:
# Determine the output layer names from the YOLO model
all_layer_names = model.getLayerNames()
# getUnconnectedOutLayers() returns 1-based ids, shaped either (N,) or (N, 1)
# depending on the OpenCV version; flatten so both shapes index the list correctly.
out_layer_ids = np.asarray(model.getUnconnectedOutLayers()).flatten()
layer_name = [all_layer_names[int(i) - 1] for i in out_layer_ids]

# Open the input video and fail loudly if it cannot be read; otherwise the
# frame loop would just exit immediately without any hint of what went wrong.
video_path = "pedestrian-video.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError("Could not open video source: " + video_path)
writer = None

In [12]:
# Loop over the frames from the video stream.
# The try/finally guarantees the capture and the preview window are released even
# when the loop is interrupted (e.g. kernel interrupt) or a frame raises.
try:
    # The class index is loop-invariant, so look it up once instead of per frame.
    person_id = LABELS.index("person")

    while True:
        (grabbed, image) = cap.read()  # Grab a frame

        if not grabbed:  # If no frame was grabbed, the stream is exhausted
            break

        image = imutils.resize(image, width=700)  # Resize the frame for faster processing
        results = pedestrian_detection(image, model, layer_name,
                                       personidz=person_id)  # Detect pedestrians

        # Draw one green rectangle per detection; res[1] is (x1, y1, x2, y2)
        for res in results:
            (x1, y1, x2, y2) = res[1]
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.imshow("Detection", image)  # Display the resulting frame

        # Mask to the low byte so the key code compares reliably across platforms.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # If 'ESC' key is pressed, break the loop
            break
finally:
    # Release the video stream and close windows
    cap.release()
    cv2.destroyAllWindows()