In [2]:
# Download YOLOv3 weights
!wget https://pjreddie.com/media/files/yolov3.weights

# Download YOLOv3 configuration file
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg

# Download COCO class names
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names

--2024-07-18 17:39:47--  https://pjreddie.com/media/files/yolov3.weights
Resolving pjreddie.com (pjreddie.com)... 162.0.215.52
Connecting to pjreddie.com (pjreddie.com)|162.0.215.52|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248007048 (237M) [application/octet-stream]
Saving to: ‘yolov3.weights.1’


2024-07-18 17:39:55 (29.5 MB/s) - ‘yolov3.weights.1’ saved [248007048/248007048]

--2024-07-18 17:39:55--  https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8342 (8.1K) [text/plain]
Saving to: ‘yolov3.cfg.1’


2024-07-18 17:39:55 (68.3 MB/s) - ‘yolov3.cfg.1’ saved [8342/8342]

--2024-07-18 17:39:56--  https://raw.githubusercontent.com/pjreddie/darknet/master/data

In [3]:
import cv2
import numpy as np
from google.colab import drive
from google.colab import files
from matplotlib import pyplot as plt

In [4]:
# Build the YOLO network from the downloaded weights and configuration files
net = cv2.dnn.readNet(
    "yolov3.weights",
    "yolov3.cfg",
)

# Names of all layers in the network; used to look up the output layers by index
layer_names = net.getLayerNames()

In [5]:
# Indices of the network's unconnected output layers
unconnected = net.getUnconnectedOutLayers()

# The indices may come back wrapped in an extra dimension; collapse them to 1-D if so
output_layers_indices = unconnected.flatten() if unconnected.ndim > 1 else unconnected

# Map each index to its layer name (indices are offset by one relative to layer_names)
output_layers = [layer_names[idx - 1] for idx in output_layers_indices]

In [6]:
# Show the resolved output layer names as a sanity check
output_layers

['yolo_82', 'yolo_94', 'yolo_106']

In [7]:
# Read the class labels, one per line, from the downloaded coco.names file
with open("coco.names", "r") as names_file:
    classes = [raw_line.strip() for raw_line in names_file]

# Hard-coded list of the 80 COCO labels, grouped by theme for readability.
# NOTE(review): the cells shown here label detections with `classes`, not this list.
coco_classes = [
    # people and vehicles
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    # street objects
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # kitchenware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # furniture
    "chair", "couch", "potted plant", "bed", "dining table", "toilet",
    # electronics
    "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # other indoor objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

In [8]:
# Mount Google Drive at /content/drive so the video under MyDrive is readable.
# `drive` is imported with the other dependencies in the notebook's import cell.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
# Open the source video from the mounted Drive
video_path = '/content/drive/MyDrive/videos/library.mp4'
cap = cv2.VideoCapture(video_path)

In [10]:
# Stop with a real error if the video could not be opened.
# Raising (instead of print + exit()) keeps the kernel and its state alive and
# halts a "Run all" at this cell with a visible traceback.
if not cap.isOpened():
    raise IOError("cannot open video file")

In [11]:
# Run YOLO on every frame of the video and collect the annotated frames.

CONF_THRESHOLD = 0.5   # minimum class score to keep a detection; also the NMS score threshold
NMS_THRESHOLD = 0.4    # overlap threshold handed to non-max suppression
BOX_COLOR = (0, 255, 0)  # Green color for the bounding box
FONT = cv2.FONT_HERSHEY_SIMPLEX


def _detect_objects(frame):
    """Run the network on one frame and return its detections.

    Returns a tuple ``(boxes, confidences, class_ids, keep)``:
    ``boxes`` is a list of ``[x, y, w, h]`` pixel boxes (top-left corner plus
    size), ``confidences`` the matching best class scores, ``class_ids`` the
    matching class indices, and ``keep`` the ascending indices of the boxes
    that survive non-max suppression.
    """
    frame_height, frame_width = frame.shape[:2]

    # Scale pixels by 0.00392 (~1/255), resize to 416x416, no mean subtraction,
    # swap the R and B channels, do not crop.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes, confidences, class_ids = [], [], []
    frame_scale = np.array([frame_width, frame_height, frame_width, frame_height])

    for out in outs:
        # Each row: columns 0-3 are centre x, centre y, width, height relative
        # to the frame size; columns 5+ are the per-class scores.
        scores = out[:, 5:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(len(out)), best_class]
        confident = best_score > CONF_THRESHOLD
        if not confident.any():
            continue

        # Vectorised form of the per-detection arithmetic; astype(int)
        # truncates toward zero exactly like int() does.
        geometry = (out[confident, :4].astype(np.float64) * frame_scale).astype(int)
        center_x, center_y, box_w, box_h = geometry.T
        left = (center_x - box_w / 2).astype(int)
        top = (center_y - box_h / 2).astype(int)

        boxes.extend(np.stack([left, top, box_w, box_h], axis=1).tolist())
        confidences.extend(best_score[confident].astype(float).tolist())
        class_ids.extend(best_class[confident].tolist())

    # Apply Non-Max Suppression to remove overlapping bounding boxes
    keep = []
    if boxes:
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
        # The result may be flat or wrapped in an extra dimension; normalise it
        # to a sorted list so boxes are drawn in the same order as before.
        keep = sorted(np.asarray(indexes, dtype=int).ravel().tolist())

    return boxes, confidences, class_ids, keep


def _draw_detections(frame, boxes, confidences, class_ids, keep):
    """Draw a labelled bounding box on ``frame`` (in place) for every kept detection."""
    for i in keep:
        x, y, w, h = boxes[i]
        label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"

        # Draw the bounding box
        cv2.rectangle(frame, (x, y), (x + w, y + h), BOX_COLOR, 2)

        # Put the label above the box, or below its top edge when there is no room above
        (text_width, text_height), baseline = cv2.getTextSize(label, FONT, 0.5, 1)
        y_text = y - 5 if y - 5 > text_height else y + text_height + 5

        # Filled background rectangle, then black text on top of it
        cv2.rectangle(frame, (x, y_text - text_height - 2), (x + text_width, y_text + baseline - 2), BOX_COLOR, cv2.FILLED)
        cv2.putText(frame, label, (x, y_text - 2), FONT, 0.5, (0, 0, 0), 1)


# Every annotated frame is kept in memory because the save cell iterates over
# `frames`; for long videos this list can grow very large.
frames = []

try:
    # Probe the first frame for the video dimensions (the save cell reads
    # `width` and `height`), failing clearly if nothing can be read - for
    # example when this cell is re-run after `cap` was released.
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError("could not read a frame from the video; re-create `cap` and run this cell again")
    height, width, channels = frame.shape

    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # reset video to the beginning

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        boxes, confidences, class_ids, keep = _detect_objects(frame)
        _draw_detections(frame, boxes, confidences, class_ids, keep)
        frames.append(frame)
finally:
    # Release the capture even if the loop is interrupted or raises
    cap.release()

KeyboardInterrupt: 

In [None]:
# Save the annotated frames to 'library_output.mp4' in the working directory.
if not frames:
    raise ValueError("no frames to write; run the detection cell first")

# Use the source clip's frame rate so the output plays at the original speed.
# The capture used for detection is already released, so reopen the source
# briefly; fall back to 30 fps when the rate cannot be read.
source = cv2.VideoCapture('/content/drive/MyDrive/videos/library.mp4')
fps = source.get(cv2.CAP_PROP_FPS)
source.release()
if not fps > 0:
    fps = 30.0

# Take the frame size from the frames themselves; VideoWriter expects (width, height)
height, width = frames[0].shape[:2]

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('library_output.mp4', fourcc, fps, (width, height))
if not out.isOpened():
    raise RuntimeError("could not open 'library_output.mp4' for writing")

try:
    for frame in frames:
        out.write(frame)
finally:
    # Always finalise the file, even if a write fails part-way through
    out.release()

In [None]:
# Download the processed video file.
# The save cell writes 'library_output.mp4' to the current working directory,
# so download that file; the Drive path used previously is never written.
# `files` is already imported in the notebook's import cell.
try:
    files.download('library_output.mp4')
except Exception as e:
    # Best-effort: report the problem without aborting the notebook
    print("Error during file download:", e)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>