In [1]:
!pip install opencv-python
!pip install opencv-python-headless 



In [8]:
import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

# Load video
cap = cv2.VideoCapture('video.mp4')
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

# Define codec and create VideoWriter object
out = cv2.VideoWriter('output_video.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width, frame_height))

# Loading the COCO class labels our YOLO model was trained on
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

while True:
    ret, img = cap.read()
    if not ret:
        break

    height, width, channels = img.shape

    # Detecting objects
    blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Showing information on the screen
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5 and class_id == 0:  # Class ID 0 is a person in COCO
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = (0, 255, 0)
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
    
    cv2.imshow("Image", img)
    out.write(img)
    key = cv2.waitKey(1)
    if key == 27:  # press ESC to stop
        break

cap.release()
out.release()
cv2.destroyAllWindows()

IndexError: invalid index to scalar variable.

In [5]:
!wget https://pjreddie.com/media/files/yolov3.weights
!wget https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg

'wget' is not recognized as an internal or external command,
operable program or batch file.
'wget' is not recognized as an internal or external command,
operable program or batch file.


In [2]:
!pip install requests



In [3]:
import requests

def download_file(url, filename):
    # Send a HTTP request to the server and save the HTTP response in a response object called r
    with requests.get(url, stream=True) as r:
        r.raise_for_status()  # Check for errors
        # Open a local file with write-binary mode
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192): 
                # Write the contents of the response (r.content) to a new file in binary mode.
                f.write(chunk)

# URLs of the YOLOv3 weights and config
weights_url = "https://pjreddie.com/media/files/yolov3.weights"
cfg_url = "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg"

# File paths to save weights and config
weights_path = "yolov3.weights"
cfg_path = "yolov3.cfg"

# Download files
download_file(weights_url, weights_path)
download_file(cfg_url, cfg_path)


In [4]:
import requests

url = 'https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names'
r = requests.get(url, allow_redirects=True)
open('coco.names', 'wb').write(r.content)


625

In [13]:
import cv2
import numpy as np

# Load the pre-trained YOLO model from the disk
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# Initialize the video capture
cap = cv2.VideoCapture('video.mp4')
frame_width, frame_height = int(cap.get(3)), int(cap.get(4))

# Prepare to write the processed video
out = cv2.VideoWriter('output_video.avi', cv2.VideoWriter_fourcc(*'MJPG'), 10, (frame_width, frame_height))

# Load class labels (like 'person')
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]

while True:
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape

    # Prepare the frame for YOLO (blob from image)
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(output_layers)

    # Post-process the detections
    boxes, confidences, class_ids = [], [], []
    for output in detections:
        for detect in output:
            scores = detect[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:  # Filter out weak detections
                center_x, center_y = int(detect[0] * width), int(detect[1] * height)
                w, h = int(detect[2] * width), int(detect[3] * height)
                x, y = int(center_x - w / 2), int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply non-max suppression to suppress weak, overlapping bounding boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    # Draw bounding boxes on the frame
    for i in indices:
        i = i[0]  # Accessing the first element if indices are a list of lists
        box = boxes[i]
        if class_ids[i] == 0:  # Index 0 is the 'person' class
            x, y, w, h = box
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = classes[class_ids[i]]
            cv2.putText(frame, label, (x, y + 30), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)

    # Display the frame
    cv2.imshow('Video', frame)
    out.write(frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()


IndexError: invalid index to scalar variable.

In [5]:
import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()

# Get the output layers, handling the structure of the getUnconnectedOutLayers() output correctly
out_layer_indexes = net.getUnconnectedOutLayers()

# Some OpenCV versions return an array of arrays (with one element each), and some return a flat array
# This check handles both possibilities
if out_layer_indexes.ndim == 2 and out_layer_indexes.shape[1] == 1:
    output_layers = [layer_names[i[0] - 1] for i in out_layer_indexes]
else:
    output_layers = [layer_names[i - 1] for i in out_layer_indexes.flatten()]

# Load video
cap = cv2.VideoCapture('video.mp4')

# Process video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape

    # Convert frame to blob
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Process detections
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Scale back up to the frame size
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                if class_id == 0:  # Detected a person
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()


In [16]:
# 1
# with displaying people count 

import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
out_layer_indexes = net.getUnconnectedOutLayers()

# Some OpenCV versions return an array of arrays (with one element each), and some return a flat array
# This check handles both possibilities
if out_layer_indexes.ndim == 2 and out_layer_indexes.shape[1] == 1:
    output_layers = [layer_names[i[0] - 1] for i in out_layer_indexes]
else:
    output_layers = [layer_names[i - 1] for i in out_layer_indexes.flatten()]

# Load video
cap = cv2.VideoCapture('video2.mp4')

# Process video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape
    # Initialize people count for the frame
    people_count = 0

    # Convert frame to blob
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Process detections
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Scale back up to the frame size
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                if class_id == 0:  # Detected a person
                    people_count += 1
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Display the number of people detected on the frame
    cv2.putText(frame, f'People Count: {people_count}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()


In [9]:
import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
out_layer_indexes = net.getUnconnectedOutLayers()

# Some OpenCV versions return an array of arrays (with one element each), and some return a flat array
# This check handles both possibilities
if out_layer_indexes.ndim == 2 and out_layer_indexes.shape[1] == 1:
    output_layers = [layer_names[i[0] - 1] for i in out_layer_indexes]
else:
    output_layers = [layer_names[i - 1] for i in out_layer_indexes.flatten()]

# Load video
cap = cv2.VideoCapture('video.mp4')

# Process video
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape
    # Initialize people count for the frame
    people_count = 0

    # Convert frame to blob
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    # Process detections
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Scale back up to the frame size
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                # Append to lists
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply Non-Max Suppression to reduce overlapping boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    
    for index in indices:
        i = index[0]  # Correcting the way to access index, assuming index is a tuple or numpy array
        if class_ids[i] == 0:  # Check if the detected class is 'person'
            x, y, w, h = boxes[i]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Draw rectangle around the person


    # Display the number of people detected on the frame
    cv2.putText(frame, f'People Count: {people_count}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()


IndexError: invalid index to scalar variable.

In [14]:
# 2

import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]


# Define function to count people
def count_people(video_path):
    # Load video
    cap = cv2.VideoCapture(video_path)
    people_count_per_frame = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        height, width, channels = frame.shape

        # Detecting objects
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Information to count people
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Rectangle coordinates
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # We use NMS function in opencv to eliminate redundant overlapping boxes with lower confidences.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        people_count = sum(1 for i in indexes.flatten() if class_ids[i] == 0) # Class id 0 is typically people in COCO dataset

        people_count_per_frame.append(people_count)

    cap.release()
    return people_count_per_frame

# Path to your video file or camera id
video_path = 'video.mp4'
frame_counts = count_people(video_path)
print(frame_counts)


KeyboardInterrupt: 

In [20]:
import cv2
import numpy as np

# Load YOLO
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getLayerNames()
out_layer_indexes = net.getUnconnectedOutLayers()

if out_layer_indexes.ndim == 2 and out_layer_indexes.shape[1] == 1:
    output_layers = [layer_names[i[0] - 1] for i in out_layer_indexes]
else:
    output_layers = [layer_names[i - 1] for i in out_layer_indexes.flatten()]

cap = cv2.VideoCapture('video2.mp4')

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    height, width, _ = frame.shape
    people_count = 0
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    boxes = []
    confidences = []
    class_ids = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                if class_id == 0:  # Person class
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

    # Applying Non-Maximum Suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

    for i in indices.flatten():
        box = boxes[i]
        x, y, w, h = box
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        people_count += 1

    cv2.putText(frame, f'People Count: {people_count}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
