# Mounting Google Drive to Notebook

In [None]:
# Mount Google Drive so the model weights and videos under /content/drive are reachable.
# force_remount=True re-mounts even when the drive is already mounted in this session.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


# Installing Ultralytics Package

In [None]:
!pip install ultralytics -q

In [None]:
# Display the installed Ultralytics version to confirm the install succeeded.
import ultralytics
ultralytics.__version__

'8.1.11'

In [None]:
# Display the PyTorch version (the "+cuXXX" suffix, if present, names the CUDA build).
import torch
torch.__version__

'2.1.0+cu121'

In [None]:
# Report the GPU attached to this runtime. Guard with is_available() so the cell
# displays a message instead of raising when the runtime has no CUDA device.
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No CUDA GPU available'
gpu_name

'Tesla T4'

# Importing Required Libraries

In [None]:
from ultralytics import YOLO

import time
import torch
import cv2
from PIL import Image
import numpy as np
import torch.backends.cudnn as cudnn
# torch.backends.cudnn exposes PyTorch's settings for NVIDIA's CUDA Deep Neural Network
# library (cuDNN), which accelerates neural-network operations on the GPU.

# Running Inference on Video Samples and **Counting the Number of Helmets.**
# **Detection Threshold = 0.6**
# YOLOv8 (yolov8m.pt) Model fine-tuned on custom Dataset.

In [None]:
from ultralytics import YOLO
import cv2
import os
from tqdm import tqdm

# Helmet counting on video files.
# For every input video: read it frame by frame, detect helmets with the fine-tuned
# YOLOv8 model, draw a box around each detection plus a running per-frame count, and
# write the annotated frames to a new video in `output_directory`.

# Load the trained model fine-tuned on the custom dataset.
model = YOLO("/content/drive/MyDrive/Object_Counting_YOLOv8/best.pt")

class_names = ['helmet', 'head', 'person']    # the class objects

# Only helmets are counted; the id is the position of 'helmet' in class_names
# (index 0, the same value the prediction call was originally filtered on).
HELMET_CLASS_ID = class_names.index('helmet')
CONF_THRESHOLD = 0.6     # minimum confidence for a detection to be kept
FALLBACK_FPS = 30.0      # used only when the container reports no frame rate

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Define the video paths
video_paths = ['/content/drive/MyDrive/Object_Counting_YOLOv8/videos/1.mp4',
               '/content/drive/MyDrive/Object_Counting_YOLOv8/videos/2.mp4',
               '/content/drive/MyDrive/Object_Counting_YOLOv8/videos/3.mp4',
               '/content/drive/MyDrive/Object_Counting_YOLOv8/videos/4.mp4']

output_directory = '/content/drive/MyDrive/Object_Counting_YOLOv8/output_videos'

os.makedirs(output_directory, exist_ok=True)

for video_path in tqdm(video_paths):

    # cv2.VideoCapture reads frames from a video file (or a connected camera).
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Missing/corrupt file: skip it instead of writing an empty output video.
        print(f"Skipping video that could not be opened: {video_path}")
        cap.release()
        continue

    # Get the video properties so the output matches the input.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # CAP_PROP_FPS is 0 when the frame rate is unknown; a writer created with 0 fps
    # does not produce a playable file, so fall back to a sane default.
    fps = cap.get(cv2.CAP_PROP_FPS) or FALLBACK_FPS

    # Define the codec to use (MP4V) and create the VideoWriter object.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # Define the path where the output video will be saved.
    output_path = os.path.join(output_directory, f'output_{os.path.basename(video_path)}')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    try:
        while True:
            # ret   : whether a frame was read successfully
            # frame : the decoded frame as a BGR NumPy array (OpenCV's default order)
            ret, frame = cap.read()
            if not ret:
                break   # Exit loop if no more frames are available

            # The frame is passed to the model as-is: Ultralytics expects NumPy
            # images in OpenCV's BGR channel order, so converting to RGB first would
            # feed the model channel-swapped input. verbose=False suppresses the
            # per-frame log lines that would otherwise flood the cell output.
            detection_results = model.predict(frame, classes=[HELMET_CLASS_ID],
                                              conf=CONF_THRESHOLD, verbose=False)

            # Bounding boxes as [x1, y1, x2, y2] pixel coordinates, moved to the CPU
            # and converted to plain Python lists.
            boxes = detection_results[0].boxes.xyxy.cpu().tolist()

            for box in boxes:
                x1, y1, x2, y2 = (int(coord) for coord in box[:4])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # cv2.putText(image, text, bottom-left origin of the text, font,
            #             font scale, colour (BGR), thickness, line type)
            cv2.putText(frame, f"Helmets Count : {len(boxes)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 0), 2, cv2.LINE_AA)

            # Write the annotated frame (still BGR, as VideoWriter expects) to the output file.
            out.write(frame)
    finally:
        # Release the reader and writer of *this* video before moving on to the next
        # one, even if inference fails midway, so that every output file is finalized
        # and no handles are leaked.
        cap.release()
        out.release()

# Close any OpenCV windows that may have been created, for a clean exit.
cv2.destroyAllWindows()

  0%|          | 0/5 [00:00<?, ?it/s]


0: 384x640 (no detections), 26.0ms
Speed: 2.5ms preprocess, 26.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.2ms
Speed: 1.2ms preprocess, 25.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.3ms
Speed: 1.0ms preprocess, 25.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.1ms
Speed: 1.4ms preprocess, 25.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.2ms
Speed: 1.2ms preprocess, 25.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 helmet, 25.2ms
Speed: 1.2ms preprocess, 25.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.5ms
Speed: 2.0ms preprocess, 25.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 22.1ms
Speed: 2.3ms preprocess, 22.1ms inferenc

 20%|██        | 1/5 [00:19<01:19, 20.00s/it]


0: 384x640 2 helmets, 12.3ms
Speed: 3.0ms preprocess, 12.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.1ms
Speed: 1.6ms preprocess, 11.1ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.3ms
Speed: 1.9ms preprocess, 11.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 13.0ms
Speed: 1.6ms preprocess, 13.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 15.2ms
Speed: 1.5ms preprocess, 15.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.4ms
Speed: 1.2ms preprocess, 12.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.3ms
Speed: 2.2ms preprocess, 11.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.4ms
Speed: 1.9ms preprocess, 12.4ms inference, 1.4ms postprocess per image at shape (

 40%|████      | 2/5 [00:33<00:47, 15.93s/it]


0: 384x640 2 helmets, 11.9ms
Speed: 1.4ms preprocess, 11.9ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.7ms
Speed: 2.9ms preprocess, 11.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.3ms
Speed: 1.2ms preprocess, 12.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.5ms
Speed: 1.1ms preprocess, 12.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.3ms
Speed: 1.4ms preprocess, 12.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.6ms
Speed: 1.1ms preprocess, 12.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.0ms
Speed: 1.1ms preprocess, 12.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.9ms
Speed: 1.4ms preprocess, 12.9ms inference, 1.6ms postprocess per image at shape (

 60%|██████    | 3/5 [00:49<00:32, 16.02s/it]


0: 384x640 2 helmets, 12.0ms
Speed: 2.2ms preprocess, 12.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.2ms
Speed: 1.2ms preprocess, 11.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 13.2ms
Speed: 1.0ms preprocess, 13.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.5ms
Speed: 1.5ms preprocess, 11.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.9ms
Speed: 1.3ms preprocess, 11.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.0ms
Speed: 1.1ms preprocess, 12.0ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.7ms
Speed: 3.1ms preprocess, 11.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 16.2ms
Speed: 1.1ms preprocess, 16.2ms inference, 2.9ms postprocess per image at shape (

 80%|████████  | 4/5 [00:59<00:13, 13.70s/it]


0: 384x640 2 helmets, 14.0ms
Speed: 1.8ms preprocess, 14.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.0ms
Speed: 1.1ms preprocess, 11.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 13.0ms
Speed: 1.2ms preprocess, 13.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.4ms
Speed: 2.0ms preprocess, 11.4ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.1ms
Speed: 1.2ms preprocess, 12.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 11.9ms
Speed: 1.1ms preprocess, 11.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.3ms
Speed: 1.4ms preprocess, 12.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 helmets, 12.2ms
Speed: 1.1ms preprocess, 12.2ms inference, 1.3ms postprocess per image at shape (

100%|██████████| 5/5 [01:13<00:00, 14.77s/it]
