### Pre requisites

In [12]:
!pip install deep_sort_realtime


Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
   ---------------------------------------- 0.0/8.4 MB ? eta -:--:--
   ------------- -------------------------- 2.9/8.4 MB 18.6 MB/s eta 0:00:01
   ------------------------------------ --- 7.6/8.4 MB 21.3 MB/s eta 0:00:01
   ---------------------------------------- 8.4/8.4 MB 17.4 MB/s eta 0:00:00
Installing collected packages: deep_sort_realtime
Successfully installed deep_sort_realtime-1.3.2


### Without Pre-Processing

In [4]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort  # DeepSORT Tracker

# Load YOLOv8 model
model = YOLO("runs/detect/train2/best.pt")  # Change path to your trained model

# Initialize DeepSORT Tracker
tracker = DeepSort(max_age=50, embedder="mobilenet", embedder_gpu=True)

# Open video file
cap = cv2.VideoCapture("istockphoto-2155400562-640_adpp_is.mp4")  # Change to your video

counted_objects = set()  # Store counted object IDs

# Confidence threshold to filter detections
CONF_THRESHOLD = 0.4  

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 inference
    results = model(frame)

    detections = []
    for result in results:
        for box in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = box

            # Apply confidence threshold
            if score > CONF_THRESHOLD:
                detections.append([[x1, y1, x2 - x1, y2 - y1], score, int(class_id)])  # Convert format for DeepSORT

    # Track objects using DeepSORT
    tracked_objects = tracker.update_tracks(detections, frame=frame)

    for track in tracked_objects:
        if not track.is_confirmed():
            continue

        x1, y1, x2, y2 = track.to_ltrb()
        obj_id = track.track_id

        # Count each unique object only once
        if obj_id not in counted_objects:
            counted_objects.add(obj_id)

        # Draw bounding boxes & ID
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {obj_id}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display total count
    cv2.putText(frame, f"Total Boxes: {len(counted_objects)}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Show frame
    cv2.imshow("YOLOv8 + DeepSORT Counting", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 384x640 2 0s, 36.1ms
Speed: 6.4ms preprocess, 36.1ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 43.3ms
Speed: 6.0ms preprocess, 43.3ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 43.0ms
Speed: 6.8ms preprocess, 43.0ms inference, 6.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 39.3ms
Speed: 6.0ms preprocess, 39.3ms inference, 6.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 36.3ms
Speed: 5.1ms preprocess, 36.3ms inference, 6.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 41.8ms
Speed: 4.3ms preprocess, 41.8ms inference, 6.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 30.3ms
Speed: 4.5ms preprocess, 30.3ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 39.7ms
Speed: 3.5ms preprocess, 39.7ms inference, 4.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 0s, 32.7ms

### With Pre-Processing

In [3]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort  # DeepSORT Tracker

# Load YOLOv8 model
model = YOLO("runs/detect/train2/best.pt")  # Change to your trained model

# Initialize DeepSORT Tracker
tracker = DeepSort(max_age=50, embedder="mobilenet", embedder_gpu=True)

# Open video file
cap = cv2.VideoCapture("mixkit-parcels-on-a-conveyor-belt-20770-hd-ready.mp4")  # Change to your video

# Adjust thresholds
CONF_THRESHOLD = 0.4  # Lower to detect more objects
BLUR_THRESHOLD = 80  # Lower value to prevent over-filtering

counted_objects = set()  # Store unique object IDs

def is_blurry(crop):
    """Check if object is blurry using Laplacian variance."""
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    variance = cv2.Laplacian(gray, cv2.CV_64F).var()
    return variance < BLUR_THRESHOLD

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLOv8 inference
    results = model(frame)

    detections = []
    for result in results:
        for box in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = box

            # Apply confidence threshold
            if score < CONF_THRESHOLD:
                continue  

            # Crop detected object
            object_crop = frame[int(y1):int(y2), int(x1):int(x2)]
            if object_crop.size == 0 or is_blurry(object_crop):
                continue  

            detections.append([[x1, y1, x2 - x1, y2 - y1], score, int(class_id)])

    # Track objects using DeepSORT
    tracked_objects = tracker.update_tracks(detections, frame=frame)

    for track in tracked_objects:
        if not track.is_confirmed():
            continue

        x1, y1, x2, y2 = track.to_ltrb()
        obj_id = track.track_id

        # Add object ID to the counter set
        counted_objects.add(obj_id)

        # Draw bounding boxes & ID
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {obj_id}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display total count
    cv2.putText(frame, f"Total Boxes: {len(counted_objects)}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    # Show frame
    cv2.imshow("YOLOv8 + DeepSORT Counting", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 384x640 1 0, 45.3ms
Speed: 6.1ms preprocess, 45.3ms inference, 5.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 35.3ms
Speed: 5.1ms preprocess, 35.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 35.6ms
Speed: 4.7ms preprocess, 35.6ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 35.3ms
Speed: 5.5ms preprocess, 35.3ms inference, 3.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 46.6ms
Speed: 6.4ms preprocess, 46.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 32.9ms
Speed: 4.6ms preprocess, 32.9ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 36.7ms
Speed: 5.5ms preprocess, 36.7ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 32.7ms
Speed: 5.2ms preprocess, 32.7ms inference, 2.