In [27]:
import numpy as np
import cv2
import torch
from ultralytics import YOLO
import cvzone
import math
from sort.sort import * 


In [28]:
model = YOLO("yolov8l.pt")

In [29]:
class_names = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed" "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier","toothbrush"]

In [41]:
# Initialize video capture
cap = cv2.VideoCapture(r"C:\Users\49179\Desktop\Projects\Computer_vision_data\cars.mp4")
cap.set(3, 1080)
cap.set(4, 720)

# Initialize the SORT tracker
tracker = Sort(max_age=20, min_hits=3, iou_threshold=0.3)
limits = [100, 450, 450, 450]
totalCount = []

while True:
    ret, frame = cap.read()
    
    if not ret:
        break  # Break the loop if no frame is returned (end of video)
    
    # Flip and resize the frame
    flipped = cv2.resize(frame, (720, 720))
    ht, wt = flipped.shape[0:2]
    
    # Create a mask
    image = np.zeros_like(flipped)
    region = np.array([[(220, 250), (150, 450), (350, 450), (350, 250)]], np.int32)
    cv2.fillPoly(image, region, color=(255, 255, 255))
    masked_img = cv2.bitwise_and(flipped, image)

    # Process the frame with the model
    results = model(masked_img, stream=True)
    detections = np.empty((0, 5))
    
    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            w, h = x2 - x1, y2 - y1

            # Calculate confidence
            confidence = box.conf[0].item()
            confidence = math.ceil(confidence * 100) / 100

            # Get the class
            cls = int(box.cls[0])
            
            # Filter detections by class and confidence
            if (class_names[cls] in ["car", "truck", "bus", "motorbike"]) and confidence > 0.3:
                cvzone.putTextRect(flipped, f'{class_names[cls]} {confidence}', (max(0, x1), max(40, y1)), scale=0.6, thickness=1, offset=3)
                cvzone.cornerRect(flipped, (x1, y1, w, h), l=9)
                new_array = np.array([x1, y1, x2, y2, confidence])
                detections = np.vstack((detections, new_array))

    # Update tracker with the detections
    track_results = tracker.update(detections)
    cv2.line(flipped, (limits[0], limits[1]), (limits[2], limits[3]), (0, 0, 255), 5)
    
    # Draw the tracked objects
    for result in track_results:
        x1, y1, x2, y2, track_id = map(int, result)
        cv2.rectangle(flipped, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cvzone.putTextRect(flipped, f'ID: {track_id}', (x1, y1 - 10), scale=0.6, thickness=1, offset=3)

        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        cv2.circle(flipped, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

        print(f"Object ID: {track_id}, Center: ({cx}, {cy})")  # Debug print to track IDs and positions
 
        if limits[0] < cx < limits[2] and limits[1] - 15 < cy < limits[1] + 15:
            if track_id not in totalCount:
                totalCount.append(track_id)
                cv2.line(flipped, (limits[0], limits[1]), (limits[2], limits[3]), (0, 255, 0), 5)
                print(f"Counted ID: {track_id}, Total Count: {len(totalCount)}")  # Debug print for counting

    cvzone.putTextRect(flipped, f'Count: {len(totalCount)}', (50, 50))
    cv2.putText(flipped, str(len(totalCount)), (255, 100), cv2.FONT_HERSHEY_PLAIN, 5, (50, 50, 255), 8)

    # Display the frame
    cv2.imshow('Frame', flipped)
    
    # Break the loop if 'q' is pressed
    if cv2.waitKey(70) & 0xFF == ord('q'):
        break

# Release the capture and destroy all windows
cap.release()
cv2.destroyAllWindows()


0: 640x640 3 cars, 366.1ms
Speed: 7.0ms preprocess, 366.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Object ID: 433, Center: (335, 372)
Object ID: 432, Center: (271, 259)
Object ID: 431, Center: (245, 399)

0: 640x640 3 cars, 52.8ms
Speed: 4.0ms preprocess, 52.8ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Object ID: 433, Center: (334, 377)
Object ID: 432, Center: (269, 259)
Object ID: 431, Center: (243, 401)

0: 640x640 3 cars, 46.5ms
Speed: 4.0ms preprocess, 46.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Object ID: 433, Center: (330, 398)
Object ID: 432, Center: (266, 263)
Object ID: 431, Center: (234, 409)

0: 640x640 3 cars, 47.0ms
Speed: 3.0ms preprocess, 47.0ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)
Object ID: 433, Center: (329, 423)
Object ID: 432, Center: (261, 270)
Object ID: 431, Center: (224, 423)

0: 640x640 2 cars, 46.0ms
Speed: 4.8ms preprocess, 46.0ms inference, 2.1ms postpr

In [6]:
ht,wt = frame.shape[0:2]

In [7]:
ht,wt

(478, 848)

In [8]:
def region_of_interest(image,region_point):
    mask= np.zeros_like(image)
    cv2.fillPoly(mask,region_point, 255)
    masked_img=cv2.bitwise_and(image, mask)
    return masked_img