In [3]:
import cv2
import time
import numpy as np 
from ultralytics import YOLO  
import torch 
from deep_sort.utils.parser import get_config  
from deep_sort.deep_sort import DeepSort  
from deep_sort.sort.tracker import Tracker  

deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'
tracker = DeepSort(model_path=deep_sort_weights, max_age=70)

video_path = 'v.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
print(frame_height, frame_width)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output = 'output.mp4'
out = cv2.VideoWriter(output, fourcc, fps, (frame_width, frame_height))

downscale_factor = 2
frames = []

unique_track_ids = set()

trajectoire = {}
speed = {}
last_positions = {}  
last_times = {} 

model = YOLO("yolov11n.pt")

frame_times = []  # Initialize frame_times list

class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

while True:
    start_time = time.time()
    
    ret, frame = cap.read()
    if not ret:
        break
    
    current_time = time.time()
    og_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Specify the classes to detect (1: bicycle, 2: car, 3: motorcycle, 5: bus, 7: truck)
    results = model(frame, classes=[1, 2, 3, 5, 7], conf=0.8)
    
    all_xyxy = []
    all_xywh = []
    all_cls = []
    all_conf = []
    for result in results:
        boxes = result.boxes
        probs = result.probs
        xyxy = boxes.xyxy.detach().cpu().numpy()
        xywh = boxes.xywh.detach().cpu().numpy()
        cls = boxes.cls.tolist()
        conf = boxes.conf.detach().cpu().numpy()

        all_xyxy.extend(xyxy)
        all_xywh.extend(xywh)
        all_cls.extend(cls)
        all_conf.extend(conf)
            
    pred_cls = np.array(all_cls)
    conf = np.array(all_conf)
    bboxes_xyxy = np.array(all_xyxy, dtype=float)
    bboxes_xywh = np.array(all_xywh, dtype=float)
    
    if bboxes_xywh.ndim == 1 or bboxes_xywh.size == 0:
        print("No detections in this frame, skipping tracking update.")
        continue
    
    tracks = tracker.update(bboxes_xywh, conf, og_frame)
    
    meters_per_pixel = 0.05  
    for track in tracks:
        x1, y1, x2, y2, track_id = track
        
        width = x2 - x1 
        height = y2 - y1
        center_x = int((x1 + x2) / 2)
        center_y = int((y1 + y2) / 2)
        current_pos = (center_x, center_y)
        
        if track_id not in trajectoire:
            trajectoire[track_id] = []
            speed[track_id] = 0
            last_positions[track_id] = current_pos
            last_times[track_id] = current_time
        trajectoire[track_id].append(current_pos)
        
        # CALCULATE SPEED ONLY IF WE HAVE LAST POS
        if track_id in last_positions:
            time_diff = current_time - last_times[track_id]
            if time_diff > 0:  # Avoid division by zero
                # Calculate distance in meters
                x_diff = current_pos[0] - last_positions[track_id][0]
                y_diff = current_pos[1] - last_positions[track_id][1]
                distance = np.sqrt(x_diff**2 + y_diff**2) * meters_per_pixel  # Convert to meters
                
                # Calculate speed (meters per second)
                speed_mps = distance / time_diff
                # Convert to km/h
                speed[track_id] = speed_mps * 3.6  # Convert m/s to km/h
                
                # Debug prints
                print(f"Track ID: {track_id}")
                print(f"Current Position: {current_pos}")
                print(f"Last Position: {last_positions[track_id]}")
                print(f"Time Difference: {time_diff}")
                print(f"Distance: {distance}")
                print(f"Speed (m/s): {speed_mps}")
                print(f"Speed (km/h): {speed[track_id]}")
        
        last_positions[track_id] = current_pos
        last_times[track_id] = current_time
        
        for i in range(1, len(trajectoire[track_id])):
            cv2.line(og_frame, trajectoire[track_id][i-1], trajectoire[track_id][i], (255, 0, 0), 2)
            
        cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), thickness=2)
        
        # Get the class name for the current track
        class_id = int(pred_cls[track_id % len(pred_cls)])  # Use modulo to avoid index out of bounds
        class_name = class_names[class_id]
        
        # Display the class name and speed
        cv2.putText(og_frame, f"{class_name}: {speed[track_id]:.1f} km/h", (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color=(0, 255, 0), thickness=2)
    
    out.write(cv2.cvtColor(og_frame, cv2.COLOR_RGB2BGR))

    display_frame = cv2.cvtColor(og_frame, cv2.COLOR_RGB2BGR)
    height, width = display_frame.shape[:2]

    resized_frame = cv2.resize(display_frame, (width // 2, height // 2))

    cv2.imshow('frame', cv2.resize(resized_frame, (frame_width // downscale_factor, frame_height // downscale_factor)))
    if cv2.waitKey(1) == ord('q'):
        break
    
    end_time = time.time()
    frame_times.append(end_time - start_time)

cap.release()
out.release()
cv2.destroyAllWindows()

# Calculate the average processing time per frame
average_frame_time = np.mean(frame_times)
new_fps = 1 / average_frame_time
print(f"Original FPS: {fps}, New FPS: {new_fps}")

# Use the new FPS for the output video
out = cv2.VideoWriter(output, fourcc, new_fps, (frame_width, frame_height))

In [4]:
class_list = model.names
class_list

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [5]:
cap = cv2.VideoCapture('test_videos/4.mp4')

In [7]:
import cv2

cap = cv2.VideoCapture('test_videos/4.mp4')

# Define new frame dimensions
width, height = 640, 360  # Example dimensions; adjust as needed

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Resize frame for faster processing
    #resized_frame = cv2.resize(frame, (width, height))

    # Detecting and tracking
    results = model.track(frame, persist=True)

    # Optional: draw results back on the original frame if needed
    # frame = draw_results_on_frame(frame, results)  # Placeholder for drawing logic

    # Display the frame
    cv2.imshow("Yolo", frame)  # Display resized frame if that's your target

    # Add delay for consistent playback
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 384x640 1 bus, 116.0ms
Speed: 3.0ms preprocess, 116.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bus, 97.0ms
Speed: 6.0ms preprocess, 97.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bus, 94.0ms
Speed: 2.0ms preprocess, 94.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 74.0ms
Speed: 3.0ms preprocess, 74.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 75.0ms
Speed: 3.0ms preprocess, 75.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 76.0ms
Speed: 2.0ms preprocess, 76.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 77.0ms
Speed: 3.0ms preprocess, 77.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 car, 1 bus, 76.0ms
Speed: 3.0ms preprocess, 76.0ms inference, 2.0ms postprocess per image at sh

In [9]:
import cv2

# Initialize video capture
cap = cv2.VideoCapture('test_videos/4.mp4')

# Define new frame dimensions
width, height = 640, 360  # Example dimensions; adjust as needed

# Vehicle class IDs (from your model names)
vehicle_class_ids = [1, 2, 3, 5, 6, 7]  # 'bicycle', 'car', 'motorcycle', 'bus', 'train', 'truck'

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Resize frame for faster processing (optional)
    resized_frame = cv2.resize(frame, (width, height))

    # Detect and track using YOLO model
    try:
        results = model.track(resized_frame, persist=True)
    except Exception as e:
        print(f"Tracking failed: {e}")
        continue

    # Filter results by vehicle classes
    filtered_results = [res for res in results if res['class'] in vehicle_class_ids]

    # Draw bounding boxes for filtered results
    for res in filtered_results:
        x1, y1, x2, y2 = res['bbox']  # Assuming 'bbox' contains the coordinates
        class_id = res['class']
        label = model.names[class_id]  # Get the class name

        # Draw bounding box and label
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame with tracked vehicles
    cv2.imshow("YOLO Vehicle Tracking", frame)

    # Add delay for consistent playback
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()



Tracking failed: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\video\src\lkpyramid.cpp:1394: error: (-215:Assertion failed) prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size() in function 'cv::`anonymous-namespace'::SparsePyrLKOpticalFlowImpl::calc'


Tracking failed: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\video\src\lkpyramid.cpp:1394: error: (-215:Assertion failed) prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size() in function 'cv::`anonymous-namespace'::SparsePyrLKOpticalFlowImpl::calc'


Tracking failed: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\video\src\lkpyramid.cpp:1394: error: (-215:Assertion failed) prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size() in function 'cv::`anonymous-namespace'::SparsePyrLKOpticalFlowImpl::calc'


Tracking failed: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\video\src\lkpyramid.cpp:1394: error: (-215:Assertion failed) 

KeyboardInterrupt: 