In [4]:
import cv2
from ultralytics import YOLO

# Load the pretrained YOLOv8 model. The letter before '.pt' selects the model
# size (e.g. 'yolov8n.pt' is nano, the smallest; 'yolov8s.pt' is small).
model = YOLO('yolov8s.pt')

# Load video
video_path = 'input_video4.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below never runs and an empty,
    # zero-sized output file is written without any hint of what went wrong.
    cap.release()
    raise IOError(f"Could not open video: {video_path}")

# Output video setup: mirror the source's frame size and frame rate so the
# annotated video plays back at the same speed as the input.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
fps = cap.get(cv2.CAP_PROP_FPS)
if not (fps > 0):  # also catches NaN, for which every comparison is False
    fps = 30.0  # fallback when the backend cannot report a frame rate
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_car_detection.mp4', fourcc, fps, frame_size)

# List of vehicle class IDs in COCO dataset
VEHICLE_CLASS_IDS = [2, 3, 5, 7, 1]  # Car, Motorcycle, Bus, Truck, Bicycle

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing.
            break

        # Run YOLOv8 inference on the frame; a single image yields one result.
        results = model(frame)[0]

        # Annotate every detection whose class is one of the vehicle classes.
        for box in results.boxes:
            cls_id = int(box.cls[0])
            if cls_id not in VEHICLE_CLASS_IDS:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            # Use the model's own class name so buses, trucks, motorcycles and
            # bicycles are not all labelled as cars.
            label = f"{results.names[cls_id]} {conf:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 1)
            # Clamp the text baseline so labels of boxes touching the top edge
            # are drawn inside the frame instead of being clipped away.
            label_y = max(y1 - 10, 15)
            cv2.putText(frame, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)

        out.write(frame)
        cv2.imshow('Car Detection', frame)

        # Pressing 'q' in the preview window stops processing early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture/writer and close the window, even when
    # inference raises or the cell is interrupted; otherwise the output file
    # may be left unfinalized and the input stays locked.
    cap.release()
    out.release()
    cv2.destroyAllWindows()



0: 384x640 19 persons, 1 bicycle, 8 cars, 1 motorcycle, 2 buss, 1 truck, 1 traffic light, 661.6ms
Speed: 5.5ms preprocess, 661.6ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 7 cars, 2 buss, 1 truck, 1 traffic light, 501.7ms
Speed: 7.7ms preprocess, 501.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 7 cars, 2 buss, 1 truck, 1 traffic light, 1 suitcase, 697.7ms
Speed: 4.1ms preprocess, 697.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 8 cars, 2 buss, 1 truck, 1 traffic light, 1 suitcase, 371.2ms
Speed: 4.2ms preprocess, 371.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 7 cars, 2 buss, 1 truck, 1 traffic light, 481.9ms
Speed: 3.0ms preprocess, 481.9ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 7 cars, 1 truck, 1 traffic light, 496.9ms
Speed: 10.3ms preprocess,