In [5]:
from ultralytics import YOLO

# Load your trained YOLOv8-pose model
model = YOLO("bestv7-2.pt")

# Export the model to TensorRT engine format
model.export(format="engine", batch=10)


Ultralytics 8.3.38  Python-3.10.16 torch-2.6.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11l-pose summary (fused): 483 layers, 26,134,189 parameters, 0 gradients, 90.3 GFLOPs

[34m[1mPyTorch:[0m starting from 'bestv7-2.pt' with input shape (10, 3, 640, 640) BCHW and output shape(s) (10, 35, 8400) (100.6 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.48...
[34m[1mONNX:[0m export success  5.1s, saved as 'bestv7-2.onnx' (100.1 MB)

[34m[1mTensorRT:[0m starting export with TensorRT 10.9.0.34...
[34m[1mTensorRT:[0m input "images" with shape(10, 3, 640, 640) DataType.FLOAT
[34m[1mTensorRT:[0m output "output0" with shape(10, 35, 8400) DataType.FLOAT
[34m[1mTensorRT:[0m building FP32 engine as bestv7-2.engine
[34m[1mTensorRT:[0m export success  205.2s, saved as 'bestv7-2.engine' (126.0 MB)

Export complete (205.8s)
Results saved to [1mC:\wajahat\looking_around_keypoint[0m
Predict:   

'bestv7-2.engine'

In [None]:
import os
import cv2
import numpy as np
import math
import time
from ultralytics import YOLO

def visual_region(center_point, angle):
    length = center_point[1] / 3
    angle_rad = np.radians(angle)
    left_end_point = (
        int(center_point[0] - length * np.sin(angle_rad)),
        int(center_point[1] - length * np.cos(angle_rad))
    )
    right_end_point = (
        int(center_point[0] + length * np.sin(angle_rad)),
        int(center_point[1] - length * np.cos(angle_rad))
    )
    return left_end_point, right_end_point

def get_angle(x1, y1, x2, y2):
    if x1 == x2:
        return 0
    angle_rad = np.arctan2(y2 - y1, x2 - x1) * (180 / np.pi)
    return angle_rad

if __name__ == "__main__":
    model = YOLO("bestv7-2.engine", task="pose")  # Load TensorRT engine
    video_path = "C:/Users/LAMBDA THETA/Videos/evaluation/chunk_06-03-25_13-32-desk21-22-23-24 - Trim.avi"
    cap = cv2.VideoCapture(video_path)
    ymin, ymax = 120, 900
    allowed_angle = 60

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        start_time = time.time()
        results = model(frame)

        for result in results:
            keypoints = result.keypoints
            if keypoints is not None:
                keypoints_data = keypoints.data
                for person_keypoints in keypoints_data:
                    for kp in person_keypoints:
                        x, y, confidence = kp
                        if confidence > 0.5:
                            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)

                    A = person_keypoints[2]
                    B = person_keypoints[3]
                    Ax, Ay = A[0].item(), A[1].item()
                    Bx, By = B[0].item(), B[1].item()
                    cv2.line(frame, (int(Ax), int(Ay)), (int(Bx), int(By)), (0, 255, 0), 2)

                    angle = get_angle(Ax, Ay, Bx, By)
                    center = (int((Ax + Bx) / 2), int((Ay + By) / 2))
                    diff = (ymax - ymin) / 2
                    LA_angle_threshold = allowed_angle - int(((By + Ay) / 2 - diff) / (ymax - diff) * 30)
                    left_point, right_point = visual_region(center, LA_angle_threshold)
                    cv2.line(frame, center, left_point, (255, 255, 255), 4)
                    cv2.line(frame, center, right_point, (255, 255, 255), 4)

                    if abs(angle) >= LA_angle_threshold:
                        cv2.putText(frame, "Looking around", (center[0], center[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)

        # fps = 1.0 / (time.time() - start_time + 1e-4)
        # cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        resized_frame = cv2.resize(frame, (1280, 640))
        cv2.imshow('Pose Detection', resized_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        
        print("fps =", 1.0 / (time.time() - start_time + 0.0001))
        
    cap.release()
    cv2.destroyAllWindows()


Loading bestv7-2.engine for TensorRT inference...

0: 640x640 1 person, 15.0ms
Speed: 26.0ms preprocess, 15.0ms inference, 67.0ms postprocess per image at shape (1, 3, 640, 640)
fps = 0.7208992695665472

0: 640x640 1 person, 15.5ms
Speed: 2.0ms preprocess, 15.5ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
fps = 36.19853817802146

0: 640x640 1 person, 17.4ms
Speed: 1.0ms preprocess, 17.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
fps = 35.098945542756326

0: 640x640 1 person, 15.0ms
Speed: 3.0ms preprocess, 15.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
fps = 35.58583353574668

0: 640x640 1 person, 16.5ms
Speed: 1.0ms preprocess, 16.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)
fps = 35.687569260688434

0: 640x640 1 person, 17.8ms
Speed: 2.0ms preprocess, 17.8ms inference, 2.2ms postprocess per image at shape (1, 3, 640, 640)
fps = 32.13793813324313

0: 640x640 1 person, 17.0ms
Speed: 2.0ms pre

IndexError: index 2 is out of bounds for dimension 0 with size 0

: 