# Pruebas Object Detection

In [82]:
import cv2
import numpy as np
import math
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

from pydantic import BaseModel
from collections import defaultdict


## VisionEye View With Distance Calculation 
Source: https://docs.ultralytics.com/guides/vision-eye/#samples

In [84]:
# img = cv2.imread('bus.jpg')
model = YOLO('yolov8n.pt')
model = YOLO('yolov8n-pose.pt')
names = model.names
person_clss  = 0
print(names[person_clss])

person


In [41]:
cap = cv2.VideoCapture("personas.mp4")

w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

print(f"Video frame width: {w}, height: {h}, fps: {fps}")
# out = cv2.VideoWriter('visioneye-distance-calculation.avi', cv2.VideoWriter_fourcc(*'MJPG'), fps, (w, h))

center_point = (0, h)
pixel_per_meter = 10

txt_color, txt_background, bbox_clr = ((0, 0, 0), (255, 255, 255), (255, 0, 255))

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    annotator = Annotator(im0, line_width=2)

    results = model.track(im0, persist=True)
    results.plot()
    boxes = results[0].boxes.xyxy.cpu()


    if results[0].boxes.id is not None:
        track_ids = results[0].boxes.id.int().cpu().tolist()

        for box, track_id in zip(boxes, track_ids):
            annotator.box_label(box, label=str(track_id), color=bbox_clr)
            annotator.visioneye(box, center_point)

            x1, y1 = int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)    # Bounding box centroid

            distance = (math.sqrt((x1 - center_point[0]) ** 2 + (y1 - center_point[1]) ** 2))/pixel_per_meter

            text_size, _ = cv2.getTextSize(f"Distance: {distance:.2f} m", cv2.FONT_HERSHEY_SIMPLEX,1.2, 3)
            cv2.rectangle(im0, (x1, y1 - text_size[1] - 10),(x1 + text_size[0] + 10, y1), txt_background, -1)
            cv2.putText(im0, f"Distance: {distance:.2f} m",(x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.2,txt_color, 3)

    # out.write(im0)
    cv2.imshow("visioneye-distance-calculation", im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# out.release()
cap.release()
cv2.destroyAllWindows()


Video frame width: 0, height: 0, fps: 0
Video frame is empty or video processing has been successfully completed.


## Distance Calculation

Source: 
* https://github.com/Asadullah-Dal17/Yolov4-Detector-and-Distance-Estimator/blob/master/DistanceEstimation.py
* https://www.youtube.com/watch?v=FcRCwTgYXJw&ab_channel=AiPhile 

### Funciones Auxiliares

In [15]:
def focal_length_finder(measured_distance, real_width, width_in_frame):
    focal_length = (width_in_frame * measured_distance) / real_width
    return focal_length

In [14]:
def distance_finder(focal_length, real_width, width_in_frame):
    distance = (real_width * focal_length) / width_in_frame
    return distance

### Detección y Calculo de distancia en Video

In [91]:
# Medidas en metros
KNOWN_DISTANCE = 2
PERSON_WIDTH = 0.38 # Ancho de una persona promedio - Medidas de Fri - 38 cm
VIDEO_PATH = "//home/fcanof/Documents/sdv_vision_notebooks/data/ref_video/2_personas.mp4"

Con Bounding Boxes / Impreciso

In [None]:
cap = cv2.VideoCapture(VIDEO_PATH)
count = 0
while True:
    ret, frame = cap.read()

    results = model(frame, classes=0)
    if results[0].boxes is not None:
        boxes_w = results[0].boxes.xywh.cpu()
        boxes_xyxy = results[0].boxes.xyxy.cpu()
        for box_w, box_xyxy in zip(boxes_w, boxes_xyxy):
            print("Mode: ", MODE)
            person_width = box_w[2]
            person_height = box_w[3]
            x,y = int(box_xyxy[0]), int(box_xyxy[1]+(person_height-50))
            if MODE == "calibration":
                focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width)
                print(f"Focal length: {focal_person}")
            else:
                distance = distance_finder(focal_person, PERSON_WIDTH, person_width)
                distance = round(float(distance), 3)
                print(f"Distance: {distance} meters")
            cv2.rectangle(frame, (x, y-1), (x+200, y+25), (0,0,255),-1 )
            text = "Distance "+str(distance)+" meters"
            cv2.putText(frame, text, (x+10,y+20), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (255,255,255), 2)

    cv2.imshow('frame',frame)
    count += 1
    if count == 100:
        MODE = "distance"
    key = cv2.waitKey(1)
    if key ==ord('q'):
        break
cv2.destroyAllWindows()
cap.release()

Con KeyPoints / Más Preciso

In [97]:
class GetKeypoint(BaseModel):
    NOSE:           int = 0
    LEFT_EYE:       int = 1
    RIGHT_EYE:      int = 2
    LEFT_EAR:       int = 3
    RIGHT_EAR:      int = 4
    LEFT_SHOULDER:  int = 5
    RIGHT_SHOULDER: int = 6
    LEFT_ELBOW:     int = 7
    RIGHT_ELBOW:    int = 8
    LEFT_WRIST:     int = 9
    RIGHT_WRIST:    int = 10
    LEFT_HIP:       int = 11
    RIGHT_HIP:      int = 12
    LEFT_KNEE:      int = 13
    RIGHT_KNEE:     int = 14
    LEFT_ANKLE:     int = 15
    RIGHT_ANKLE:    int = 16
get_keypoint = GetKeypoint()
cap = cv2.VideoCapture(VIDEO_PATH)
count = 0
MODE = "calibration"
while True:
    ret, frame = cap.read()

    results = model(frame, classes=0, show=True, conf = 0.8)
    if results[0].boxes is not None:
        boxes_w = results[0].boxes.xywh.cpu()
        boxes_xyxy = results[0].boxes.xyxy.cpu()
        keypoints = results[0].keypoints.xy.cpu().numpy()
        for box_w, box_xyxy, keypoint in zip(boxes_w, boxes_xyxy, keypoints):
            print("Mode: ", MODE)
            shoulder_left_x, _sly = keypoint[get_keypoint.LEFT_SHOULDER]
            shoulder_right_x, _sry = keypoint[get_keypoint.RIGHT_SHOULDER]
            person_width = int(abs(shoulder_left_x - shoulder_right_x))
            print("Person width: ", person_width)
            person_height = box_w[3]
            x,y = int(box_xyxy[0]), int(box_xyxy[1]+(person_height-50))
            if MODE == "calibration":
                focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width)
                print(f"Focal length: {focal_person}")
            else:
                distance = distance_finder(focal_person, PERSON_WIDTH, person_width)
                distance = round(float(distance), 3)
                print(f"Distance: {distance} meters")
            cv2.rectangle(frame, (x, y-1), (x+200, y+25), (0,0,255),-1 )
            text = "Distance "+str(distance)+" meters"
            cv2.putText(frame, text, (x+10,y+20), cv2.FONT_HERSHEY_SIMPLEX, 0.48, (255,255,255), 2)

    cv2.imshow('frame',frame)
    count += 1
    if count == 150:
        MODE = "distance"
    key = cv2.waitKey(1)
    if key ==ord('q'):
        break
cv2.destroyAllWindows()
cap.release()




0: 384x640 1 person, 90.2ms
Speed: 3.3ms preprocess, 90.2ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
Mode:  calibration
Person width:  0
Focal length: 0.0

0: 384x640 1 person, 88.0ms
Speed: 3.6ms preprocess, 88.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Mode:  calibration
Person width:  0
Focal length: 0.0

0: 384x640 1 person, 80.5ms
Speed: 2.9ms preprocess, 80.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
Mode:  calibration
Person width:  0
Focal length: 0.0

0: 384x640 1 person, 84.2ms
Speed: 6.4ms preprocess, 84.2ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Mode:  calibration
Person width:  0
Focal length: 0.0

0: 384x640 1 person, 70.3ms
Speed: 1.5ms preprocess, 70.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
Mode:  calibration
Person width:  0
Focal length: 0.0

0: 384x640 1 person, 81.9ms
Speed: 2.1ms preprocess, 81.9ms inference, 1.6ms postprocess per image a

KeyboardInterrupt: 