In [25]:
import numpy as np
import cv2
import datetime

# display
import ipywidgets.widgets as widgets
from IPython.display import display
image_display = widgets.Image(format="jpeg", width="45%")
text_display = widgets.Image(format="jpeg", width="45%")
display(widgets.HBox(
    [image_display, text_display],
    layout=widgets.Layout(width="100%")
))

def np_to_jpeg(value):
    return bytes(cv2.imencode('.jpg',value)[1])

## ml model
from ultralytics import YOLO
model = YOLO("yolo11l_half.engine", task="detect", verbose=False)


# camera
import pyzed.sl as sl
camera = sl.Camera()
camera_params = sl.InitParameters()
camera_params.camera_resolution = sl.RESOLUTION.VGA
camera_params.depth_mode = sl.DEPTH_MODE.ULTRA
camera_params.coordinate_units = sl.UNIT.MILLIMETER

camera_status = camera.open(camera_params)
if camera_status != sl.ERROR_CODE.SUCCESS:
    print("camera error")
    print(camera_status)
    camera.close()
    exit()


def get_id(result, object_id):
    for i in range(len(result.boxes.id)):
        if result.boxes.id[i] == object_id:
            return i
    print("no id found")
    return False



FRAMES = 20
person_conf = 0.5

image_mat = sl.Mat()

i = 0
tracking = False
while i < FRAMES:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)    

        # remove A channel from image mat
        image_array = image_mat.get_data()
        
        # # detect objects
        # result = model.predict(image_array)[0]
        # annotated_image = result.plot()
        # image_display.value = np_to_jpeg(annotated_image)
        
        # track humans
        result = model.track(image_array[:, :, :-1], persist=True, classes=[0], verbose=False)[0]
        # search for person with valid conf
        for i in range(len(result.boxes.cls)):
            if not tracking:
                # show first frame
                text_display.value = np_to_jpeg(result.plot())
                # take input for id to track
                tracking_id = int(input("enter id to track: "))
                print("tracking id " + str(tracking_id))
                tracking = True

        # only track if there is at least one human in frame
        if tracking and result.boxes.id != None:
            box = get_id(result, tracking_id)
            # CURRENT ISSUE = processes images very slowly when tracked object id not found ???
            if box is not False:
                try:
                    print(box, type(box), datetime.datetime.now())
                    annotated_image = cv2.rectangle(
                        image_array,
                        (int(result.boxes.xyxy[box][0]), int(result.boxes.xyxy[box][1])),
                        (int(result.boxes.xyxy[box][2]), int(result.boxes.xyxy[box][3])),
                        (255, 0, 0),
                        4
                    )
                    image_display.value = np_to_jpeg(annotated_image)
                    text_display.value = np_to_jpeg(result.plot())
                    i += 1
                except:
                    pass

camera.close()

HBox(children=(Image(value=b'', format='jpeg', width='45%'), Image(value=b'', format='jpeg', width='45%')), la…

[2025-03-12 10:44:03 UTC][ZED][INFO] Logging level INFO
[2025-03-12 10:44:03 UTC][ZED][INFO] Logging level INFO
[2025-03-12 10:44:03 UTC][ZED][INFO] Logging level INFO
[2025-03-12 10:44:04 UTC][ZED][INFO] [Init]  Depth mode: ULTRA
[2025-03-12 10:44:05 UTC][ZED][INFO] [Init]  Camera successfully opened.
[2025-03-12 10:44:05 UTC][ZED][INFO] [Init]  Camera FW version: 1523
[2025-03-12 10:44:05 UTC][ZED][INFO] [Init]  Video mode: VGA@100
[2025-03-12 10:44:05 UTC][ZED][INFO] [Init]  Serial Number: S/N 32709812
Loading yolo11l_half.engine for TensorRT inference...
[03/12/2025-10:44:05] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.
[03/12/2025-10:44:05] [TRT] [I] Loaded engine size: 52 MiB
[03/12/2025-10:44:05] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely t

enter id to track:  6


tracking id 6
5 <class 'int'> 2025-03-12 10:44:09.473774
5 <class 'int'> 2025-03-12 10:44:09.736498
5 <class 'int'> 2025-03-12 10:44:09.924786
5 <class 'int'> 2025-03-12 10:44:10.101088
5 <class 'int'> 2025-03-12 10:44:10.284768
5 <class 'int'> 2025-03-12 10:44:10.460868
5 <class 'int'> 2025-03-12 10:44:10.641457
5 <class 'int'> 2025-03-12 10:44:10.809545
4 <class 'int'> 2025-03-12 10:44:10.977889
4 <class 'int'> 2025-03-12 10:44:11.141627
4 <class 'int'> 2025-03-12 10:44:11.307496
4 <class 'int'> 2025-03-12 10:44:11.460172
4 <class 'int'> 2025-03-12 10:44:11.611717
4 <class 'int'> 2025-03-12 10:44:11.756250
4 <class 'int'> 2025-03-12 10:44:11.925827
4 <class 'int'> 2025-03-12 10:44:12.076448
3 <class 'int'> 2025-03-12 10:44:12.211138
3 <class 'int'> 2025-03-12 10:44:12.338296
3 <class 'int'> 2025-03-12 10:44:12.466466
3 <class 'int'> 2025-03-12 10:44:12.599434
3 <class 'int'> 2025-03-12 10:44:12.728020
3 <class 'int'> 2025-03-12 10:44:12.862747
3 <class 'int'> 2025-03-12 10:44:12.9932

KeyboardInterrupt: 

In [16]:
result.boxes

ultralytics.engine.results.Boxes object with attributes:

cls: tensor([0., 0., 0., 0., 0., 0.])
conf: tensor([0.8984, 0.8789, 0.8354, 0.3970, 0.5098, 0.6851])
data: tensor([[5.3337e+02, 9.8692e-01, 6.7176e+02, 2.4988e+02, 1.0000e+00, 8.9844e-01, 0.0000e+00],
        [5.0693e+02, 3.6132e+01, 5.9349e+02, 2.3677e+02, 2.0000e+00, 8.7891e-01, 0.0000e+00],
        [3.8816e+02, 9.1830e+01, 5.4545e+02, 2.3694e+02, 3.0000e+00, 8.3545e-01, 0.0000e+00],
        [1.8499e+02, 8.8551e+01, 2.4070e+02, 1.7331e+02, 4.0000e+00, 3.9697e-01, 0.0000e+00],
        [3.8702e+01, 1.2379e+02, 8.4957e+01, 1.7468e+02, 6.0000e+00, 5.0977e-01, 0.0000e+00],
        [3.5308e-02, 1.2524e+02, 4.1590e+01, 2.2258e+02, 7.0000e+00, 6.8506e-01, 0.0000e+00]])
id: tensor([1., 2., 3., 4., 6., 7.])
is_track: True
orig_shape: (376, 672)
shape: torch.Size([6, 7])
xywh: tensor([[602.5616, 125.4352, 138.3899, 248.8965],
        [550.2141, 136.4518,  86.5597, 200.6405],
        [466.8036, 164.3864, 157.2841, 145.1119],
        [212.