In [32]:
from ultralytics import YOLO
import cv2
import numpy as np

class Model:

    def __init__(self):
        self.yolo_model = YOLO("yolo11l_half.engine")

        self.classes = [0] # humans
        self.tracked_id = None
        self.last_position = None
        self.lost = False
        self.ignore_ids = []

        self.redetect_within = 50

    def show_all_boxes(self, image):
        image = image[:, :, :3] # remove A channel from frame
        result = self.yolo_model.track(image, persist=True, classes=self.classes, verbose=False)[0]
        return self.np_to_jpeg(result.plot())


    def track(self, image):
        self.result = self.yolo_model.track(image[:, :, :3], persist=True, classes=self.classes, verbose=False)[0]
        if not self.lost:
            tracked_index = self._get_tracked_index()
            
            if tracked_index is False:
                # set as lost if tracked id not found
                self.lost = True
                return False
            else:
                # add other detected objects to list of ids to ignore
                for i in self.result.boxes.id:
                    if i != self.tracked_id:
                        self.ignore_ids.append(i)
                
                self.last_position = self.result.boxes.xyxy[tracked_index]
                return self.last_position
        else:
            # if lost, wait for object with similar position to reappear
            if self.result.boxes.id is None:
                return False
            
            for i in range(len(self.result.boxes.id)):

                # ignore objects that appeared at the same time as original tracked object
                if self.result.boxes.id[i] not in self.ignore_ids:

                    coords = self.result.boxes.xyxy[i]
                    print("new box id: " + str(self.result.boxes.id[i]))
                    print("    coords: ", coords)
                    print("last known: ", self.last_position)
                    # TODO: not redetecting, make it more lenient
                    corners = [
                        abs(coords[0] - self.last_position[0]) <= self.redetect_within,
                        abs(coords[1] - self.last_position[1]) <= self.redetect_within,
                        abs(coords[2] - self.last_position[2]) <= self.redetect_within,
                        abs(coords[3] - self.last_position[3]) <= self.redetect_within
                    ]
                    print(corners)
                    print(corners.count(True))
                    print()
                    if corners.count(True) >= 3:
                        self.lost = False
                        self.tracked_id = self.result.boxes.id[i]
                        return self.track(image)

            # if no suitable objects found
            return False

        

    def _get_tracked_index(self):
        if self.result.boxes.id is None:
            return False
        if self.tracked_id in self.result.boxes.id:
            return np.where(self.result.boxes.id.numpy() == self.tracked_id)[0][0]
        else:
            return False
    
    def np_to_jpeg(self, data):
        return bytes(cv2.imencode('.jpg', data)[1])








In [33]:
import ipywidgets.widgets as widgets
from IPython.display import display
image_display = widgets.Image(format="jpeg", width="45%")
full_display = widgets.Image(format="jpeg", width="45%")
display(widgets.HBox([image_display, full_display]))

location_coords_display = widgets.Label()
display(location_coords_display)

m = Model()

image_display.value = m.show_all_boxes(cv2.imread("img/frame99.jpg"))

user_input = input("enter id to track (or leave blank to skip):")
m.tracked_id = int(user_input)
print("tracking object id " + str(user_input))
            
for i in range(100, 365):
    filename = "img/frame" + str(i + 1) + ".jpg"
    image = cv2.imread(filename)
    tracked_box = m.track(image)
    if tracked_box is False:
        image_display.value = bytes(cv2.imencode('.jpg', image)[1])
    else:
        image_rect = cv2.rectangle(
            image,
            (int(tracked_box[0]), int(tracked_box[1])),
            (int(tracked_box[2]), int(tracked_box[3])),
            (255, 0, 0),
            4
        )
        image_display.value = bytes(cv2.imencode('.jpg', image_rect)[1])

    full_display.value = m.show_all_boxes(image)

    # image_display.value = bytes(cv2.imencode('.jpg', image)[1])
    

HBox(children=(Image(value=b'', format='jpeg', width='45%'), Image(value=b'', format='jpeg', width='45%')))

Label(value='')

Loading yolo11l_half.engine for TensorRT inference...
[03/26/2025-10:00:18] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.
[03/26/2025-10:00:18] [TRT] [I] Loaded engine size: 52 MiB
[03/26/2025-10:00:18] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
[03/26/2025-10:00:18] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +1, GPU +35, now: CPU 7, GPU 420 (MiB)


enter id to track (or leave blank to skip): 1


tracking object id 1
new box id: tensor(56.)
    coords:  tensor([648.5623,  96.7787, 671.5542, 206.2507])
last known:  tensor([607.0129,   1.7978, 670.5184, 212.9339])
[tensor(True), tensor(False), tensor(True), tensor(True)]
3

new box id: tensor(57.)
    coords:  tensor([411.2874,  94.8757, 485.8384, 198.4579])
last known:  tensor([649.2592,  96.9442, 671.4902, 206.4091])
[tensor(False), tensor(True), tensor(False), tensor(True)]
2

new box id: tensor(58.)
    coords:  tensor([650.7577,  97.2940, 672.0000, 149.8568])
last known:  tensor([649.2592,  96.9442, 671.4902, 206.4091])
[tensor(True), tensor(True), tensor(True), tensor(False)]
3

new box id: tensor(68.)
    coords:  tensor([632.1617,  82.9624, 671.4294, 205.7449])
last known:  tensor([650.6702,  97.5452, 671.9061, 147.9467])
[tensor(True), tensor(True), tensor(True), tensor(False)]
3

new box id: tensor(70.)
    coords:  tensor([520.2321,   1.0691, 671.8786, 156.6502])
last known:  tensor([623.7186,  85.9920, 671.2540, 206.1

KeyboardInterrupt: 

In [3]:
import ipywidgets.widgets as widgets
from IPython.display import display
image_display = widgets.Image(format="jpeg", width="45%")
full_display = widgets.Image(format="jpeg", width="45%")
display(widgets.HBox([image_display, full_display]))

location_coords_display = widgets.Label()
display(location_coords_display)

import pyzed.sl as sl
camera = sl.Camera()
camera_params = sl.InitParameters()
camera_params.camera_resolution = sl.RESOLUTION.VGA
camera_params.depth_mode = sl.DEPTH_MODE.ULTRA
camera_params.coordinate_units = sl.UNIT.MILLIMETER

camera_status = camera.open(camera_params)
if camera_status != sl.ERROR_CODE.SUCCESS:
    print("camera error")
    print(camera_status)
    camera.close()
    exit()

# initialize model
m = Model()

# get initial image and choose object to track
image_mat = sl.Mat()
started_tracking = False
while not started_tracking:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)
        image = image_mat.get_data()
        
        image_display.value = m.show_all_boxes(image)
    
        user_input = input("enter id to track (or leave blank to skip):")
        if user_input == "":
            continue
        else:
            m.tracked_id = int(user_input)
            print("tracking object id " + str(user_input))
            started_tracking = True

# start tracking
running = True
while running:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)
        image = image_mat.get_data()

        tracked_box = m.track(image)
        if tracked_box is False:
            image_display.value = bytes(cv2.imencode('.jpg', image)[1])
        else:
            image_rect = cv2.rectangle(
                image,
                (int(tracked_box[0]), int(tracked_box[1])),
                (int(tracked_box[2]), int(tracked_box[3])),
                (255, 0, 0),
                4
            )
            image_display.value = bytes(cv2.imencode('.jpg', image_rect)[1])

        full_display.value = m.show_all_boxes(image)
    

camera.close()

HBox(children=(Image(value=b'', format='jpeg', width='45%'), Image(value=b'', format='jpeg', width='45%')))

Label(value='')

[2025-03-26 09:06:45 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:06:45 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:06:45 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:06:46 UTC][ZED][INFO] [Init]  Depth mode: ULTRA
[2025-03-26 09:06:47 UTC][ZED][INFO] [Init]  Camera successfully opened.
[2025-03-26 09:06:47 UTC][ZED][INFO] [Init]  Camera FW version: 1523
[2025-03-26 09:06:47 UTC][ZED][INFO] [Init]  Video mode: VGA@100
[2025-03-26 09:06:47 UTC][ZED][INFO] [Init]  Serial Number: S/N 34032459
[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...
Defaulting to user installation because normal site-packages is not writeable
Collecting lap>=0.5.12
  Downloading lap-0.5.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 3.8 MB/s eta 0:00:00
Installing collected packages: lap
Successfully installed lap-0.5.12

[31m[1mrequirements:[0m AutoUpdate succ

KeyboardInterrupt: Interrupted by user

In [12]:
import ipywidgets.widgets as widgets
from IPython.display import display
image_display = widgets.Image(format="jpeg", width="45%")
full_display = widgets.Image(format="jpeg", width="45%")
display(widgets.HBox([image_display, full_display]))

location_coords_display = widgets.Label()
display(location_coords_display)

import cv2
import time

import pyzed.sl as sl
camera = sl.Camera()
camera_params = sl.InitParameters()
camera_params.camera_resolution = sl.RESOLUTION.VGA
camera_params.depth_mode = sl.DEPTH_MODE.ULTRA
camera_params.coordinate_units = sl.UNIT.MILLIMETER

camera_status = camera.open(camera_params)
if camera_status != sl.ERROR_CODE.SUCCESS:
    print("camera error")
    print(camera_status)
    camera.close()
    exit()

time.sleep(5)

image_mat = sl.Mat()
frame = 0
max_frames = 10000
while frame < max_frames:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)
        image = image_mat.get_data()
        image_display.value = bytes(cv2.imencode('.jpg', image)[1])
        # print("taken photo")
        cv2.imwrite("img/frame" + str(frame) + ".jpg", image)
    frame += 1

print("end")
camera.close()

HBox(children=(Image(value=b'', format='jpeg', width='45%'), Image(value=b'', format='jpeg', width='45%')))

Label(value='')

[2025-03-26 09:31:16 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:31:16 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:31:16 UTC][ZED][INFO] Logging level INFO
[2025-03-26 09:31:16 UTC][ZED][INFO] [Init]  Depth mode: ULTRA
[2025-03-26 09:31:17 UTC][ZED][INFO] [Init]  Camera successfully opened.
[2025-03-26 09:31:17 UTC][ZED][INFO] [Init]  Camera FW version: 1523
[2025-03-26 09:31:17 UTC][ZED][INFO] [Init]  Video mode: VGA@100
[2025-03-26 09:31:17 UTC][ZED][INFO] [Init]  Serial Number: S/N 34032459


KeyboardInterrupt: 