In [34]:
from ultralytics import YOLO
import cv2
import numpy as np

class Model:

    def __init__(self):
        self.yolo_model = YOLO("yolo11l_half.engine")

        self.classes = [0] # humans
        self.track_id = False
        self.result = False
        
    def analyze(self, frame):
        """
        analyzes frame and saves result to class
        get result info using get_image, get_
        
        params
        frame (np.array) : RGBA image

        returns (bool) : False if no objects were detected, True otherwise
        """
        frame = frame[:, :, :-1] # remove A channel from fram
        self.result = self.yolo_model.track(frame, persist=True, classes=self.classes, verbose=False)[0]
        
        
        return self.result.boxes.id != None

    def track(self, track_id):
        """
        start tracking specified id
        
        params
        track_id (int) : numerical id of object to track
        """
        self.track_id = track_id


    
    def _get_tracking_index(self):
        """
        finds index of tracked id in results object
        raise exception if not found

        returns (int) : index of tracked object
        """
        if self.track_id in self.result.boxes.id:
            return np.where(self.result.boxes.id.numpy() == self.track_id)[0][0]
        else:
            raise Exception("tracked id not found!!")

    
    def get_box(self):
        """
        returns coordinates of detected bounding box (top left, bottom right)
        
        returns ((int, int), (int, int)) : tuple of pairs of coordinates
        """
        object_index = self._get_tracking_index()
        xyxy = self.result.boxes.xyxy[object_index]
        return (
            (int(xyxy[0]), int(xyxy[1])),
            (int(xyxy[2]), int(xyxy[3]))
        )

    def get_box_centre(self):
        """
        get coordinates for centre of detected bounding box
        
        returns (int, int)
        """
        box_coords = self.get_box()
        return (
            int((box_coords[0][0] + box_coords[1][0]) / 2),
            int((box_coords[0][1] + box_coords[1][1]) / 2)
        )
    
    def get_box_image(self, colour=(255, 0, 0), thickness=4):
        """
        params
        colour (int, int, int) : BGR colour of box 
        thickness (int) : thickness of box line
        
        returns (bytes) : jpeg of frame with 1 bounding box around tracked object
        """
        box_coords = self.get_box()
        annotated_image = cv2.rectangle(
            self.result.orig_img.copy(),
            box_coords[0],
            box_coords[1],
            colour,
            thickness
        )
        return self.np_to_jpeg(annotated_image)


    def get_all_boxes_image(self):
        """
        returns image with all detected bounding boxes + their id and confidence
        
        returns (bytes) : jpeg of frame with detected bounding boxes 
        """
            
        return self.np_to_jpeg(self.result.plot())
    
    def np_to_jpeg(self, data):
        return bytes(cv2.imencode('.jpg', data)[1])


In [39]:
import ipywidgets.widgets as widgets
from IPython.display import display
image_display = widgets.Image(format="jpeg", width="45%")
full_display = widgets.Image(format="jpeg", width="45%")
display(widgets.HBox([image_display, full_display]))

location_coords_display = widgets.Label()
display(location_coords_display)

import motors
robot = motors.MotorsYukon()

import pyzed.sl as sl
camera = sl.Camera()
camera_params = sl.InitParameters()
camera_params.camera_resolution = sl.RESOLUTION.VGA
camera_params.depth_mode = sl.DEPTH_MODE.ULTRA
camera_params.coordinate_units = sl.UNIT.MILLIMETER

camera_status = camera.open(camera_params)
if camera_status != sl.ERROR_CODE.SUCCESS:
    print("camera error")
    print(camera_status)
    camera.close()
    exit()

# initialize model
m = Model()

# get initial image and choose object to track
image_mat = sl.Mat()
started_tracking = False
while not started_tracking:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)
        image = image_mat.get_data()
        
        if m.analyze(image):
            image_display.value = m.get_all_boxes_image()
            
            user_input = input("enter id to track (or leave blank to skip):")
            if user_input == "":
                continue
            else:
                m.track(int(user_input))
                print("tracking object id " + str(user_input))
                started_tracking = True
        else:
            continue


# params for turning
from_edge = 250
left = from_edge
right = 672 - from_edge
turn_speed = 0.5

# start tracking
running = True
while running:
    err = camera.grab()
    if err == sl.ERROR_CODE.SUCCESS:
        camera.retrieve_image(image_mat)
        image = image_mat.get_data()

        m.analyze(image)
        image_display.value = m.get_box_image()
        full_display.value = m.get_all_boxes_image()
        location_coords = m.get_box_centre()
        location_coords_display.value = str(location_coords)

        if location_coords[0] < left:
            robot.left(turn_speed)
        elif location_coords[0] > right:
            robot.right(turn_speed)
        else:
            robot.stop()
        
    

camera.close()

HBox(children=(Image(value=b'', format='jpeg', width='45%'), Image(value=b'', format='jpeg', width='45%')))

Label(value='')

[2025-03-19 11:31:53 UTC][ZED][INFO] Logging level INFO
[2025-03-19 11:31:53 UTC][ZED][INFO] Logging level INFO
[2025-03-19 11:31:54 UTC][ZED][INFO] Logging level INFO
[2025-03-19 11:31:54 UTC][ZED][INFO] [Init]  Depth mode: ULTRA
[2025-03-19 11:31:55 UTC][ZED][INFO] [Init]  Camera successfully opened.
[2025-03-19 11:31:55 UTC][ZED][INFO] [Init]  Camera FW version: 1523
[2025-03-19 11:31:55 UTC][ZED][INFO] [Init]  Video mode: VGA@100
[2025-03-19 11:31:55 UTC][ZED][INFO] [Init]  Serial Number: S/N 37413003
Loading yolo11l_half.engine for TensorRT inference...
[03/19/2025-11:31:56] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.
[03/19/2025-11:31:56] [TRT] [I] Loaded engine size: 52 MiB
[03/19/2025-11:31:56] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely t

enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 
enter id to track (or leave blank to skip): 4


tracking object id 4


Exception: tracked id not found!!

In [27]:
type(m.result.orig_img
)

numpy.ndarray