In [70]:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # автоматически инициализирует CUDA контекст
import torch
import cv2
import sys
import math
import time
import numpy as np
import yaml
import torch
import numpy as np

In [71]:
def cal_bbox(score_map_ctr, size_map, offset_map, return_score=True):
        feat_sz = 14
        max_score, idx = torch.max(score_map_ctr.flatten(1), dim=1, keepdim=True) # score_map_ctr.flatten(1): torch.Size([32, 256]) idx: torch.Size([32, 1]) max_score: torch.Size([32, 1])
        idx_y = torch.div(idx, feat_sz, rounding_mode='floor')
        idx_x = idx % feat_sz
       
        

        idx = idx.unsqueeze(1).expand(idx.shape[0], 2, 1)
        size = size_map.flatten(2).gather(dim=2, index=idx) # size_map: torch.Size([32, 2, 16, 16])  size_map.flatten(2): torch.Size([32, 2, 256])
        offset = offset_map.flatten(2).gather(dim=2, index=idx).squeeze(-1)

        bbox = torch.cat([(idx_x.to(torch.float) + offset[:, :1]) / feat_sz,
                          (idx_y.to(torch.float) + offset[:, 1:]) / feat_sz,
                          size.squeeze(-1)], dim=1)

        if return_score:
            return bbox, max_score
        return (bbox, max_score) if return_score else bbox
        
class Preprocessor(object):
    def __init__(self):
        self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda()
        self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda()
        self.mm_mean = torch.tensor([0.485, 0.456, 0.406, 0.485, 0.456, 0.406]).view((1, 6, 1, 1)).cuda()
        self.mm_std = torch.tensor([0.229, 0.224, 0.225, 0.229, 0.224, 0.225]).view((1, 6, 1, 1)).cuda()

    def process(self, img_arr: np.ndarray):
        if img_arr.shape[-1] == 6:
            mean = self.mm_mean
            std = self.mm_std
        else:
            mean = self.mean
            std = self.std
        # Deal with the image patch
        img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0)
        # img_tensor = torch.tensor(img_arr).float().permute((2,0,1)).unsqueeze(dim=0)
        img_tensor_norm = ((img_tensor / 255.0) - mean) / std  # (1,3,H,W)
        return img_tensor_norm
    
def hann1d(sz: int, centered = True) -> torch.Tensor:
    """1D cosine window."""
    if centered:
        return 0.5 * (1 - torch.cos((2 * math.pi / (sz + 1)) * torch.arange(1, sz + 1).float()))
    w = 0.5 * (1 + torch.cos((2 * math.pi / (sz + 2)) * torch.arange(0, sz//2 + 1).float()))
    return torch.cat([w, w[1:sz-sz//2].flip((0,))])
    
def hann2d(sz: torch.Tensor, centered = True) -> torch.Tensor:
    """2D cosine window."""
    return hann1d(sz[0].item(), centered).reshape(1, 1, -1, 1) * hann1d(sz[1].item(), centered).reshape(1, 1, 1, -1)    

def sample_target(im, target_bb, search_area_factor, output_sz=None):
   
    if not isinstance(target_bb, list):
        x, y, w, h = target_bb.tolist()
    else:
        x, y, w, h = target_bb
    # Crop image
    crop_sz = math.ceil(math.sqrt(w * h) * search_area_factor)

    if crop_sz < 1:
        raise Exception('Too small bounding box.')

    x1 = round(x + 0.5 * w - crop_sz * 0.5)
    x2 = x1 + crop_sz

    y1 = round(y + 0.5 * h - crop_sz * 0.5)
    y2 = y1 + crop_sz

    x1_pad = max(0, -x1)
    x2_pad = max(x2 - im.shape[1] + 1, 0)

    y1_pad = max(0, -y1)
    y2_pad = max(y2 - im.shape[0] + 1, 0)

    # Crop target
    im_crop = im[y1 + y1_pad:y2 - y2_pad, x1 + x1_pad:x2 - x2_pad, :]

    # Pad
    im_crop_padded = cv2.copyMakeBorder(im_crop, y1_pad, y2_pad, x1_pad, x2_pad, cv2.BORDER_CONSTANT)
    # deal with attention mask
    H, W, _ = im_crop_padded.shape

    if output_sz is not None:
        resize_factor = output_sz / crop_sz
        im_crop_padded = cv2.resize(im_crop_padded, (output_sz, output_sz))

        return im_crop_padded, resize_factor

    else:
        return im_crop_padded, 1.0
def transform_image_to_crop(box_in: torch.Tensor, box_extract: torch.Tensor, resize_factor: float,
                            crop_sz: torch.Tensor, normalize=False) -> torch.Tensor:
   
    box_extract_center = box_extract[0:2] + 0.5 * box_extract[2:4]

    box_in_center = box_in[0:2] + 0.5 * box_in[2:4]

    box_out_center = (crop_sz - 1) / 2 + (box_in_center - box_extract_center) * resize_factor
    box_out_wh = box_in[2:4] * resize_factor

    box_out = torch.cat((box_out_center - 0.5 * box_out_wh, box_out_wh))
    if normalize:
        return box_out / (crop_sz[0]-1)
    else:
        return box_out
def clip_box(box: list, H, W, margin=0):
    x1, y1, w, h = box
    x2, y2 = x1 + w, y1 + h
    x1 = min(max(0, x1), W-margin)
    x2 = min(max(margin, x2), W)
    y1 = min(max(0, y1), H-margin)
    y2 = min(max(margin, y2), H)
    w = max(margin, x2-x1)
    h = max(margin, y2-y1)
    return [x1, y1, w, h]

class BaseTracker():
    """Base class for all trackers."""

    def __init__(self, params):
        self.params = params
        self.visdom = None
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def predicts_segmentation_mask(self):
        return False

    def initialize(self, image, info: dict) -> dict:
        """Overload this function in your tracker. This should initialize the model."""
        raise NotImplementedError

    def track(self, image, info: dict = None) -> dict:
        """Overload this function in your tracker. This should track in the frame and update the model."""
        raise NotImplementedError

    def visdom_draw_tracking(self, image, box, segmentation=None):
        # Упрощенная обработка box без OrderedDict
        if isinstance(box, dict):  # Проверяем на обычный dict вместо OrderedDict
            box = list(box.values())  # Берем только значения
        elif not isinstance(box, (list, tuple)):  # Если не коллекция
            box = (box,)  # Превращаем в кортеж
        
        # Визуализация
        if segmentation is None:
            self.visdom.register((image, *box), 'Tracking', 1, 'Tracking')
        else:
            self.visdom.register((image, *box, segmentation), 'Tracking', 1, 'Tracking')

In [72]:
cfg = {}

# MODEL
cfg["MODEL"] = {}

# MODEL.ENCODER
cfg["MODEL"]["ENCODER"] = {
    "TYPE": "dinov2_vitb14",  # encoder model
    "DROP_PATH": 0,
    "PRETRAIN_TYPE": "mae",  # mae, default, or scratch. This parameter is not activated for dinov2.
    "USE_CHECKPOINT": False,  # to save the memory.
    "STRIDE": 14,
    "POS_TYPE": 'interpolate',  # type of loading the positional encoding. "interpolate" or "index".
    "TOKEN_TYPE_INDICATE": False,  # add a token_type_embedding to indicate the search, template_foreground, template_background
    "INTERACTION_INDEXES": [[0, 6], [6, 12], [12, 18], [18, 24]],
    "GRAD_CKPT": False
}

# MODEL.NECK
cfg["MODEL"]["NECK"] = {
    "N_LAYERS": 4,
    "D_MODEL": 512,
    "D_STATE": 16  # MAMABA_HIDDEN_STATE
}

# MODEL.DECODER
cfg["MODEL"]["DECODER"] = {
    "TYPE": "CENTER",  # MLP, CORNER, CENTER
    "NUM_CHANNELS": 256
}

# TRAIN
cfg["TRAIN"] = {
    "LR": 0.0001,
    "WEIGHT_DECAY": 0.0001,
    "EPOCH": 500,
    "LR_DROP_EPOCH": 400,
    "BATCH_SIZE": 8,
    "NUM_WORKER": 8,
    "OPTIMIZER": "ADAMW",
    "ENCODER_MULTIPLIER": 0.1,  # encoder's LR = this factor * LR
    "FREEZE_ENCODER": False,  # for freezing the parameters of encoder
    "ENCODER_OPEN": [],  # only for debug, open some layers of encoder when FREEZE_ENCODER is True
    "CE_WEIGHT": 1.0,  # weight for cross-entropy loss
    "GIOU_WEIGHT": 2.0,
    "L1_WEIGHT": 5.0,
    "PRINT_INTERVAL": 50,  # interval to print the training log
    "GRAD_CLIP_NORM": 0.1,
    "FIX_BN": False,
    "ENCODER_W": "",
    "TYPE": "normal",  # normal, peft or fft
    "PRETRAINED_PATH": None
}

# TRAIN.SCHEDULER
cfg["TRAIN"]["SCHEDULER"] = {
    "TYPE": "step",
    "DECAY_RATE": 0.1
}

# DATA
cfg["DATA"] = {
    "MEAN": [0.485, 0.456, 0.406],
    "STD": [0.229, 0.224, 0.225],
    "MAX_SAMPLE_INTERVAL": 200,
    "SAMPLER_MODE": "order",
    "LOADER": "tracking"
}

# DATA.TRAIN
cfg["DATA"]["TRAIN"] = {
    "DATASETS_NAME": ["LASOT", "GOT10K_vottrain"],
    "DATASETS_RATIO": [1, 1],
    "SAMPLE_PER_EPOCH": 60000
}

# DATA.SEARCH
cfg["DATA"]["SEARCH"] = {
    "NUMBER": 1,  # number of search region, only support 1 for now.
    "SIZE": 256,
    "FACTOR": 4.0,
    "CENTER_JITTER": 3.5,
    "SCALE_JITTER": 0.5
}

# DATA.TEMPLATE
cfg["DATA"]["TEMPLATE"] = {
    "NUMBER": 1,
    "SIZE": 128,
    "FACTOR": 2.0,
    "CENTER_JITTER": 0,
    "SCALE_JITTER": 0
}

# TEST
cfg["TEST"] = {
    "TEMPLATE_FACTOR": 4.0,
    "TEMPLATE_SIZE": 256,
    "SEARCH_FACTOR": 2.0,
    "SEARCH_SIZE": 128,
    "EPOCH": 500,
    "WINDOW": False,  # window penalty
    "NUM_TEMPLATES": 1
}

# TEST.UPT
cfg["TEST"]["UPT"] = {
    "DEFAULT": 1,
    "LASOT": 0,
    "LASOT_EXTENSION_SUBSET": 0,
    "TRACKINGNET": 0,
    "TNL2K": 0,
    "NFS": 0,
    "UAV": 0,
    "VOT20": 0,
    "GOT10K_TEST": 0
}

# TEST.UPH
cfg["TEST"]["UPH"] = {
    "DEFAULT": 1,
    "LASOT": 0,
    "LASOT_EXTENSION_SUBSET": 0,
    "TRACKINGNET": 0,
    "TNL2K": 0,
    "NFS": 0,
    "UAV": 0,
    "VOT20": 0,
    "GOT10K_TEST": 0
}

# TEST.INTER
cfg["TEST"]["INTER"] = {
    "DEFAULT": 999999,
    "LASOT": 0,
    "LASOT_EXTENSION_SUBSET": 0,
    "TRACKINGNET": 0,
    "TNL2K": 0,
    "NFS": 0,
    "UAV": 0,
    "VOT20": 0,
    "GOT10K_TEST": 0
}

# TEST.MB
cfg["TEST"]["MB"] = {
    "DEFAULT": 500,
    "LASOT": 0,
    "LASOT_EXTENSION_SUBSET": 0,
    "TRACKINGNET": 0,
    "TNL2K": 0,
    "NFS": 0,
    "UAV": 0,
    "VOT20": 0,
    "GOT10K_TEST": 0
}

In [73]:
#Params
class TrackerParams:
    """Class for tracker parameters."""
    def set_default_values(self, default_vals: dict):
        for name, val in default_vals.items():
            if not hasattr(self, name):
                setattr(self, name, val)

    def get(self, name: str, *default):
        """Get a parameter value with the given name. If it does not exists, it return the default value given as a
        second argument or returns an error if no default value is given."""
        if len(default) > 1:
            raise ValueError('Can only give one default value.')

        if not default:
            return getattr(self, name)

        return getattr(self, name, default[0])

    def has(self, name: str):
        """Check if there exist a parameter with the given name."""
        return hasattr(self, name)

def _update_config(base_cfg, exp_cfg):
    if isinstance(base_cfg, dict) and isinstance(exp_cfg, dict):
        for k, v in exp_cfg.items():
            if k in base_cfg:
                if not isinstance(v, dict):
                    base_cfg[k] = v
                else:
                    _update_config(base_cfg[k], v)
            else:
                raise ValueError("{} not exist in config.py".format(k))
    else:
        return

def update_config_from_file(filename):
    exp_config = None
    with open(filename) as f:
        exp_config = yaml.safe_load(f)
        _update_config(cfg, exp_config)
    
def parameters(yaml_name: str):
    params = TrackerParams()

    yaml_file = "mcitrack_t224.yaml"
    update_config_from_file(yaml_file)
    params.cfg = cfg
    print("test config: ", cfg)

    params.yaml_name = yaml_name
    # template and search region
    params.template_factor = cfg["TEST"]["TEMPLATE_FACTOR"]
    params.template_size = cfg["TEST"]["TEMPLATE_SIZE"]
    params.search_factor = cfg["TEST"]["SEARCH_FACTOR"]
    params.search_size = cfg["TEST"]["SEARCH_SIZE"]

    # Network checkpoint path
    params.checkpoint = "MCITrack.trt"
    # whether to save boxes from all queries
    params.save_all_boxes = False

    return params

params = parameters("./mcitrack_t224.yaml")

test config:  {'MODEL': {'ENCODER': {'TYPE': 'fastitpnt', 'DROP_PATH': 0.1, 'PRETRAIN_TYPE': './fast_itpn_tiny_1600e_1k.pt', 'USE_CHECKPOINT': False, 'STRIDE': 16, 'POS_TYPE': 'index', 'TOKEN_TYPE_INDICATE': True, 'INTERACTION_INDEXES': [[4, 7], [7, 10], [10, 13], [13, 16]], 'GRAD_CKPT': False}, 'NECK': {'N_LAYERS': 4, 'D_MODEL': 384, 'D_STATE': 16}, 'DECODER': {'TYPE': 'CENTER', 'NUM_CHANNELS': 256}}, 'TRAIN': {'LR': 0.0004, 'WEIGHT_DECAY': 0.0001, 'EPOCH': 300, 'LR_DROP_EPOCH': 240, 'BATCH_SIZE': 64, 'NUM_WORKER': 10, 'OPTIMIZER': 'ADAMW', 'ENCODER_MULTIPLIER': 0.1, 'FREEZE_ENCODER': False, 'ENCODER_OPEN': [], 'CE_WEIGHT': 1.0, 'GIOU_WEIGHT': 2.0, 'L1_WEIGHT': 5.0, 'PRINT_INTERVAL': 50, 'GRAD_CLIP_NORM': 0.1, 'FIX_BN': False, 'ENCODER_W': '', 'TYPE': 'normal', 'PRETRAINED_PATH': None, 'SCHEDULER': {'TYPE': 'step', 'DECAY_RATE': 0.1}}, 'DATA': {'MEAN': [0.485, 0.456, 0.406], 'STD': [0.229, 0.224, 0.225], 'MAX_SAMPLE_INTERVAL': 400, 'SAMPLER_MODE': 'order', 'LOADER': 'tracking', 'TRAIN

In [98]:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
def load_engine(engine_path):        
        with open(engine_path, "rb") as engine_file, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(engine_file.read())
        return engine
engine = load_engine("MCITrack.trt")
context = engine.create_execution_context()

In [100]:
#TRT Tracker

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class MCITRACK(BaseTracker):
    def __init__(self, params):        
        super(MCITRACK, self).__init__(params)
        self.cfg = params.cfg        
        # Загружаем engine (сохраняем как атрибут)
        self.engine = self.load_engine("MCITrack.trt")
        if not self.engine:
            raise RuntimeError("Не удалось загрузить TensorRT engine.")    
        # Сохраняем context
        self.context = self.engine.create_execution_context()
        
        # Сохраняем выделенную память
        self.initialize_memory()

        self.fx_sz = self.cfg["TEST"]["SEARCH_SIZE"] // self.cfg["MODEL"]["ENCODER"]["STRIDE"]
        if self.cfg["TEST"]["WINDOW"] == True:  # for window penalty
            self.output_window = hann2d(torch.tensor([self.fx_sz, self.fx_sz]).long(), centered=True).cuda()
            
        self.num_template = self.cfg["TEST"]["NUM_TEMPLATES"]

        self.preprocessor = Preprocessor()
        self.state = None

        if self.cfg["TEST"]["WINDOW"] == True:  # for window penalty
            self.output_window = hann2d(torch.tensor([self.fx_sz, self.fx_sz]).long(), centered=True).cuda()
        self.frame_id = 0
        self.save_all_boxes = params.save_all_boxes
        self.z_dict1 = {}
        # for update
        self.h_state = [None] * self.cfg["MODEL"]["NECK"]["N_LAYERS"]
        self.memory_bank = self.cfg["TEST"]["MB"]["DEFAULT"]
        self.update_h_t = self.cfg["TEST"]["UPH"]["DEFAULT"]
        self.update_threshold = self.cfg["TEST"]["UPT"]["DEFAULT"]
        self.update_intervals = self.cfg["TEST"]["INTER"]["DEFAULT"]
        
        print(f"Engine initialized: {self.engine is not None}")
        print(f"Context created: {self.context is not None}")
        print(f"CUDA buffers allocated: {hasattr(self, 'd_template_list')}")
        print(f"Current CUDA context: {cuda.Context.get_current()}")
          
    def load_engine(self, engine_path):        
        with open(engine_path, "rb") as engine_file, trt.Runtime(TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(engine_file.read())
        return engine

    def initialize_memory(self):        
        # Входы
        self.d_template_list = cuda.mem_alloc(int(np.prod([1, 3, 112, 112]) * np.dtype(np.float32).itemsize))
        self.d_search_list = cuda.mem_alloc(int(np.prod([1, 3, 112, 112]) * np.dtype(np.float32).itemsize))
        self.d_template_anno_list = cuda.mem_alloc(int(np.prod([1, 3, 112, 112]) * np.dtype(np.float32).itemsize))        
        self.d_unsqueeze_3 = cuda.mem_alloc(int(np.prod([1, 3, 112, 112]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_4 = cuda.mem_alloc(int(np.prod([1, 3, 112, 112]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_5 = cuda.mem_alloc(int(np.prod([1, 3, 224, 224]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_6 = cuda.mem_alloc(int(np.prod([1, 4]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_7 = cuda.mem_alloc(int(np.prod([1, 4]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_8 = cuda.mem_alloc(int(np.prod([1, 4]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_9 = cuda.mem_alloc(int(np.prod([1, 4]) * np.dtype(np.float32).itemsize))
        self.d_unsqueeze_10 = cuda.mem_alloc(int(np.prod([1, 4]) * np.dtype(np.float32).itemsize))
        # Выходы
        self.d_pred_boxes = cuda.mem_alloc(int(np.prod([1, 1, 4]) * np.dtype(np.float32).itemsize))
        self.d_score_map = cuda.mem_alloc(int(np.prod([1, 1, 14, 14]) * np.dtype(np.float32).itemsize))
        self.d_size_map = cuda.mem_alloc(int(np.prod([1, 2, 14, 14]) * np.dtype(np.float32).itemsize))
        self.d_offset_map = cuda.mem_alloc(int(np.prod([1, 2, 14, 14]) * np.dtype(np.float32).itemsize))        
        
    def predict(self, input_feed):                  
                
        template_list = input_feed['template_list']
        search_list = input_feed['search_list']
        template_anno_list = input_feed['template_anno_list']
        unsqueeze_3 = input_feed['onnx::Unsqueeze_3']
        unsqueeze_4 = input_feed['onnx::Unsqueeze_4']
        unsqueeze_5 = input_feed['onnx::Unsqueeze_5']
        unsqueeze_6 = input_feed['onnx::Unsqueeze_6']
        unsqueeze_7 = input_feed['onnx::Unsqueeze_7']
        unsqueeze_8 = input_feed['onnx::Unsqueeze_8']
        unsqueeze_9 = input_feed['onnx::Unsqueeze_9']
        unsqueeze_10 = input_feed['onnx::Unsqueeze_10']


                
        # Копируем данные на GPU (используем выделенные буферы)
        cuda.memcpy_htod(self.d_template_list, template_list.ravel())
        cuda.memcpy_htod(self.d_search_list, search_list.ravel())
        cuda.memcpy_htod(self.d_template_anno_list, template_anno_list.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_3, unsqueeze_3.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_4, unsqueeze_4.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_5, unsqueeze_5.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_6, unsqueeze_6.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_7, unsqueeze_7.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_8, unsqueeze_8.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_9, unsqueeze_9.ravel())
        cuda.memcpy_htod(self.d_unsqueeze_10, unsqueeze_10.ravel())
      
      
            # Пример проверки входных данных перед копированием
        # print("Проверка входных данных:")
        # print(f"template_list: min={template_list.min()}, max={template_list.max()}")
        # print(f"search_list: min={search_list.min()}, max={search_list.max()}")
        # print(f"unsqueeze_3: min={unsqueeze_3.min()}, max={unsqueeze_3.max()}")
        # print(f"unsqueeze_4: min={unsqueeze_4.min()}, max={unsqueeze_4.max()}")
        # print(f"unsqueeze_5: min={unsqueeze_5.min()}, max={unsqueeze_5.max()}")
        # print(f"unsqueeze_6: {unsqueeze_6}")
        # print(f"unsqueeze_7: {unsqueeze_7}")
        # print(f"unsqueeze_8: {unsqueeze_8}")
        # print(f"unsqueeze_9: {unsqueeze_9}")
        # print(f"unsqueeze_10: {unsqueeze_10}")


        # Перед execute_v2
        # print("Проверка bindings перед выполнением:")
        # for i, buf in enumerate([
        #     self.d_template_list, self.d_search_list, self.d_template_anno_list,
        #     self.d_unsqueeze_3, self.d_unsqueeze_4, self.d_unsqueeze_5,
        #     self.d_unsqueeze_6, self.d_unsqueeze_7, self.d_unsqueeze_8,
        #     self.d_unsqueeze_9, self.d_unsqueeze_10,
        #     self.d_pred_boxes, self.d_score_map, self.d_size_map, self.d_offset_map
        # ]):
        #     print(f"Binding {i}: address={int(buf)}")
      
      
           
      
        # Указываем правильные bindings        
        self.context.execute_v2(bindings=[
            int(self.d_template_list),     # 0
            int(self.d_search_list),       # 1
            int(self.d_template_anno_list),# 2
            int(self.d_unsqueeze_3),       # 3
            int(self.d_unsqueeze_4),       # 4
            int(self.d_unsqueeze_5),       # 5
            int(self.d_unsqueeze_6),       # 6
            int(self.d_unsqueeze_7),       # 7
            int(self.d_unsqueeze_8),       # 8
            int(self.d_unsqueeze_9),       # 9
            int(self.d_unsqueeze_10),      # 10
            int(self.d_pred_boxes),       # 11
            int(self.d_score_map),        # 12
            int(self.d_size_map),         # 13
            int(self.d_offset_map)        # 14
        ])
        
        # Копируем результат с GPU
        pred_boxes = np.empty([1, 1, 4], dtype=np.float32)
        # print(pred_boxes)
        cuda.memcpy_dtoh(pred_boxes, self.d_pred_boxes)
        
        score_map = np.empty([1, 1, 14, 14], dtype=np.float32)
        cuda.memcpy_dtoh(score_map, self.d_score_map)

        size_map = np.empty([1, 2, 14, 14], dtype=np.float32)
        cuda.memcpy_dtoh(size_map, self.d_size_map)

        offset_map = np.empty([1, 2, 14, 14], dtype=np.float32)
        cuda.memcpy_dtoh(offset_map, self.d_offset_map)

        return pred_boxes, score_map, size_map, offset_map
        
    def initialize(self, image, info: dict):        
        z_patch_arr, resize_factor = sample_target(image, info['init_bbox'], 
                                self.params.template_factor,
                                output_sz=self.params.template_size)
        
        # Шаблон (хранится как тензор на CPU)
        self.template = self.preprocessor.process(z_patch_arr)
        self.template_list = [self.template] * self.num_template  # Список тензоров
        
        # Аннотации (хранятся как тензоры на CPU)
        prev_box_crop = transform_image_to_crop(torch.tensor(info['init_bbox']),
                                            torch.tensor(info['init_bbox']),
                                            resize_factor,
                                            torch.Tensor([self.params.template_size, 
                                                        self.params.template_size]),
                                            normalize=True)
        self.template_anno_list = [prev_box_crop.unsqueeze(0)] * self.num_template
        
        self.state = info['init_bbox']
        self.frame_id = 0


    def track(self, image, info: dict = None):              
        H, W, _ = image.shape
        self.frame_id += 1

        x_patch_arr, resize_factor = sample_target(image, self.state, 
                                                self.params.search_factor,
                                                output_sz=self.params.search_size)
        
        # Обрабатываем поисковое изображение и перемещаем на CPU
        search = self.preprocessor.process(x_patch_arr)
        search_list = [search]
    
        template_list_np = [t.cpu().numpy() for t in self.template_list]
        search_list_np = [s.cpu().numpy() for s in search_list]
        template_anno_list_np = [ta.cpu().numpy() for ta in self.template_anno_list]
        
        

        all_inputs_np = template_list_np + search_list_np + template_anno_list_np

        # # Получаем количество тензоров
        num_bindings = self.engine.num_io_tensors

        # # Собираем информацию о входах
        input_names = []
        expected_shapes = {}
                
        for i in range(num_bindings):
            name = self.engine.get_tensor_name(i)
            
            # Проверяем, является ли входом
            if hasattr(self.engine, 'binding_is_input'):
                is_input = self.engine.binding_is_input(i)
            else:
                is_input = (self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT)
            
            if is_input:
                input_names.append(name)
                # Получаем ожидаемую форму входа
                if hasattr(self.engine, 'get_binding_shape'):
                    expected_shapes[name] = tuple(self.engine.get_binding_shape(i))
                else:
                    expected_shapes[name] = tuple(self.engine.get_tensor_shape(name))

        # print("\nОжидаемые входы модели:")
        # for name, shape in expected_shapes.items():
        #     print(f"{name}: shape={shape}")

        # # Создаем словарь входных данных
        input_feed = {name: data for name, data in zip(input_names, all_inputs_np)}

        # print("\nФактические передаваемые данные:")
        # for name, data in input_feed.items():
        #     print(f"{name}: shape={data.shape} vs Ожидается: {expected_shapes[name]}")
        #     if data.shape != expected_shapes[name]:
        #         print(f"  !!! Несоответствие размеров для входа {name} !!!")
                        

        outputs = self.predict(input_feed)        
         # Преобразуем выходы в torch тензоры и перемещаем на GPU
        pred_boxes = torch.from_numpy(outputs[0]).cuda()
        score_map = torch.from_numpy(outputs[1]).cuda()
        size_map = torch.from_numpy(outputs[2]).cuda()
        offset_map = torch.from_numpy(outputs[3]).cuda()

        out_dict = {
            'pred_boxes': pred_boxes,
            'score_map': score_map,
            'size_map': size_map,
            'offset_map': offset_map
        }

        # Обработка результатов с учетом CUDA тензоров
        pred_score_map = out_dict['score_map']
        if self.cfg["TEST"]["WINDOW"]:
            # Убедимся, что output_window тоже на GPU
            if not self.output_window.is_cuda:
                self.output_window = self.output_window.cuda()
            response = self.output_window * pred_score_map
        else:
            response = pred_score_map

        pred_boxes, conf_score = cal_bbox(response, out_dict['size_map'], out_dict['offset_map'])
        pred_boxes = pred_boxes.view(-1, 4)
        
        # Перемещаем вычисления на CPU для окончательного результата
        pred_box = (pred_boxes.mean(dim=0) * self.params.search_size / resize_factor).cpu().numpy().tolist()
        
        self.state = clip_box(self.map_box_back(pred_box, resize_factor), H, W, margin=10)        

        return {
            "target_bbox": self.state,
            "best_score": conf_score.cpu().item() if conf_score.is_cuda else conf_score
        }
    
    def map_box_back(self, pred_box: list, resize_factor: float):
        cx_prev, cy_prev = self.state[0] + 0.5 * self.state[2], self.state[1] + 0.5 * self.state[3]
        cx, cy, w, h = pred_box
        half_side = 0.5 * self.params.search_size / resize_factor
        cx_real = cx + (cx_prev - half_side)
        cy_real = cy + (cy_prev - half_side)
        return [cx_real - 0.5 * w, cy_real - 0.5 * h, w, h]

    def map_box_back_batch(self, pred_box: torch.Tensor, resize_factor: float):
        cx_prev, cy_prev = self.state[0] + 0.5 * self.state[2], self.state[1] + 0.5 * self.state[3]
        cx, cy, w, h = pred_box.unbind(-1)
        half_side = 0.5 * self.params.search_size / resize_factor
        cx_real = cx + (cx_prev - half_side)
        cy_real = cy + (cy_prev - half_side)
        return torch.stack([cx_real - 0.5 * w, cy_real - 0.5 * h, w, h], dim=-1)


In [101]:
tracker = MCITRACK(params)

Engine initialized: True
Context created: True
CUDA buffers allocated: True
Current CUDA context: <pycuda._driver.Context object at 0x0000023A5FE8A960>


In [91]:
# Трекинг по видео
file = "0516.mp4"
video = cv2.VideoCapture(file)
ok, image = video.read()
if not video.isOpened():
    print("Could not open video")
    sys.exit()
    
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

x, y, w, h = cv2.selectROI( image, fromCenter=False)
init_state = [x, y, w, h]
def _build_init_info(box):
            return {'init_bbox': box}
tracker.initialize(image, _build_init_info(init_state))
counter = 0
while True:
    ok, image = video.read()
    if not ok:
        break

    # Конвертация для трекера
    tracker_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Трекинг
    start_time = time.time()
    out = tracker.track(tracker_image)
    state = [int(s) for s in out['target_bbox']]
    #best_score = out["best_score"]
    best_score = 1
    fps = 1 / (time.time() - start_time + 1e-6)

    # Визуализация
    display_image = image.copy()
    x, y, w, h = state
    
    # Динамический цвет рамки в зависимости от уверенности
    color = (0, 255, 0) if best_score > 0.7 else (0, 255, 255) if best_score > 0.4 else (0, 0, 255)
    thickness = 3 if best_score > 0.7 else 2
    
    # Рисуем bounding box с увеличенными размерами
    cv2.rectangle(display_image, (x, y), (x + w, y + h), color, thickness)
    
    # Добавляем информационный текст
    cv2.putText(display_image, f"Score: {best_score:.2f}", (x, y-10), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
    cv2.putText(display_image, f"FPS: {fps:.1f}", (20, 40), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
    
    cv2.imshow("tracking", display_image)
    
    # Обработка клавиш
    key = cv2.waitKey(1) & 0xFF
    if key == 32:  # SPACE - переинициализация
        x, y, w, h = cv2.selectROI("Select ROI", image, fromCenter=False)
        if w > 10 and h > 10:  # Минимальный размер ROI
            init_state = [x, y, w, h]
            print("Переинициализация...")
            tracker.initialize(tracker_image, _build_init_info(init_state))
    elif key == 27:  # ESC - выход
        break
cv2.destroyAllWindows()

In [90]:
#Метрики
import numpy as np

def iou(boxA, boxB):
    # boxA, boxB: [x, y, w, h]
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    yB = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    interW = max(0, xB - xA)
    interH = max(0, yB - yA)
    interArea = interW * interH

    boxAArea = boxA[2] * boxA[3]
    boxBArea = boxB[2] * boxB[3]
    unionArea = boxAArea + boxBArea - interArea

    if unionArea == 0:
        return 0.0
    return interArea / unionArea

def precision(boxA, boxB):
    # центры bbox
    centerA = (boxA[0] + boxA[2]/2, boxA[1] + boxA[3]/2)
    centerB = (boxB[0] + boxB[2]/2, boxB[1] + boxB[3]/2)
    dist = np.sqrt((centerA[0] - centerB[0])**2 + (centerA[1] - centerB[1])**2)
    return dist
sr_thresh = 0.5
prec_thresh = 20

In [103]:
#Трекинг got10k с метриками ONNX
import glob
import time
import  os
gt_bboxes = []
pred_bboxes = []
seq_path = "val/GOT-10k_Val_000001"
txt_files = glob.glob(os.path.join(seq_path, '*.txt'))
if not txt_files:
    raise FileNotFoundError(f"No .txt files found in {seq_path}")

img_files = sorted(glob.glob(os.path.join(seq_path, '*.jpg')))
with open(txt_files[0], 'r') as f:
    gt_bboxes = [list(map(float, line.strip().split(','))) for line in f]

# Получаем размер первого изображения
sample_img = cv2.imread(img_files[0])
if sample_img is None:
    raise ValueError(f"Failed to read sample image: {img_files[0]}")

#height, width = sample_img.shape[:2]
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#output_filename = f"{seq_path.split('/')[-1]}_output.avi"
#video_vriter = cv2.VideoWriter(output_filename, fourcc, 10, (width, height))  

assert len(img_files) == len(gt_bboxes), "Количество кадров и bbox'ов не совпадает"

x, y, w, h = map(int, gt_bboxes[0])
init_state = [x, y, w, h]

def _build_init_info(box):
            return {'init_bbox': box}

counter = 0


tracker.initialize(sample_img, _build_init_info(init_state))

start_time = time.time()  # Начало замера

for img_file, bbox in zip(img_files, gt_bboxes):
        
        # Читаем изображение
        img = cv2.imread(img_file)
        if img is None:
            print(f"Не удалось загрузить изображение: {img_file}")
            continue
        
        
        out  = tracker.track(img)
        state = [int(s) for s in out['target_bbox']]   
                           
        # Рисуем bounding box        
        x, y, w, h = [int(x) for x in state]

        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 200), 2)
        
        x1, y1, w1, h1 = map(int, bbox)
        cv2.rectangle(img, (x1, y1), (x1+w1, y1+h1), (0, 200, 0), 2)
        bbox_pred = x, y, w, h
        
        gt_bboxes.append(bbox)
        pred_bboxes.append(bbox_pred)

        #cv2.imshow(seq_path, img)
        #video_vriter.write(img)
        counter+=1


        # Выход по нажатию 'q' или ESC
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            break
       
        
                
end_time = time.time()    # Конец замера    
total_frames = counter       # Общее количество обработанных кадров
total_time = end_time - start_time
fps = total_frames / total_time
ious = [iou(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
ao = np.mean(ious)
sr = np.mean([1 if val >= sr_thresh else 0 for val in ious])
precisions = [precision(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
prec = np.mean([1 if d <= prec_thresh else 0 for d in precisions])

print(f"GOT: {seq_path}")
print(f"FPS_TRT: {fps:.2f}")
print(f'Success Rate (SR@0.5)_TRT: {sr:.2f}')
print(f'Average Overlap (AO)_TRT: {ao:.2f}')
print(f'Precision @20px_TRT: {prec:.2f}')

#cv2.destroyAllWindows()
#video_vriter.release()
#print(f"Video saved as: {output_filename}")

GOT: val/GOT-10k_Val_000001
FPS_TRT: 36.90
Success Rate (SR@0.5)_TRT: 1.00
Average Overlap (AO)_TRT: 0.93
Precision @20px_TRT: 0.97


In [None]:
#Создание TRT-модели
import tensorrt as trt
import os

def build_engine(onnx_file_path, engine_file_path, precision="FP32", max_batch_size=1, max_workspace_size=1<<30):
    """
    Конвертирует ONNX модель в TensorRT engine (для TensorRT 8.0+)
    
    Параметры:
        onnx_file_path: путь к файлу ONNX модели
        engine_file_path: путь для сохранения engine
        precision: "FP32", "FP16" или "INT8"
        max_batch_size: максимальный размер батча
        max_workspace_size: максимальный размер рабочей памяти (в байтах)
    """
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    
    # Инициализация builder, сети и конфига
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(explicit_batch) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser, \
         builder.create_builder_config() as config:
        
        # Установка максимального размера рабочей памяти
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, max_workspace_size)
        
        # Установка precision mode
        if precision == "FP16":
            config.set_flag(trt.BuilderFlag.FP16)
        elif precision == "INT8":
            config.set_flag(trt.BuilderFlag.FP16)
            config.set_flag(trt.BuilderFlag.INT8)
            # Для INT8 требуется калибровочный набор данных
            # config.int8_calibrator = ... 
        
        # Проверка поддержки precision
        if precision == "FP16" and not builder.platform_has_fast_fp16:
            print("FP16 не поддерживается на этой платформе")
        if precision == "INT8" and not builder.platform_has_fast_int8:
            print("INT8 не поддерживается на этой платформе")
        
        # Загрузка ONNX модели
        print(f"Загрузка ONNX модели из файла {onnx_file_path}")
        with open(onnx_file_path, "rb") as model:
            if not parser.parse(model.read()):
                print("Ошибка при парсинге ONNX модели")
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        
        # Оптимизация и создание engine
        print("Строим TensorRT engine. Это может занять некоторое время...")
        
        # Установка максимального размера батча
        # В новых версиях это делается через профиль оптимизации
        profile = builder.create_optimization_profile()
        input_tensor = network.get_input(0)
        input_shape = input_tensor.shape
        profile.set_shape(input_tensor.name, 
                        (1, *input_shape[1:]),  # min shape
                        (max_batch_size, *input_shape[1:]),  # opt shape
                        (max_batch_size, *input_shape[1:]))  # max shape
        config.add_optimization_profile(profile)
        
        # Построение engine
        serialized_engine = builder.build_serialized_network(network, config)
        
        if serialized_engine is None:
            print("Ошибка при создании engine")
            return None
        
        # Сохранение engine в файл
        print(f"Сохранение engine в файл {engine_file_path}")
        with open(engine_file_path, "wb") as f:
            f.write(serialized_engine)
        
        # Возвращаем десериализованный engine для использования
        runtime = trt.Runtime(TRT_LOGGER)
        return runtime.deserialize_cuda_engine(serialized_engine)

def convert_onnx_to_tensorrt(onnx_path, trt_path, precision="FP32"):
    """
    Основная функция конвертации
    """
    # Проверка существования ONNX файла
    if not os.path.exists(onnx_path):
        print(f"Файл {onnx_path} не найден")
        return False
    
    print(f"Начинаем конвертацию {onnx_path} в TensorRT engine")
    
    # Построение engine
    engine = build_engine(onnx_path, trt_path, precision)
    
    if engine is not None:
        print("Конвертация успешно завершена!")
        return True
    else:
        print("Конвертация не удалась")
        return False

# Пример использования
if __name__ == "__main__":
    onnx_model_path = "MCITrack1.onnx"  # Путь к вашей ONNX модели
    trt_engine_path = "MCITrack2.trt"   # Куда сохранить TensorRT engine
    
    # Доступные precision: "FP32", "FP16", "INT8"
    precision_mode = "FP16"  
    
    success = convert_onnx_to_tensorrt(onnx_model_path, trt_engine_path, precision_mode)
    
    if success:
        print(f"TensorRT engine успешно сохранен в {trt_engine_path}")
    else:
        print("Ошибка при конвертации модели")

In [None]:
#Проход по всему got10k
import glob
import time
import  os
import pandas as pd
from tqdm import tqdm
metrics = pd.DataFrame(columns=['Path', 'FPS', 'Success Rate (SR@0.5)', "Average Overlap (AO)", "Precision @20px"])
base_dir = "val/"
folders = os.listdir(f'{base_dir}')
counter_test = 0
for folder in tqdm(folders):
    if folder == "list.txt":
        print(f"{'*' * 20} Завершено! {'*' * 20}")
        break
    gt_bboxes = []
    pred_bboxes = []
    seq_path = os.path.join(base_dir, folder)
    txt_files = glob.glob(os.path.join(seq_path, '*.txt'))
    if not txt_files:
        raise FileNotFoundError(f"No .txt files found in {seq_path}")

    img_files = sorted(glob.glob(os.path.join(seq_path, '*.jpg')))
    with open(txt_files[0], 'r') as f:
        gt_bboxes = [list(map(float, line.strip().split(','))) for line in f]

    # Получаем размер первого изображения
    sample_img = cv2.imread(img_files[0])
    if sample_img is None:
        raise ValueError(f"Failed to read sample image: {img_files[0]}")  

    assert len(img_files) == len(gt_bboxes), "Количество кадров и bbox'ов не совпадает"

    x, y, w, h = map(int, gt_bboxes[0])
    init_state = [x, y, w, h]

    def _build_init_info(box):
                return {'init_bbox': box}

    counter = 0
    counter_test += 1


    tracker.initialize(sample_img, _build_init_info(init_state))

    start_time = time.time()  # Начало замера

    for img_file, bbox in zip(img_files, gt_bboxes):
            
            # Читаем изображение
            img = cv2.imread(img_file)
            if img is None:
                print(f"Не удалось загрузить изображение: {img_file}")
                continue
                        
            out  = tracker.track(img)
            state = [int(s) for s in out['target_bbox']]   
                            
            # Рисуем bounding box        
            x, y, w, h = [int(x) for x in state]

            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 200), 2)
            
            x1, y1, w1, h1 = map(int, bbox)
            cv2.rectangle(img, (x1, y1), (x1+w1, y1+h1), (0, 200, 0), 2)
            bbox_pred = x, y, w, h
            
            gt_bboxes.append(bbox)
            pred_bboxes.append(bbox_pred)
   
            counter+=1

            # Выход по нажатию 'q' или ESC
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q') or key == 27:
                break
        
            
                    
    end_time = time.time()    # Конец замера    
    total_frames = counter       # Общее количество обработанных кадров
    total_time = end_time - start_time
    fps = round(total_frames / total_time)
    ious = [iou(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
    ao = np.mean(ious)
    sr = np.mean([1 if val >= sr_thresh else 0 for val in ious])
    precisions = [precision(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
    prec = np.mean([1 if d <= prec_thresh else 0 for d in precisions])
       
    if metrics.empty:
        metrics = pd.DataFrame(dict(zip(metrics.columns,
        [folder, fps, sr, ao, prec])), index=[0])
    else:
        metrics = metrics._append(pd.Series(dict(zip(metrics.columns,
        [folder, fps, sr, ao, prec]))), ignore_index=True)
    print(f"folder={folder}, fps={fps}, sr={sr}, ao={ao}, prec={prec}")
    # if counter_test == 3:
    #     print(f"{'*' * 20} Прервано на 3! {'*' * 20}")
    #     break

metrics = metrics._append(pd.Series(dict(zip(metrics.columns,
        ["Average", metrics['FPS'].mean(), metrics['Success Rate (SR@0.5)'].mean(), metrics['Average Overlap (AO)'].mean(), metrics['Precision @20px'].mean()]))), ignore_index=True)
metrics.to_csv('metrics_trt.csv', index=False)
metrics   

In [104]:
#Поиск граничных условий
import glob
import time
from pathlib import Path
import  os
import pandas as pd
from tqdm import tqdm

threshold_metrics = pd.DataFrame(columns=['File', 'FPS', 'Success Rate (SR@0.5)', "Average Overlap (AO)", "Precision @20px", "Euclidean Distance"])
base_dir = "input_data/"
folders = os.listdir(f'{base_dir}')


def bbox_center(bbox):
    """Вычисляет центр bounding box в формате [x1, y1, x2, y2]"""
    center = [(bbox[0] + (bbox[2] / 2)), (bbox[1] + (bbox[3] / 2))]
    print(f"bbox:{bbox}")
    print(f"center: {center}")
    return center

def mean_euclidean_distance(gt_bboxes, pred_bboxes):  
    print(f"len(gt_bboxes):{len(gt_bboxes)}")
    print(f"len(pred_bboxes): {len(pred_bboxes)}")
    if len(gt_bboxes) != len(pred_bboxes):
        raise ValueError("Количество gt_bboxes и pred_bboxes не совпадает!")
        
    distances = []
    for gt_bbox, pred_bbox in zip(gt_bboxes, pred_bboxes):
                 
        # Получаем центры bbox
        gt_center = np.array(bbox_center(gt_bbox))
        pred_center = np.array(bbox_center(pred_bbox))
        
        # Вычисляем евклидово расстояние между центрами
        distance = np.linalg.norm(gt_center - pred_center)
        distances.append(distance)
          
    return np.mean(distances)

for folder in tqdm(folders):   
    counter_files = 0      
    seq_path = os.path.join(base_dir, folder)    
    video_files = glob.glob(os.path.join(seq_path, '*.mp4'))
    txt_files = glob.glob(os.path.join(seq_path, '*.txt'))    
    if not video_files:
        raise FileNotFoundError(f"No files found in {seq_path}")    
        
    for file in video_files:
        gt_bboxes = []
        pred_bboxes = []   
        video_file = video_files[counter_files]
        txt_file = txt_files[counter_files]        
        with open(txt_file, 'r') as f:
            # Пропускаем заголовок
            next(f)
            for line in f:
                # Разбиваем строку на части
                parts = line.strip().split()
                # Извлекаем нужные значения (object_x, object_y, object_width, object_height)
                x = int(parts[1])
                y = int(parts[2])
                width = float(parts[3])
                height = float(parts[4])
                # Добавляем bounding box в список
                gt_bboxes.append([x, y, width, height])

        video = cv2.VideoCapture(video_file)
        
        ok, image = video.read()
        if not video.isOpened():
            print("Could not open video")
            sys.exit()
            
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        x, y, w, h = map(int, gt_bboxes[0])
        init_state = [x, y, w, h]
        def _build_init_info(box):
                    return {'init_bbox': box}
        tracker.initialize(image, _build_init_info(init_state))
        del gt_bboxes[0]
        start_time = time.time()  # Начало замера
        counter = 0
        while counter < len(gt_bboxes):
            ok, image = video.read()            
            if not ok:                
                break
            bbox = gt_bboxes[counter]
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)                    
            out  = tracker.track(image)
            state = [int(s) for s in out['target_bbox']]                                                     
                            
            # Рисуем bounding box  
            x, y, w, h = [int(x) for x in state]
            
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 200), 2)
            
            x1, y1, w1, h1 = map(int, bbox)
            cv2.rectangle(image, (x1, y1), (x1+w1, y1+h1), (0, 200, 0), 2)
            pred_bbox = x, y, w, h                        
            pred_bboxes.append(pred_bbox)

            cv2.imshow("tracking", image)
            
            k = cv2.waitKey(1)           
            
            if k == 27:  # ESC
                break
            counter += 1
        counter_files += 1
        end_time = time.time()    # Конец замера  
        total_frames = counter       # Общее количество обработанных кадров
        total_time = end_time - start_time
        fps = total_frames / total_time
        ious = [iou(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
        ao = np.mean(ious)
        sr = np.mean([1 if val >= sr_thresh else 0 for val in ious])
        precisions = [precision(gt, pred) for gt, pred in zip(gt_bboxes, pred_bboxes)]
        prec = np.mean([1 if d <= prec_thresh else 0 for d in precisions])        
        ed = mean_euclidean_distance(gt_bboxes, pred_bboxes)
        print(f"FPS: {fps:.2f}")
        print(f'Success Rate (SR@0.5): {sr:.2f}')
        print(f'Average Overlap (AO): {ao:.2f}')
        print(f'Precision @20px: {prec:.2f}')          
        print(f'Euclidean Distance: {ed:.2f}')         
     
        threshold_metrics = threshold_metrics._append(pd.Series(dict(zip(threshold_metrics.columns,[Path(video_file).stem, fps, sr, ao, prec, ed]))), ignore_index=True)
                
        
cv2.destroyAllWindows()
                             
threshold_metrics = threshold_metrics._append(pd.Series(dict(zip(threshold_metrics.columns,
        ["Average", threshold_metrics['FPS'].mean(), threshold_metrics['Success Rate (SR@0.5)'].mean(), threshold_metrics['Average Overlap (AO)'].mean(), threshold_metrics['Precision @20px'].mean(), threshold_metrics['Euclidean Distance'].mean()]))), ignore_index=True)
threshold_metrics.to_csv('threshold_metrics.csv', index=False)
threshold_metrics




  0%|          | 0/2 [00:00<?, ?it/s]

len(gt_bboxes):1599
len(pred_bboxes): 1599
bbox:[635, 345, 10.0, 10.0]
center: [640.0, 350.0]
bbox:(634, 343, 10, 11)
center: [639.0, 348.5]
bbox:[635, 340, 10.0, 10.0]
center: [640.0, 345.0]
bbox:(635, 339, 10, 10)
center: [640.0, 344.0]
bbox:[635, 335, 10.0, 10.0]
center: [640.0, 340.0]
bbox:(635, 335, 10, 10)
center: [640.0, 340.0]
bbox:[635, 330, 10.0, 10.0]
center: [640.0, 335.0]
bbox:(635, 329, 10, 10)
center: [640.0, 334.0]
bbox:[635, 325, 10.0, 10.0]
center: [640.0, 330.0]
bbox:(634, 324, 10, 10)
center: [639.0, 329.0]
bbox:[635, 320, 10.0, 10.0]
center: [640.0, 325.0]
bbox:(634, 319, 10, 10)
center: [639.0, 324.0]
bbox:[635, 315, 10.0, 10.0]
center: [640.0, 320.0]
bbox:(633, 313, 11, 11)
center: [638.5, 318.5]
bbox:[635, 310, 10.0, 10.0]
center: [640.0, 315.0]
bbox:(634, 309, 10, 10)
center: [639.0, 314.0]
bbox:[635, 305, 10.0, 10.0]
center: [640.0, 310.0]
bbox:(635, 304, 11, 11)
center: [640.5, 309.5]
bbox:[635, 300, 10.0, 10.0]
center: [640.0, 305.0]
bbox:(634, 299, 10, 10)


  threshold_metrics = threshold_metrics._append(pd.Series(dict(zip(threshold_metrics.columns,[Path(video_file).stem, fps, sr, ao, prec, ed]))), ignore_index=True)


len(gt_bboxes):1599
len(pred_bboxes): 1599
bbox:[632, 342, 15.0, 15.0]
center: [639.5, 349.5]
bbox:(631, 343, 14, 15)
center: [638.0, 350.5]
bbox:[632, 337, 15.0, 15.0]
center: [639.5, 344.5]
bbox:(632, 337, 14, 14)
center: [639.0, 344.0]
bbox:[632, 332, 15.0, 15.0]
center: [639.5, 339.5]
bbox:(631, 332, 14, 14)
center: [638.0, 339.0]
bbox:[632, 327, 15.0, 15.0]
center: [639.5, 334.5]
bbox:(631, 327, 14, 15)
center: [638.0, 334.5]
bbox:[632, 322, 15.0, 15.0]
center: [639.5, 329.5]
bbox:(631, 322, 15, 15)
center: [638.5, 329.5]
bbox:[632, 317, 15.0, 15.0]
center: [639.5, 324.5]
bbox:(632, 317, 15, 14)
center: [639.5, 324.0]
bbox:[632, 312, 15.0, 15.0]
center: [639.5, 319.5]
bbox:(631, 312, 15, 15)
center: [638.5, 319.5]
bbox:[632, 307, 15.0, 15.0]
center: [639.5, 314.5]
bbox:(632, 306, 14, 14)
center: [639.0, 313.0]
bbox:[632, 302, 15.0, 15.0]
center: [639.5, 309.5]
bbox:(631, 302, 14, 14)
center: [638.0, 309.0]
bbox:[632, 297, 15.0, 15.0]
center: [639.5, 304.5]
bbox:(631, 297, 15, 15)


 50%|█████     | 1/2 [05:09<05:09, 309.21s/it]

len(gt_bboxes):1599
len(pred_bboxes): 1599
bbox:[8, 2, 9.0, 9.0]
center: [12.5, 6.5]
bbox:(9, 0, 10, 10)
center: [14.0, 5.0]
bbox:[12, 3, 9.0, 9.0]
center: [16.5, 7.5]
bbox:(12, 1, 10, 10)
center: [17.0, 6.0]
bbox:[16, 4, 9.0, 9.0]
center: [20.5, 8.5]
bbox:(16, 3, 10, 10)
center: [21.0, 8.0]
bbox:[20, 5, 9.0, 9.0]
center: [24.5, 9.5]
bbox:(20, 4, 10, 10)
center: [25.0, 9.0]
bbox:[24, 6, 9.0, 9.0]
center: [28.5, 10.5]
bbox:(24, 5, 10, 10)
center: [29.0, 10.0]
bbox:[28, 7, 9.0, 9.0]
center: [32.5, 11.5]
bbox:(28, 5, 10, 10)
center: [33.0, 10.0]
bbox:[32, 8, 9.0, 9.0]
center: [36.5, 12.5]
bbox:(32, 7, 10, 10)
center: [37.0, 12.0]
bbox:[36, 9, 9.0, 9.0]
center: [40.5, 13.5]
bbox:(36, 8, 10, 10)
center: [41.0, 13.0]
bbox:[40, 10, 9.0, 9.0]
center: [44.5, 14.5]
bbox:(41, 9, 10, 10)
center: [46.0, 14.0]
bbox:[44, 11, 9.0, 9.0]
center: [48.5, 15.5]
bbox:(44, 10, 10, 10)
center: [49.0, 15.0]
bbox:[48, 12, 9.0, 9.0]
center: [52.5, 16.5]
bbox:(48, 11, 10, 10)
center: [53.0, 16.0]
bbox:[52, 13, 9.

100%|██████████| 2/2 [10:44<00:00, 322.29s/it]

len(gt_bboxes):1599
len(pred_bboxes): 1599
bbox:[627, 302, 25.0, 25.0]
center: [639.5, 314.5]
bbox:(627, 309, 23, 17)
center: [638.5, 317.5]
bbox:[627, 257, 25.0, 25.0]
center: [639.5, 269.5]
bbox:(610, 295, 53, 52)
center: [636.5, 321.0]
bbox:[627, 212, 25.0, 25.0]
center: [639.5, 224.5]
bbox:(626, 217, 25, 23)
center: [638.5, 228.5]
bbox:[627, 197, 25.0, 25.0]
center: [639.5, 209.5]
bbox:(627, 196, 24, 25)
center: [639.0, 208.5]
bbox:[672, 197, 25.0, 25.0]
center: [684.5, 209.5]
bbox:(607, 195, 70, 43)
center: [642.0, 216.5]
bbox:[717, 197, 25.0, 25.0]
center: [729.5, 209.5]
bbox:(717, 194, 25, 28)
center: [729.5, 208.0]
bbox:[727, 197, 25.0, 25.0]
center: [739.5, 209.5]
bbox:(727, 196, 23, 25)
center: [738.5, 208.5]
bbox:[727, 242, 25.0, 25.0]
center: [739.5, 254.5]
bbox:(732, 242, 19, 16)
center: [741.5, 250.0]
bbox:[727, 287, 25.0, 25.0]
center: [739.5, 299.5]
bbox:(728, 232, 45, 56)
center: [750.5, 260.0]
bbox:[727, 332, 25.0, 25.0]
center: [739.5, 344.5]
bbox:(727, 331, 23, 25)





Unnamed: 0,File,FPS,Success Rate (SR@0.5),Average Overlap (AO),Precision @20px,Euclidean Distance
0,size-10_speed-5_scale-1,54.752178,0.998124,0.755904,1.0,1.184572
1,size-15_speed-5_scale-1,56.96316,1.0,0.817123,1.0,1.298408
2,size-20_speed-5_scale-1,57.360172,1.0,0.84628,1.0,1.212843
3,size-25_speed-5_scale-1,57.485474,1.0,0.87071,1.0,1.313731
4,size-30_speed-5_scale-1,56.408541,1.0,0.884178,1.0,1.236719
5,size-5_speed-5_scale-1,30.255139,0.0,0.001486,0.007505,312.040671
6,size-6_speed-5_scale-1,56.474094,0.0,0.358268,1.0,1.847661
7,size-7_speed-5_scale-1,57.479169,0.0,0.486784,1.0,1.264911
8,size-8_speed-5_scale-1,55.009797,0.998749,0.636018,1.0,0.665069
9,size-9_speed-5_scale-1,54.537957,0.998749,0.705914,1.0,1.308539
