In [1]:
import sys
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm


In [2]:
import os

# Make CUDA kernel launches synchronous so a failing kernel is reported at the call that caused it.
os.environ.update(CUDA_LAUNCH_BLOCKING="1")


In [3]:
# Resolve the repository root as the parent of the demo directory.
current_file_path = os.path.abspath("/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/MOTIP/demo")
parent_dir = os.path.dirname(current_file_path)
# Guard the insertion so re-running this cell does not stack duplicate sys.path entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
# Relative paths used by later cells (e.g. "configs/...", "./outputs/...") resolve against this directory.
os.chdir(parent_dir)
print(f"Current root path is set to {parent_dir}")

Current root path is set to /media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/MOTIP


In [4]:
from utils.misc import yaml_to_dict
from demo.colormap import get_color
from models.motip import build as build_model
from models.misc import load_checkpoint
from models.runtime_tracker import RuntimeTracker

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
!pwd

/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/MOTIP


In [6]:
# --- Configuration ---
# NOTE: the driver cell further down reassigns every one of these names before tracking runs.

# Model definition and pretrained weights
config_path = "configs/r50_deformable_detr_motip_dancetrack.yaml"
checkpoint_path = "/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/r50_deformable_detr_motip_dancetrack.pth"

# Inputs: frame directory and detection text file
frame_dir = "./frames"
detection_path = "./detections.txt"

# Output: rendered tracking video
output_video_path = "./outputs/video_process_demo/tracked_output.mp4"

# Tensor precision
dtype = torch.float16

In [7]:
import os
import cv2
import torch
from tqdm import tqdm
from demo.colormap import get_color

# --- Utility to extract frame index from filename ---
import re

def extract_frame_index(filename):
    """Return the integer that follows ``_f`` in a name ending with ``_f<digits>.png``.

    Args:
        filename: file name or path to inspect.

    Returns:
        The parsed frame number, or ``-1`` when the name does not end with
        the ``_f<digits>.png`` pattern.
    """
    found = re.search(r'_f(\d+)\.png$', filename)
    if found is None:
        return -1
    return int(found.group(1))

In [8]:
# --- Read detections (MOTChallenge format) ---
def load_detections(detection_path):
    """Group the detections of a comma-separated text file by frame id.

    Only lines that split into exactly 11 comma-separated fields are used;
    every other line (including blank lines) is skipped.

    Args:
        detection_path: path to the detection text file.

    Returns:
        dict mapping the int frame id (field 0) to a list of
        ``[x, y, w, h, score, cls]`` float lists (fields 2-7), in file order.
    """
    per_frame = {}
    with open(detection_path, "r") as handle:
        rows = (raw.strip().split(",") for raw in handle)
        for row in rows:
            if len(row) != 11:
                continue
            key = int(row[0])
            # Fields 2..7 are x, y, w, h, score, cls; field 1 is not used.
            values = [float(item) for item in row[2:8]]
            if key not in per_frame:
                per_frame[key] = []
            per_frame[key].append(values)
    return per_frame


In [9]:
def get_boxes_from_txt(detection_path, current_frame_idx):
    """Collect the boxes listed for one frame in a comma-separated detection file.

    Each non-blank line is split on commas; field 0 is the frame id and
    fields 2-5 are read as x, y, w, h. Blank or whitespace-only lines
    (e.g. a trailing empty line) are skipped instead of raising.

    Args:
        detection_path: path to the .txt file
        current_frame_idx: current frame number (int)
    Returns:
        List of boxes [[x, y, w, h], ...] for the current frame, in file order
    Raises:
        ValueError: if a frame id or a coordinate of a matching line is not numeric
        IndexError: if a matching line has fewer than six fields
    """
    boxes = []
    with open(detection_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # ''.split(',') would yield [''] and int('') raises ValueError.
                continue
            fields = line.split(',')
            if int(fields[0]) != current_frame_idx:
                continue
            # Explicit indexing keeps a truncated line loud (IndexError) rather than producing a short box.
            boxes.append([float(fields[i]) for i in (2, 3, 4, 5)])
    return boxes


In [10]:
def simple_transform(image, device, max_shorter=800, max_longer=1440, image_dtype=torch.float16):
    """Turn an image into a resized, normalized tensor placed on ``device``.

    The steps are applied in this order: ``to_tensor``, bilinear ``resize``
    with ``size=max_shorter`` and ``max_size=max_longer``, ``normalize`` with
    the fixed per-channel mean/std below, then a move to ``device`` with a
    cast to ``image_dtype``.

    Args:
        image: input accepted by torchvision's ``to_tensor``.
        device: target torch device.
        max_shorter: ``size`` argument passed to ``resize``.
        max_longer: ``max_size`` argument passed to ``resize``.
        image_dtype: dtype of the returned tensor.

    Returns:
        The transformed image tensor on ``device`` with dtype ``image_dtype``.
    """
    import torchvision.transforms.functional as TF

    # Per-channel statistics (the values commonly used with ImageNet-pretrained backbones).
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    tensor = TF.to_tensor(image)
    resized = TF.resize(
        tensor,
        size=max_shorter,
        max_size=max_longer,
        interpolation=TF.InterpolationMode.BILINEAR,
    )
    normalized = TF.normalize(resized, mean=channel_mean, std=channel_std)
    return normalized.to(device=device, dtype=image_dtype)


In [11]:
def run_tracking_with_detections(frame_dir, detection_path, output_video_path, tracker, device, dtype):
    """Track objects over a directory of PNG frames using external detections and render a video.

    For every frame (in sorted file-name order) the image is transformed with
    ``simple_transform``, the boxes for that frame index are read with
    ``get_boxes_from_txt``, the tracker is advanced through
    ``tracker.update_with_external_detections`` and the boxes / ids returned
    by ``tracker.get_track_results()`` are drawn on the frame, which is then
    appended to the output video.

    Args:
        frame_dir: directory containing the ``.png`` frames.
        detection_path: detection text file queried once per frame.
        output_video_path: destination video path (written at 30 fps with the ``mp4v`` fourcc).
        tracker: object exposing ``update_with_external_detections`` and ``get_track_results``.
        device: torch device for the frame and box tensors.
        dtype: dtype handed to ``simple_transform``; the tensor given to the
            tracker is always cast to float32 afterwards.

    Raises:
        FileNotFoundError: if ``frame_dir`` contains no ``.png`` file.
        RuntimeError: if a frame cannot be decoded or the video writer cannot be opened.
    """
    import cv2
    import os
    from tqdm import tqdm
    from demo.colormap import get_color

    # Get sorted frame paths
    # NOTE(review): plain lexicographic sort — assumes frame numbers in the file names are zero-padded; confirm.
    frame_paths = sorted(
        os.path.join(frame_dir, fname)
        for fname in os.listdir(frame_dir)
        if fname.endswith(".png")
    )
    if not frame_paths:
        raise FileNotFoundError(f"No .png frames found in: {frame_dir}")

    # Get video size from the first frame
    sample_img = cv2.imread(frame_paths[0])
    if sample_img is None:
        raise RuntimeError(f"Could not read frame: {frame_paths[0]}")
    height, width = sample_img.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(output_video_path, fourcc, 30, (width, height))
    if not video_writer.isOpened():
        # An unopened writer drops every frame silently; fail before doing any tracking work.
        video_writer.release()
        raise RuntimeError(f"Could not open video writer for: {output_video_path}")

    try:
        for frame_idx, frame_path in enumerate(tqdm(frame_paths, desc="Tracking")):
            # Load and process image
            frame = cv2.imread(frame_path)
            if frame is None:
                raise RuntimeError(f"Could not read frame: {frame_path}")
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_tensor = simple_transform(frame_rgb, device=device, image_dtype=dtype)

            # The tracker input is forced to float32 regardless of `dtype`.
            frame_tensor = frame_tensor.to(device=device, dtype=torch.float32)

            # Load detections for current frame
            # NOTE(review): lookup uses the 0-based position in the sorted list; MOTChallenge-style
            # files usually number frames from 1 — confirm the detection file matches.
            external_boxes = get_boxes_from_txt(detection_path, frame_idx)
            if len(external_boxes) > 0:
                external_boxes_tensor = torch.tensor(external_boxes, dtype=torch.float32, device=device)
            else:
                # Keep a well-formed (0, 4) tensor so the tracker always receives a 2-D box tensor.
                external_boxes_tensor = torch.empty((0, 4), dtype=torch.float32, device=device)

            # Track
            tracker.update_with_external_detections(frame_tensor, external_boxes_tensor)
            results = tracker.get_track_results()

            # Draw boxes (drawn on the original BGR frame, not on the transformed tensor)
            for bbox, obj_id in zip(results["bbox"], results["id"]):
                x, y, w, h = map(int, bbox)
                track_id = obj_id.item()
                color = get_color(track_id, rgb=False, use_int=True)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, f"ID: {track_id}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            video_writer.write(frame)
    finally:
        # Always finalize the container, even when tracking fails mid-sequence.
        video_writer.release()

    print(f"Tracking video saved to: {output_video_path}")


In [12]:
# --- Define Paths and Tracker ---
# Driver: builds the model, wraps it in a RuntimeTracker and runs tracking over the frame directory.
if __name__ == "__main__":
    frame_dir = "/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/data/BOSCH/LB-UH_103_20181116_145538_005"
    detection_path = "/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/data/BOSCH/bosch_5.txt"
    output_video_path = "./outputs/video_process_demo/bosch.mp4"
    config_path = "configs/r50_deformable_detr_motip_dancetrack.yaml"
    checkpoint_path = "/media/RTCIN9TBA/Interns/RDT2/yai3kor/workspace/r50_deformable_detr_motip_dancetrack.pth"

    os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

    # Choose device: 'cuda:0', 'cuda:1', or 'cpu'
    device_str = 'cuda:1'
    device = torch.device(device_str if torch.cuda.is_available() else 'cpu')
    # Fall back to the first GPU when the requested CUDA index does not exist on this machine.
    if device.type == 'cuda' and device.index is not None and device.index >= torch.cuda.device_count():
        print(f"Requested device {device_str} is not available; falling back to cuda:0")
        device = torch.device('cuda:0')
    dtype = torch.float32

    config = yaml_to_dict(config_path)
    model, _ = build_model(config)
    load_checkpoint(model, checkpoint_path)
    model.to(device)
    # Inference only: put dropout / normalization layers into evaluation behaviour.
    model.eval()
    # Report parameters that were never materialized (still on the 'meta' device) after loading.
    for name, param in model.named_parameters():
        if param.device.type == 'meta':
            print(f"❌ Meta tensor: {name}")


    tracker = RuntimeTracker(
        model=model,
        # NOTE(review): hard-coded; presumably has to match the resolution of the frames in frame_dir — confirm.
        sequence_hw=(2160, 3840),  # replace with your real height/width
        assignment_protocol="object-max",
        miss_tolerance=30,
        det_thresh=0.5,
        newborn_thresh=0.5,
        id_thresh=0.2,
        dtype=dtype,
        device=device
    )

    run_tracking_with_detections(frame_dir, detection_path, output_video_path, tracker, device, dtype)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Tracking:  38%|███▊      | 348/927 [01:18<02:09,  4.46it/s]

error in ms_deformable_im2col_cuda: an illegal memory access was encountered





RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasLtMatmul with transpose_mat1 1 transpose_mat2 0 m 256 n 23674 k 256 mat1_ld 256 mat2_ld 256 result_ld 256 abcType 0 computeType 68 scaleType 0

In [None]:
from models.utils.misc import nested_tensor_from_tensor_list
