In [1]:
import os
import glob
import cv2
import torch
import numpy as np
from transformers import DetrImageProcessor, DetrForObjectDetection
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from torchvision.ops import box_iou


2024-08-18 03:13:23.318845: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-18 03:13:23.501571: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-18 03:13:23.565740: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-18 03:13:23.585559: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-18 03:13:23.722504: I tensorflow/core/platform/cpu_feature_guar

In [10]:
keyframes_dir = '/media/daoan/T7 Shield2/VN_Multi_User_Video_Search/Keyframes'
all_keyframe_paths = dict()
for part in sorted(os.listdir(keyframes_dir)):
#     data_part = part.split('_')[-1] # L01, L02 for ex
    all_keyframe_paths[part] =  dict()

for data_part in sorted(all_keyframe_paths.keys()):
    data_part_path = f'{keyframes_dir}/{data_part}'
    video_dirs = sorted(os.listdir(data_part_path))
    video_ids = [video_dir.split('_')[-1] for video_dir in video_dirs]
    for video_id, video_dir in zip(video_ids, video_dirs):
        keyframe_paths = sorted(glob.glob(f'{data_part_path}/{video_dir}/*.jpg'))
        all_keyframe_paths[data_part][video_id] = keyframe_paths

In [13]:
class VisualEncoding:
    def __init__(self,
                 classes=('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
                          'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
                          'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                          'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
                          'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                          'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
                          'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
                          'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
                          'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
                          'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
                          'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
                          'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
                          'scissors', 'teddy bear', 'hair drier', 'toothbrush'),
                 colors=('black', 'blue', 'brown', 'green', 'grey', 'orange_', 'pink', 'purple',
                         'red', 'white', 'yellow'),
                 row_str=["0", "1", "2", "3", "4", "5", "6"],
                 col_str=["a", "b", "c", "d", "e", "f", "g"]):

        self.classes = classes
        self.colors = colors
        self.classes2idx = dict()
        for i, class_ in enumerate(classes):
            self.classes2idx[class_] = i
        self.n_row = len(row_str)
        self.n_col = len(col_str)

        x_pts = np.linspace(0, 1, self.n_row + 1)
        y_pts = np.linspace(0, 1, self.n_col + 1)

        self.grid_bboxes = []
        self.grid_labels = []
        for i in range(self.n_row):
            for j in range(self.n_col):
                label = col_str[j] + row_str[i]
                self.grid_bboxes.append([x_pts[j], y_pts[i], x_pts[j + 1], y_pts[i + 1]])
                self.grid_labels.append(label)

        self.grid_bboxes = np.array(self.grid_bboxes)

    def visualize_grid(self, grid_vis=None):
        if grid_vis is None:
            grid_vis = np.zeros((500, 500, 1))

        vis_h, vis_w, _ = grid_vis.shape
        font = cv2.FONT_HERSHEY_SIMPLEX
        fontScale = 0.5
        color = (255, 0, 0)
        thickness = 2
        for i in range(self.n_row * self.n_col):
            x_start, y_start, x_end, y_end = self.grid_bboxes[i]
            label = self.grid_labels[i]
            org = (int((x_start + (x_end - x_start) / 2) * vis_w), int((y_start + (y_end - y_start) / 2) * vis_h))

            # Draw text
            grid_vis = cv2.putText(grid_vis, label, org, font, fontScale, color, thickness, cv2.LINE_AA)
            # Draw grid
            grid_vis = cv2.rectangle(grid_vis, (int(x_start * vis_w), (int(y_start * vis_h))),
                                     (int(x_end * vis_w), int(y_end * vis_h)), color, thickness)
        plt.imshow(grid_vis)

    def encode_bboxes(self, bboxes, labels):
        '''
        Args:
            bboxes: np.array: (n_bboxes, 4) - expected normalized bbox in form (x0, y0, x1, y1)
            labels: np.array: (n_bboxes, )
        '''
        iou = box_iou(torch.as_tensor(bboxes), torch.as_tensor(self.grid_bboxes))
        bboxes_idx, locs_idx = np.nonzero(iou.numpy())

        context = []
        for bbox_idx, loc_idx in zip(bboxes_idx, locs_idx):
            context.append(self.grid_labels[loc_idx] + self.classes[labels[bbox_idx]].replace(" ", ""))
        context = ' '.join(map(str, context))
        return context

    def encode_classes(self, labels):
        '''
        Args:
            labels: np.array: (n_bboxes, )
        '''
        unique_classes, counts = np.unique(labels, return_counts=True)
        context = []
        for unique_class, count in zip(unique_classes, counts):
            for i in range(count):
                context.append(self.classes[unique_class].replace(" ", "") + str(i))
        context = ' '.join(map(str, context))
        return context

    def encode_numbers(self, labels):
        '''
        Args:
            labels: np.array: (n_bboxes, )
        '''
        unique_classes, counts = np.unique(labels, return_counts=True)
        context = []
        for unique_class, count in zip(unique_classes, counts):
            context.append(self.classes[unique_class].replace(" ", "") + str(count))
        context = ' '.join(map(str, context))
        return context

    def encode_colors(self, bboxes, colors):
        '''
        Args:
            bboxes: np.array: (n_bboxes, 4) - expected normalized bbox in form (x0, y0, x1, y1)
            colors: np.array: (n_bboxes, )
        '''
        iou = box_iou(torch.as_tensor(bboxes), torch.as_tensor(self.grid_bboxes))
        bboxes_idx, locs_idx = np.nonzero(iou.numpy())

        context = []
        for bbox_idx, loc_idx in zip(bboxes_idx, locs_idx):
            context.append(self.grid_labels[loc_idx] + colors[bbox_idx].replace("_", ""))
        context = ' '.join(map(str, context))
        return context

    def encode(self, bboxes=None, labels=None, bboxes_colors=None, colors=None):
        '''
        Args:
            bboxes: np.array: (n_bboxes, 4) - expected normalized bbox in form (x0, y0, x1, y1)
            labels: np.array: (n_bboxes, )
        '''
        results = dict()
        if bboxes is not None:
            results['bbox'] = self.encode_bboxes(bboxes, labels)
            results['class'] = self.encode_classes(labels)
        else:
            results['bbox'] = results['class'] = None

        if bboxes_colors is not None:
            results['color'] = self.encode_colors(bboxes_colors, colors)
        else:
            results['color'] = None

        return results

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
import os
import torch
from tqdm import tqdm
from PIL import Image
import torchvision.transforms as T

# Define the transform
transform = T.Compose([
    T.Resize((800, 800)),  # Resize to the required size (adjust as needed)
    T.ToTensor(),  # Convert to tensor
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet means and stds (adjust if different for your model)
])

# Batch size and directories
bs = 4
save_dirs = ["bboxes_encoded", "classes_encoded", "number_encoded"]
encoder = VisualEncoding()
conf_threshold = 0.5

# Assuming 'device' is already defined, e.g., device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Ensure the model is on the correct device
model.to(device)

# Create directories if they don't exist
for save_dir in save_dirs:
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

for key, video_keyframe_paths in tqdm(all_keyframe_paths.items()):
    video_ids = sorted(video_keyframe_paths.keys())

    for save_dir in save_dirs:
        key_dir = os.path.join(save_dir, key)
        if not os.path.exists(key_dir):
            os.makedirs(key_dir)

    for video_id in tqdm(video_ids):
        video_keyframe_path = video_keyframe_paths[video_id]
        video_encoded_bboxes, video_encoded_classes, video_encoded_numbers = [], [], []

        for i in tqdm(range(0, len(video_keyframe_path), bs)):
            # Support batchsize inferencing
            image_paths = video_keyframe_path[i:i+bs]
            
            images = [Image.open(path).convert("RGB") for path in image_paths]
            images = [transform(image).unsqueeze(0).to(device) for image in images]  # Apply transform and move to device
            images = torch.cat(images)

            model.eval()
            with torch.no_grad():
                results = model(images)  # Perform inference
            
            # Check the structure of the result
            for result in results:
                # Print the result to understand its structure
                # Assuming the result is a dictionary with keys 'boxes', 'labels', and 'scores'
                if isinstance(result, dict):
                    bboxes = result.get('boxes').cpu().numpy().copy()
                    labels = result.get('labels').cpu().numpy().copy().astype(int)
                    scores = result.get('scores').cpu().numpy().copy()

                    # Filter out low-confidence results
                    high_conf_indices = scores >= conf_threshold
                    bboxes = bboxes[high_conf_indices]
                    labels = labels[high_conf_indices]

                    if len(bboxes) == 0:
                        video_encoded_bboxes.append("")
                        video_encoded_classes.append("")
                        video_encoded_numbers.append("")
                    else:
                        video_encoded_bboxes.append(encoder.encode_bboxes(bboxes, labels))
                        video_encoded_classes.append(encoder.encode_classes(labels))
                        video_encoded_numbers.append(encoder.encode_numbers(labels))
                else:
                    print("Unexpected result format:", type(result))

        # Save the video context txt
        with open(f"bboxes_encoded/{key}/{video_id}.txt", "w") as f:
            for item in video_encoded_bboxes:
                f.write("%s\n" % item)
        with open(f"classes_encoded/{key}/{video_id}.txt", "w") as f:
            for item in video_encoded_classes:
                f.write("%s\n" % item)
        with open(f"number_encoded/{key}/{video_id}.txt", "w") as f:
            for item in video_encoded_numbers:
                f.write("%s\n" % item)


  0%|          | 0/36 [00:00<?, ?it/s]
  0%|          | 0/31 [00:00<?, ?it/s][A

  0%|          | 0/269 [00:00<?, ?it/s][A[A

  1%|          | 2/269 [00:00<00:24, 10.79it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.0064,  -0.4377,  -4.1459,  ...,  -8.0918,  -4.4866,   9.6941],
         [-18.2308,  -2.3481,  -4.5492,  ...,  -8.6540,  -5.6152,  12.2046],
         [-18.0417,  -1.3560,  -6.1195,  ..., -10.9284,  -5.3899,  11.2066],
         ...,
         [-17.4687,  -1.2289,  -6.9598,  ...,  -6.3507,  -3.1151,   9.9956],
         [-18.3279,  -3.2072,  -6.6606,  ..., -11.3488,  -6.5307,  11.9128],
         [-18.0381,  -2.1562,  -4.3081,  ...,  -7.7382,  -5.4919,  12.0679]],

        [[-14.6960,  -0.0658,  -3.8947,  ...,  -7.7823,  -4.5178,   9.4281],
         [-18.2931,  -3.0905,  -4.3888,  ...,  -6.9935,  -5.8552,  12.0963],
         [-16.6944,  -0.7175,  -4.9582,  ...,  -9.1602,  -5.5355,  10.4543],
         ...,
         [-17.4453,  -1.1707,  -6.5351,  ...,  -5.9869,  -3.5557,   9.9831],
         [-18.5105,  -2.5208,  -5.9311,  ..., -10.3030,  -6.7576,  11.9855],
         [-16.9608,  -1.3454,  -5.9397,  ...,  -5.6322,  -3.729



  1%|▏         | 4/269 [00:00<00:23, 11.10it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-13.8887,   6.8728,  -4.9564,  ...,  -6.3692,  -4.8594,   6.2813],
         [-14.2749,   6.6047,  -5.5384,  ...,  -6.5083,  -5.0309,   7.0414],
         [-15.0092,   6.5997,  -3.9654,  ...,  -5.9606,  -6.9469,   6.3719],
         ...,
         [-14.6186,   1.0017,  -5.8676,  ...,  -6.5000,  -4.2980,   6.5992],
         [-15.5190,   5.6165,  -3.6941,  ...,  -8.1600,  -6.1118,   7.3561],
         [-12.4185,   1.8322,  -4.3913,  ...,  -4.9567,  -3.6637,   5.9353]],

        [[-13.8430,   6.8946,  -5.0161,  ...,  -6.6334,  -4.9593,   6.4429],
         [-13.4616,   7.2503,  -5.2002,  ...,  -6.9863,  -5.3047,   7.0820],
         [-14.6953,   3.5914,  -4.5250,  ...,  -7.0425,  -7.5981,   6.5418],
         ...,
         [-14.7325,   1.0669,  -6.5611,  ...,  -6.3324,  -4.6202,   6.8460],
         [-15.4680,   6.6763,  -4.7502,  ...,  -8.5353,  -6.5396,   7.5325],
         [-12.5980,   1.9118,  -4.9815,  ...,  -4.9394,  -4.036



  2%|▏         | 6/269 [00:00<00:25, 10.51it/s][A[A

  3%|▎         | 8/269 [00:00<00:23, 10.88it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.8501,   2.4125,  -1.7046,  ...,  -9.6962,  -4.7753,   7.7608],
         [-15.0389,   0.0321, -11.6606,  ..., -10.4584,   5.2897,   9.5112],
         [-13.8866,  -1.4045,  -5.3993,  ...,  -5.4586,  -2.8743,   7.3913],
         ...,
         [-15.5678,   2.9876,  -8.0080,  ...,  -6.6144,  -1.2316,   7.9229],
         [-16.3844,   2.7351,  -7.1462,  ...,  -8.4074,  -2.0876,   8.4550],
         [-15.9522,   1.4634,  -5.5580,  ...,  -1.8367,  -1.0993,   8.9090]],

        [[-14.0595,   3.9820,  -4.1293,  ...,  -7.8071,  -2.2521,   7.8592],
         [-15.0311,   0.2227, -12.6866,  ..., -10.3775,   6.0753,   9.6792],
         [-14.1325,  -1.2664,  -6.2211,  ...,  -5.7856,  -2.8647,   7.5568],
         ...,
         [-15.2153,   2.7114,  -8.9028,  ...,  -3.2216,   0.3914,   7.6242],
         [-16.2205,   3.0233,  -7.5994,  ...,  -6.6803,  -1.5880,   8.4147],
         [-15.8506,   1.8547,  -6.0418,  ...,  -1.3191,   0.370



  4%|▎         | 10/269 [00:00<00:23, 10.85it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.5508,  -0.5733,  -5.4116,  ...,  -2.9153,  -5.4420,   8.2464],
         [-15.1780,   2.8384,  -4.7076,  ...,  -3.0268,  -2.8481,   9.1739],
         [-16.8206,  -1.3344,  -5.3395,  ...,  -4.2744,  -3.6851,   9.0109],
         ...,
         [-17.1406,   5.8374,  -5.6424,  ...,  -7.5965,  -6.3014,   9.6771],
         [-17.9021,   3.9748,  -4.9722,  ...,  -6.6263,  -6.4893,   9.6150],
         [-17.5644,   5.2700,  -5.7332,  ...,  -8.6799,  -6.0144,   9.5657]],

        [[-16.6434,   2.4956,  -2.8907,  ...,  -6.1035,  -7.4560,   7.8412],
         [-17.4148,   4.3671,  -5.4575,  ...,  -9.0110,  -6.5796,  10.1162],
         [-16.5913,   0.8897,  -3.9169,  ...,  -5.9231,  -4.9144,   8.8819],
         ...,
         [-17.5372,   5.8144,  -5.7079,  ...,  -7.9644,  -7.3256,   9.9791],
         [-17.8048,   5.7513,  -3.5450,  ..., -10.5003,  -8.4927,   9.4316],
         [-18.9253,   5.7367,  -6.2071,  ...,  -8.2529,  -6.561



  4%|▍         | 12/269 [00:01<00:23, 11.07it/s][A[A

  5%|▌         | 14/269 [00:01<00:23, 10.89it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.8346,  -1.0721,  -3.4793,  ...,  -7.1491,  -6.2519,   6.6731],
         [-15.3820,  -0.9719,  -5.6188,  ...,  -5.4920,  -6.5107,   6.9512],
         [-14.2177,  -2.3584,  -2.9394,  ...,  -9.3170,  -2.6281,   7.1700],
         ...,
         [-16.0474,   0.3007,  -4.1225,  ...,  -6.8593,  -9.3852,   6.9530],
         [-15.7649,  -1.5529,  -3.3511,  ...,  -8.1400,  -6.0314,   7.5328],
         [-13.7010,  -1.7427,  -7.3477,  ...,  -3.8057,  -5.6899,   5.7876]],

        [[-15.3045,  -0.3450,  -4.3590,  ...,  -7.4139,  -8.3701,   7.1559],
         [-16.4897,   0.7745,  -3.5120,  ...,  -6.2856,  -9.1895,   7.8838],
         [-15.0755,   3.8257,  -3.5940,  ...,  -7.1107,  -6.5043,   7.1185],
         ...,
         [-16.4561,  -0.2744,  -4.6232,  ...,  -7.1561, -10.2146,   7.5957],
         [-16.2994,  -0.5283,  -3.5182,  ...,  -8.3446,  -9.1258,   7.7591],
         [-14.8404,   0.1764,  -4.9471,  ...,  -6.0466,  -8.210



  6%|▌         | 16/269 [00:01<00:22, 11.05it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.5656,  -0.8675, -11.9519,  ...,  -4.5596,  -4.2660,  10.0285],
         [-18.2696,  -1.4022, -12.0452,  ...,  -5.7045,  -3.7291,  10.7298],
         [-16.3527,  -2.6237, -10.1396,  ...,  -2.4845,  -1.0406,   8.6031],
         ...,
         [-17.1471,  -0.6620, -11.3897,  ...,  -3.0735,  -2.7228,  10.3551],
         [-17.6347,  -0.8463, -12.4068,  ...,  -2.5169,   0.2472,  10.0531],
         [-17.2590,  -1.1327, -13.0467,  ...,  -2.6673,  -1.6837,  10.6437]],

        [[-17.4926,  -1.5645, -12.6198,  ...,  -3.4566,  -2.2690,  10.3458],
         [-18.8650,  -2.2248, -13.1819,  ...,  -2.8733,  -2.7521,  11.4628],
         [-16.7215,  -2.8225, -10.2690,  ...,  -3.0789,  -0.9441,   8.8937],
         ...,
         [-16.5130,  -0.7052, -11.6561,  ...,   0.3184,  -1.7974,   9.6596],
         [-18.6076,  -0.3283, -10.8473,  ...,  -4.8520,  -3.9710,  10.3656],
         [-18.5772,  -1.7249, -12.0713,  ...,  -1.8659,  -3.127



  7%|▋         | 18/269 [00:01<00:23, 10.84it/s][A[A

  7%|▋         | 20/269 [00:01<00:23, 10.70it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-1.6551e+01,  9.0628e-01, -4.0596e+00,  ..., -8.1432e+00,
          -1.0917e+01,  7.6301e+00],
         [-1.7307e+01, -9.6476e-02, -6.4012e+00,  ..., -8.7376e+00,
          -7.6591e+00,  9.1486e+00],
         [-1.8112e+01, -1.4456e+00, -4.2690e+00,  ..., -9.5199e+00,
          -1.2658e+01,  9.0626e+00],
         ...,
         [-1.7151e+01,  7.8650e-01, -5.5753e+00,  ..., -7.5692e+00,
          -4.8671e+00,  9.1636e+00],
         [-1.6355e+01,  9.6035e+00, -5.0421e+00,  ..., -9.9930e+00,
          -8.7070e+00,  7.5938e+00],
         [-1.6950e+01, -2.0226e-01, -6.5073e+00,  ..., -1.0107e+01,
          -4.1214e+00,  9.2690e+00]],

        [[-1.6746e+01, -1.6588e-01, -4.2402e+00,  ..., -8.7973e+00,
          -1.3192e+01,  8.2233e+00],
         [-1.8052e+01, -1.0873e-02, -6.0184e+00,  ..., -8.5144e+00,
          -9.7780e+00,  9.8588e+00],
         [-1.8794e+01, -2.4026e+00, -3.7413e+00,  ..., -1.2872e+01,
          -1.503



  8%|▊         | 22/269 [00:02<00:22, 10.84it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.4338,   1.4994,  -3.7987,  ...,  -2.6608,  -5.1160,   5.7417],
         [-13.8709,   4.2417,  -5.7312,  ...,  -6.0033,  -2.9097,   6.2994],
         [-15.0223,   0.6130,  -3.6990,  ...,  -2.9895,  -8.3327,   6.0882],
         ...,
         [-14.7412,   2.8363,  -7.8864,  ...,  -3.4864,  -3.2900,   7.4406],
         [-14.5596,   9.4161,  -5.7169,  ...,  -9.0217,  -6.4483,   6.6513],
         [-15.3949,   6.1505,  -6.2742,  ...,  -4.7103,  -4.6461,   6.9348]],

        [[-13.2575,  -0.6688,  -3.8906,  ...,  -5.6658,  -6.3538,   5.4457],
         [-12.9197,   6.7812,  -5.6291,  ...,  -6.2968,  -3.8150,   6.0141],
         [-15.3347,   1.2722,  -2.0749,  ...,  -4.4542, -12.3224,   6.4545],
         ...,
         [-14.3635,   4.3331,  -5.5068,  ...,  -2.8738,  -4.2066,   5.9496],
         [-13.7466,  10.2911,  -5.6403,  ..., -10.2828,  -7.1922,   5.9468],
         [-14.1141,   6.4429,  -4.8946,  ...,  -5.0982,  -4.849



  9%|▉         | 24/269 [00:02<00:22, 11.03it/s][A[A

 10%|▉         | 26/269 [00:02<00:21, 11.15it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-1.8443e+01,  2.0506e+00, -1.1833e+01,  ..., -2.0877e+00,
           2.2154e+00,  1.0156e+01],
         [-1.6006e+01, -1.3016e-03, -1.5310e+01,  ...,  3.7047e+00,
           7.5460e+00,  1.0181e+01],
         [-1.9148e+01,  2.9473e+00, -1.2230e+01,  ..., -2.7144e+00,
           2.5124e+00,  1.0203e+01],
         ...,
         [-1.7504e+01,  4.2598e+00, -1.1969e+01,  ..., -4.5558e+00,
           7.5815e-01,  9.3616e+00],
         [-1.7401e+01,  5.6007e+00, -1.2361e+01,  ..., -4.5523e+00,
           1.7517e+00,  9.1672e+00],
         [-1.8497e+01, -3.4816e-01, -1.1119e+01,  ..., -2.5005e+00,
           5.5980e+00,  9.9574e+00]],

        [[-1.8386e+01,  1.5855e+00, -1.3041e+01,  ..., -2.3194e+00,
           3.1368e+00,  1.0319e+01],
         [-1.6164e+01,  9.0123e-01, -1.6893e+01,  ...,  4.7912e+00,
           7.3640e+00,  1.0353e+01],
         [-1.8975e+01,  1.8744e+00, -1.3719e+01,  ..., -4.2520e+00,
           2.787



 10%|█         | 28/269 [00:02<00:21, 11.19it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.8258,  -1.3346,  -3.7011,  ...,  -8.7871,  -8.5759,   7.3909],
         [-14.3235,   5.0156,  -6.0203,  ...,  -6.8928,  -4.0554,   7.8597],
         [-15.5718,  -1.2077,  -4.4928,  ...,  -8.3610,  -5.4078,   7.2148],
         ...,
         [-15.4860,   4.7030,  -7.6818,  ...,  -6.9891,  -5.0964,   8.2968],
         [-15.3076,   7.9596,  -5.8321,  ...,  -9.6577,  -7.0067,   6.9800],
         [-13.5653,   7.6295,  -6.4989,  ...,  -7.2712,  -4.7015,   6.9353]],

        [[-14.8042,  -1.1333,  -4.4768,  ...,  -8.2654,  -9.1669,   7.4198],
         [-14.4535,   3.4055,  -4.1300,  ...,  -7.1237,  -5.6054,   7.7095],
         [-16.1577,  -0.5779,  -5.4163,  ...,  -6.6213,  -4.9793,   7.6238],
         ...,
         [-16.6032,   1.5987,  -6.2059,  ...,  -7.5121,  -5.5407,   8.5845],
         [-15.9125,   6.6250,  -6.1085,  ...,  -9.4638,  -8.0623,   7.7235],
         [-15.1335,   3.5876,  -4.7769,  ...,  -6.5016,  -5.481



 11%|█         | 30/269 [00:02<00:20, 11.41it/s][A[A

 12%|█▏        | 32/269 [00:02<00:20, 11.38it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.3484,   4.0656,  -6.2857,  ...,  -7.0283,  -3.8394,   8.5200],
         [-17.0544,   4.8861,  -8.7262,  ...,  -9.2841,  -5.9962,   9.3635],
         [-16.2422,   4.0221,  -4.8823,  ...,  -8.4496,  -5.3500,   8.5372],
         ...,
         [-16.8974,   2.9582,  -7.3070,  ...,  -6.5800,  -5.1553,   9.4998],
         [-14.7196,  10.4863,  -5.4089,  ...,  -9.8214,  -7.2666,   7.0036],
         [-17.4809,   3.2992,  -7.7235,  ...,  -6.1116,  -5.0405,   9.8639]],

        [[-15.7071,   3.8595,  -5.7571,  ...,  -7.3716,  -4.4744,   7.8718],
         [-18.6398,  -0.3552,  -7.6178,  ...,  -9.9741,  -4.1170,  10.3805],
         [-18.3240,  -0.6258,  -5.1747,  ...,  -7.8846,  -4.7838,   9.3274],
         ...,
         [-17.0822,   2.7499,  -7.8628,  ...,  -8.1715,  -5.9594,   9.3612],
         [-14.1980,  10.6252,  -6.0537,  ...,  -9.9997,  -6.5387,   6.5805],
         [-17.8099,   2.9215,  -8.3140,  ...,  -7.4344,  -4.697



 13%|█▎        | 34/269 [00:03<00:20, 11.40it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.9018,  -1.2807,  -6.3100,  ...,  -7.1743,  -6.0336,   6.4816],
         [-16.4238,   0.7565,  -5.6701,  ...,  -5.4126,  -4.4203,   8.1389],
         [-16.3439,   3.2126,  -2.7597,  ...,  -6.6636,  -7.2799,   7.2689],
         ...,
         [-15.5709,   1.9859, -10.3390,  ...,  -3.6857,  -2.5224,   7.6425],
         [-14.6732,   9.7401,  -6.5439,  ...,  -9.5121,  -6.9212,   6.2764],
         [-16.8953,   0.3353,  -7.8650,  ...,  -4.1254,  -3.3411,   7.9025]],

        [[-14.7982,  -1.2543,  -6.2895,  ...,  -6.4464,  -5.8076,   6.4060],
         [-15.5323,  -3.1985,  -6.0839,  ...,  -8.8518,  -4.9382,   6.6890],
         [-17.2868,   1.8957,  -4.0908,  ...,  -7.6480,  -7.6752,   7.6511],
         ...,
         [-14.9220,  -2.0897, -12.2035,  ...,  -6.7224,  -2.3781,   7.2369],
         [-15.0568,   8.4171,  -6.3305,  ...,  -8.9605,  -6.7212,   6.4340],
         [-13.4971,  -0.7727, -12.8053,  ...,  -5.7296,  -2.409



 13%|█▎        | 36/269 [00:03<00:20, 11.41it/s][A[A

 14%|█▍        | 38/269 [00:03<00:20, 11.23it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.2267,   3.3597,  -4.6764,  ...,  -7.6918, -10.8738,   9.0612],
         [-16.4949,   0.7030,  -7.7004,  ...,  -4.9226,  -3.4778,   9.0529],
         [-15.7626,   5.0770,  -4.8885,  ...,  -5.4695,  -6.0858,   8.6324],
         ...,
         [-17.0458,  -0.3555,  -7.3915,  ...,  -2.5130,  -4.0417,   8.8376],
         [-16.7221,   6.9497,  -6.7592,  ...,  -5.9700,  -6.3948,   9.1027],
         [-16.9084,  -0.4585,  -7.8804,  ...,  -3.0847,  -4.5629,   8.7470]],

        [[-16.9541,   4.5743,  -3.5318,  ...,  -9.3710,  -9.9048,   9.2026],
         [-16.3112,   1.4638,  -8.5029,  ...,  -7.9260,  -5.2686,   9.4239],
         [-15.9001,   5.4100,  -3.7334,  ...,  -7.2171,  -7.0366,   8.5912],
         ...,
         [-16.2461,   0.0468,  -5.7565,  ...,  -3.5578,  -3.5274,   7.8905],
         [-15.4671,   7.2678,  -5.8353,  ...,  -5.8405,  -6.1362,   7.9941],
         [-16.3384,   0.5360,  -7.7937,  ...,  -6.1665,  -3.199



 15%|█▍        | 40/269 [00:03<00:22, 10.15it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.0643,  -3.2849,  -4.3525,  ..., -10.4921,  -6.7187,  10.3856],
         [-18.7276,  -4.8531,  -6.2500,  ..., -15.4205,  -8.3386,  11.5420],
         [-17.4602,  -4.3196,  -5.0002,  ..., -10.1721,  -5.6920,  10.3650],
         ...,
         [-18.6068,  -3.8497,  -5.4646,  ..., -14.0891,  -7.7993,  11.1514],
         [-18.5686,  -5.0781,  -6.7268,  ..., -19.6053,  -8.6540,  10.7950],
         [-18.9802,  -4.2596,  -6.0813,  ..., -14.7330,  -7.4342,  11.3455]],

        [[-17.5161,  -4.4014,  -4.1900,  ..., -11.8158,  -6.8250,  10.7145],
         [-18.6355,  -4.1679,  -5.6639,  ..., -16.3226,  -8.6104,  11.3651],
         [-17.3938,  -4.7698,  -4.7540,  ..., -11.1061,  -5.9252,  10.4506],
         ...,
         [-17.0628,  -1.7096,  -8.3346,  ...,  -9.3843,  -4.6856,  10.6320],
         [-17.9744,  -4.1852,  -4.6677,  ..., -11.7187,  -5.8889,  10.7507],
         [-18.8478,  -5.1471,  -6.2273,  ..., -18.4373,  -9.461



 16%|█▌        | 42/269 [00:03<00:23,  9.82it/s][A[A

 16%|█▌        | 43/269 [00:04<00:23,  9.57it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.4046,   4.0177,  -3.1529,  ...,  -7.0225,  -5.7238,   8.3265],
         [-17.3855,   1.3517,  -6.6949,  ...,  -2.6459,  -5.1766,   9.9379],
         [-18.2026,   3.4198,  -3.1283,  ..., -11.8418,  -7.4804,   9.8983],
         ...,
         [-15.5499,   0.0626,  -6.2858,  ...,  -1.6201,  -5.1231,   8.7169],
         [-16.6777,   4.9110,  -3.9109,  ..., -10.8764,  -7.9506,   9.2134],
         [-15.1755,  -0.1580,  -6.2324,  ...,  -1.3115,  -5.0193,   8.4913]],

        [[-16.8459,   6.0961,   0.2373,  ...,  -7.0200,  -9.1357,   9.5431],
         [-16.5955,   5.7760,  -2.7055,  ...,  -6.5678,  -6.7684,   9.4666],
         [-17.9529,   5.4314,  -0.8610,  ...,  -8.4539,  -9.2687,   9.5938],
         ...,
         [-17.2182,   6.5553,  -5.8073,  ...,  -6.4952,  -5.7903,   9.5702],
         [-16.9210,   6.5608,  -3.8286,  ..., -10.5715,  -7.2981,   8.7938],
         [-17.0210,   7.7559,  -4.6378,  ...,  -6.7111,  -6.851



 17%|█▋        | 45/269 [00:04<00:23,  9.62it/s][A[A

 17%|█▋        | 47/269 [00:04<00:21, 10.15it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.4995,  -0.0668,  -5.4249,  ...,  -5.4881,  -6.3645,   8.7246],
         [-15.5254,  -1.3908,  -5.5808,  ...,  -9.6229,  -1.8732,   9.7770],
         [-18.3261,   1.1120,  -3.5256,  ...,  -8.1888,  -5.1281,   9.7443],
         ...,
         [-18.2465,   1.6438,  -6.0397,  ...,  -6.3516,  -6.6720,  10.3871],
         [-18.5975,   6.1715,  -5.0556,  ...,  -8.2474,  -4.1241,   9.9402],
         [-17.3820,   0.4970,  -2.8506,  ...,  -8.6873,  -4.8155,   9.2616]],

        [[-17.7583,   1.1057,  -5.5323,  ...,  -8.4181,  -7.2114,   8.7091],
         [-16.7848,  -0.8923,  -6.9830,  ...,  -3.4558,  -0.8142,   9.6124],
         [-16.9557,   0.4489,  -5.3056,  ...,  -5.5012,  -2.4572,   8.6416],
         ...,
         [-16.6660,  -0.2410,  -5.5803,  ...,  -3.9117,  -2.1096,   9.7169],
         [-18.2597,   6.0428,  -6.5984,  ..., -10.3975,  -7.4760,   9.3114],
         [-16.7180,   0.5859,  -6.4248,  ...,  -4.3664,  -1.233



 18%|█▊        | 49/269 [00:04<00:20, 10.49it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.2755,  -4.0248,  -5.3861,  ...,  -8.3943,  -5.1390,   9.8521],
         [-16.6633,  -2.6108,  -7.0639,  ..., -11.1530,  -7.0923,  10.5975],
         [-17.0331,  -4.4376,  -5.4428,  ...,  -9.0949,  -5.6553,  10.2240],
         ...,
         [-17.1153,  -4.0467,  -5.2799,  ..., -10.2239,  -5.5938,  10.3181],
         [-17.9854,  -5.7350,  -6.7032,  ..., -14.6843,  -7.8328,  11.0167],
         [-16.5889,  -3.2672,  -6.7124,  ..., -11.8426,  -6.1019,  10.9665]],

        [[-15.6675,  -1.0043,  -7.7178,  ...,  -9.7975,  -4.1842,   9.6830],
         [-17.2415,  -2.8434,  -6.8069,  ...,  -8.7989,  -5.7422,  10.3896],
         [-16.4697,  -1.3102,  -6.7830,  ..., -10.5574,  -6.5258,  10.7077],
         ...,
         [-17.5012,  -3.7898,  -5.4497,  ..., -10.4333,  -6.8312,  10.3589],
         [-18.4424,  -2.4891,  -8.1937,  ..., -12.6861,  -6.9328,  11.3559],
         [-16.6882,  -3.5059,  -6.9008,  ...,  -9.8695,  -5.881



 19%|█▉        | 51/269 [00:04<00:20, 10.59it/s][A[A

 20%|█▉        | 53/269 [00:04<00:19, 10.87it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.6337,  -3.3826, -10.3401,  ...,  -4.8808,  -6.6982,   9.4909],
         [-16.3129,  -2.5726,  -9.8694,  ...,  -3.0661,  -7.5887,   7.7172],
         [-18.9123,  -3.7828, -10.3562,  ...,  -5.0124,  -5.7204,  10.1219],
         ...,
         [-17.3212,  -0.9372,  -8.0363,  ...,  -5.3410,  -8.3449,   8.3927],
         [-16.8611,  -0.0455,  -6.2604,  ...,  -2.5647,  -5.3572,   8.2560],
         [-16.0569,  -2.8683, -10.2019,  ...,  -3.4115,  -7.6309,   7.5145]],

        [[-17.1127,  -4.2472,  -6.0852,  ...,  -8.9232,  -8.2440,   8.8849],
         [-16.2014,  -0.0951,  -5.2067,  ...,  -5.6797,  -8.3239,   7.9674],
         [-14.2851,   0.8950,  -0.8780,  ...,  -5.6111,  -5.3939,   7.0903],
         ...,
         [-16.7173,  -1.8729,  -6.0190,  ...,  -7.2175,  -8.8012,   8.1676],
         [-16.8492,  -2.0355,  -4.9158,  ...,  -8.1993,  -7.7750,   9.0982],
         [-14.3159,  -2.4958,  -9.2377,  ...,  -3.8080,  -5.795



 20%|██        | 55/269 [00:05<00:20, 10.65it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.7816,   0.8629,  -6.0420,  ...,  -4.0183,  -4.0335,   7.8393],
         [-17.4774,   2.1383,  -7.7590,  ...,   1.5260,  -2.6591,  10.1680],
         [-15.5955,  -3.0430,  -4.8735,  ...,  -5.6153,  -1.6849,   8.4532],
         ...,
         [-16.6764,   3.3262,  -8.3190,  ...,  -2.4609,  -3.3285,   8.8701],
         [-16.3072,   4.0549,  -5.7540,  ...,  -3.3253,  -5.1601,   9.2761],
         [-16.9677,   2.6508,  -6.6269,  ...,  -0.7690,  -4.2008,   9.5148]],

        [[-15.5224,   0.4360,  -3.9059,  ...,  -7.2446,  -8.2979,   7.9207],
         [-17.4711,   0.4659,  -6.5761,  ...,  -6.3672,  -2.9935,  10.0262],
         [-14.7369,   2.0444,  -2.8315,  ...,  -9.0395,  -9.4987,   6.0290],
         ...,
         [-16.5317,  -0.7656,  -4.5685,  ...,  -5.3537,  -5.9371,   8.4715],
         [-17.1047,  -1.6702,  -2.1067,  ..., -11.7623,  -9.9606,   8.6456],
         [-14.7513,   0.7487,  -4.9023,  ...,  -1.3285,  -6.102



 21%|██        | 57/269 [00:05<00:19, 10.72it/s][A[A

 22%|██▏       | 59/269 [00:05<00:19, 10.84it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.6598,   3.4996,  -5.1662,  ...,  -2.2797,  -4.5603,   7.3718],
         [-15.6449,   0.9985,  -5.7033,  ...,  -3.1426,  -4.7280,   7.4141],
         [-13.5737,  -0.5450,  -6.1763,  ...,   1.4685,  -2.3138,   5.8977],
         ...,
         [-15.8285,   4.7933,  -6.0864,  ...,  -4.7657,  -4.6941,   7.7614],
         [-12.4082,  12.3216,  -3.8335,  ...,  -7.4546,  -5.4239,   4.4994],
         [-14.3048,   6.2750,  -7.7041,  ...,  -0.3237,  -2.1194,   7.0939]],

        [[-15.2163,   4.6546,  -4.8078,  ...,  -2.5287,  -5.1764,   7.3201],
         [-15.2634,   3.0783,  -5.5079,  ...,  -3.3280,  -5.1252,   7.0207],
         [-13.6476,   1.4825,  -2.9945,  ...,   0.1003,  -5.0452,   5.8825],
         ...,
         [-15.4856,   4.5318,  -6.7159,  ...,  -3.8676,  -4.0895,   7.4795],
         [-12.3418,  11.7916,  -4.0262,  ...,  -7.2698,  -5.3951,   4.5555],
         [-14.8131,   3.1495,  -9.4089,  ...,   3.4130,  -0.926



 23%|██▎       | 61/269 [00:05<00:19, 10.76it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.7238,  -1.8730, -11.3530,  ...,  -3.7560,  -5.9839,  10.1745],
         [-19.5422,  -2.4521,  -9.6269,  ...,  -7.1356,  -5.0515,  11.0850],
         [-18.4951,  -2.1405, -10.1012,  ...,  -6.1949,  -6.9420,   9.9400],
         ...,
         [-18.2704,  -1.1478, -10.6131,  ...,  -3.8999,  -4.7451,  10.6972],
         [-14.6465,   0.1110, -12.0642,  ...,  -2.7902,  -2.9416,   7.7489],
         [-18.9025,  -3.2663, -11.8338,  ...,  -3.5613,  -3.6128,  10.7540]],

        [[-17.4881,  -2.1101, -11.9041,  ...,  -5.0919,  -6.0148,   9.9504],
         [-16.7691,  -2.1488, -12.0060,  ...,  -1.9461,  -2.2829,   9.6326],
         [-17.1762,  -1.1061, -11.1120,  ...,  -5.0831,  -5.1747,   9.1753],
         ...,
         [-16.1419,   0.3233,  -9.7183,  ...,  -2.5050,  -4.6602,   8.9814],
         [-18.8393,  -1.3286,  -9.5846,  ...,  -6.1055,  -6.2551,  10.4908],
         [-17.5408,  -0.3977, -11.5013,  ...,  -2.4663,  -4.315



 23%|██▎       | 63/269 [00:05<00:19, 10.76it/s][A[A

 24%|██▍       | 65/269 [00:06<00:18, 10.93it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-1.7737e+01,  3.6498e+00, -7.8699e+00,  ..., -8.1077e+00,
          -5.6661e+00,  8.8190e+00],
         [-1.7798e+01,  4.2614e-01, -5.8074e+00,  ..., -7.6981e+00,
          -7.2887e+00,  1.0345e+01],
         [-1.9133e+01,  1.1641e+00, -9.0919e+00,  ..., -4.1707e+00,
          -1.8694e+00,  9.9040e+00],
         ...,
         [-1.7335e+01,  3.1934e+00, -6.0834e+00,  ..., -6.8476e+00,
          -5.8795e+00,  9.9596e+00],
         [-1.2572e+01,  1.0552e+01, -7.1286e+00,  ..., -7.6891e+00,
          -3.5662e+00,  5.4675e+00],
         [-1.8922e+01,  1.7978e+00, -8.8144e+00,  ..., -9.1151e+00,
          -2.2249e+00,  1.0569e+01]],

        [[-1.7101e+01,  1.9517e+00, -8.4738e+00,  ..., -7.1908e+00,
          -4.1237e+00,  8.1790e+00],
         [-1.7435e+01, -2.4171e-01, -7.0380e+00,  ..., -8.1017e+00,
          -7.1103e+00,  9.7087e+00],
         [-1.8687e+01, -7.4887e-01, -9.0332e+00,  ..., -3.9790e+00,
          -2.648



 25%|██▍       | 67/269 [00:06<00:18, 10.81it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.7530,   5.0687,  -8.1930,  ...,  -6.1753,  -3.1640,   7.3816],
         [-16.6793,   6.1686, -11.9861,  ...,  -7.1895,  -1.0995,   8.7892],
         [-15.9793,   7.5339,  -8.7186,  ...,  -7.1719,  -1.9856,   7.9419],
         ...,
         [-15.8579,   4.0155,  -9.8019,  ...,  -5.0409,  -4.0211,   8.3164],
         [-16.6042,   4.6559,  -8.6942,  ...,  -8.7019,  -2.0232,   7.4436],
         [-15.7568,   6.0806,  -9.2143,  ...,  -5.2572,  -1.5319,   7.6107]],

        [[-14.0195,   6.2244,  -7.9019,  ...,  -4.4805,  -2.1378,   6.9740],
         [-15.0153,   7.7720,  -8.3949,  ...,  -7.3737,  -3.3501,   8.1394],
         [-14.7884,   5.5497,  -7.0744,  ...,  -6.2392,  -2.4721,   7.4402],
         ...,
         [-16.3056,   2.7865,  -9.3782,  ...,  -6.5749,  -2.8085,   7.6124],
         [-15.7520,   4.3799,  -7.4191,  ...,  -8.2562,  -2.8326,   7.6136],
         [-14.3192,   6.7732,  -8.0022,  ...,  -5.0871,  -2.760



 26%|██▌       | 69/269 [00:06<00:18, 10.94it/s][A[A

 26%|██▋       | 71/269 [00:06<00:17, 11.02it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.1988,   7.5615,  -6.1840,  ...,  -7.0522,  -5.7774,   6.6969],
         [-16.2736,   3.1944,  -7.4937,  ...,  -6.9907,  -6.7830,   9.1615],
         [-14.3010,   0.6043,  -2.0090,  ...,  -6.2491,  -6.1800,   5.3914],
         ...,
         [-14.6371,   5.4060,  -6.4924,  ...,  -7.0853,  -6.9613,   7.4042],
         [-15.7746,   6.2975,  -6.5688,  ...,  -7.2032,  -5.6361,   7.2467],
         [-16.9223,   4.5978,  -6.3665,  ...,  -7.9585,  -7.1571,   8.4442]],

        [[-15.8402,   5.9549,  -5.9575,  ...,  -7.0934,  -6.0475,   7.6281],
         [-15.3228,   1.7556,  -6.8129,  ...,  -7.8223,  -3.6532,   8.9067],
         [-14.6470,   1.2743,  -1.8415,  ...,  -6.6638,  -6.4766,   5.6175],
         ...,
         [-15.7872,   3.8311,  -4.8527,  ...,  -7.8719,  -7.9292,   7.6948],
         [-16.8772,   5.2573,  -5.7798,  ...,  -9.4374,  -7.3215,   7.4680],
         [-14.2859,   7.0731,  -5.1790,  ...,  -7.7925,  -7.510



 27%|██▋       | 73/269 [00:06<00:19, 10.08it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.5936,  -1.7738,  -5.8251,  ...,  -6.2003,  -3.4176,   7.2888],
         [-16.1150,  -0.6083,  -6.9436,  ...,   3.2318,   2.1029,   8.7893],
         [-14.8820,  -3.7486,  -4.0437,  ...,  -5.1178,  -4.8323,   6.7985],
         ...,
         [-16.4293,  -0.7631,  -7.3054,  ...,  -3.9150,  -3.0802,   8.6898],
         [-16.1973,   0.8887,  -5.8162,  ...,  -4.2519,  -4.6624,   7.7300],
         [-15.5661,  -1.4226,  -7.4041,  ...,  -0.6690,  -1.5123,   8.0128]],

        [[-15.9834,  -0.4655,  -4.9834,  ...,  -5.8245,  -6.2980,   7.9640],
         [-17.8372,  -1.7570,  -4.9350,  ...,  -1.2777,  -6.8799,   9.4656],
         [-18.7251,  -2.7681,  -5.0457,  ...,  -9.6902,  -9.3523,   9.3472],
         ...,
         [-17.5445,  -1.0230,  -4.5754,  ...,  -4.5802,  -7.4797,   8.3996],
         [-17.4227,  -2.1116,  -8.7822,  ..., -10.4666,  -8.9281,   7.9929],
         [-17.2229,  -1.4639,  -5.5577,  ...,  -5.1132,  -8.911



 28%|██▊       | 75/269 [00:06<00:18, 10.35it/s][A[A

 29%|██▊       | 77/269 [00:07<00:18, 10.54it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-1.6274e+01,  9.1827e-01, -7.3206e+00,  ..., -1.2548e+01,
          -1.0929e+01,  7.5083e+00],
         [-1.7277e+01,  2.0034e+00, -7.2556e+00,  ..., -1.3384e+01,
          -1.1929e+01,  7.8806e+00],
         [-1.1534e+01, -2.0169e+00, -5.9609e+00,  ..., -5.5609e+00,
          -1.2030e+00,  5.4251e+00],
         ...,
         [-1.7452e+01, -5.5511e-02, -8.1689e+00,  ..., -1.3587e+01,
          -1.0906e+01,  8.2333e+00],
         [-1.2669e+01,  1.1414e+01, -5.0679e+00,  ..., -9.9234e+00,
          -6.0024e+00,  5.0043e+00],
         [-1.6209e+01,  1.5779e+00, -7.5524e+00,  ..., -7.9276e+00,
          -6.5167e+00,  7.7726e+00]],

        [[-1.5899e+01,  2.0971e+00, -6.7090e+00,  ..., -1.1678e+01,
          -9.9305e+00,  6.8104e+00],
         [-1.7040e+01,  9.4206e-01, -7.5530e+00,  ..., -1.5343e+01,
          -1.2881e+01,  7.6797e+00],
         [-1.1883e+01, -2.5534e+00, -6.0974e+00,  ..., -3.8042e+00,
           3.529



 29%|██▉       | 79/269 [00:07<00:18, 10.50it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-18.3883,   0.8116,  -6.8706,  ..., -11.1363,  -5.4989,   9.5133],
         [-17.8201,  -0.1224,  -7.9589,  ...,  -7.4934,  -6.2992,   9.0694],
         [-18.4386,   1.9395,  -7.0216,  ..., -10.7560,  -5.7347,   9.6283],
         ...,
         [-18.1929,  -0.3728,  -7.8383,  ...,  -5.4579,  -7.3904,   9.7543],
         [-15.0534,  -1.2531,  -7.3384,  ..., -11.7975,  -7.1268,   7.4391],
         [-18.0411,  -1.6111,  -7.0318,  ...,  -5.6830,  -6.0912,   9.2579]],

        [[-16.7552,   0.9184,  -6.8247,  ..., -10.9731,  -6.3166,   8.2608],
         [-15.8334,  -0.1511,  -7.6339,  ...,  -4.3491,  -5.1689,   7.7825],
         [-16.6928,   1.8981,  -5.9536,  ..., -10.0373,  -7.2780,   8.1745],
         ...,
         [-16.8591,   0.6629,  -7.6265,  ...,  -4.6865,  -4.7328,   8.2517],
         [-14.2117,  -0.3090,  -9.2959,  ...,  -6.8898,  -3.1449,   6.6299],
         [-16.8889,   0.3509,  -8.3422,  ...,  -4.3344,  -4.906



 30%|███       | 81/269 [00:07<00:17, 10.68it/s][A[A

 31%|███       | 83/269 [00:07<00:17, 10.87it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.3756,   8.4043,  -3.6163,  ..., -12.6110, -11.5301,   6.5669],
         [-17.5386,   1.5792,  -5.9397,  ..., -13.9538, -12.2117,   9.4637],
         [-14.1699,   3.5875,  -3.8422,  ..., -13.2797, -13.0349,   7.5685],
         ...,
         [-16.7034,   1.7360,  -6.2960,  ..., -15.0913, -12.3629,   9.0470],
         [-14.8834,   0.5936,  -4.6391,  ..., -17.1177, -14.3421,   8.4959],
         [-15.9630,   1.4849,  -5.1248,  ..., -16.5589, -14.5204,   8.5870]],

        [[-15.8189,   5.9524,  -3.3109,  ..., -11.3721, -10.2751,   7.5057],
         [-15.6953,   4.4343,  -5.3986,  ..., -12.8636, -10.1461,   8.6860],
         [-13.5060,   3.2913,  -2.8812,  ..., -14.2362, -14.7298,   6.8122],
         ...,
         [-17.0120,   5.1158,  -4.3308,  ..., -10.1086,  -9.8267,   8.8197],
         [-15.6276,   3.1778,  -3.4654,  ..., -12.7768, -10.7732,   8.1912],
         [-16.6619,   4.7232,  -5.3903,  ...,  -8.6672,  -8.527



 32%|███▏      | 85/269 [00:07<00:16, 11.02it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.8004,   1.3374,  -6.1543,  ...,  -4.6584,  -4.9959,   8.0787],
         [-18.8359,   0.2736,  -7.4985,  ...,  -5.8344,  -5.1199,  10.1819],
         [-14.9831,  -2.1566,  -6.1050,  ...,  -6.3779,  -4.0308,   7.1211],
         ...,
         [-17.8803,  -0.1866,  -8.9717,  ...,  -7.7891,  -5.5410,  10.3416],
         [-17.7872,  -0.6149,  -8.5434,  ...,  -6.9060,  -4.7266,  10.1291],
         [-16.3407,  -2.1217,  -6.4883,  ...,  -8.5508,  -5.1734,   8.3900]],

        [[-17.2085,  -3.8428,  -2.8915,  ..., -11.0639,  -7.4538,   9.9438],
         [-17.6803,  -4.7266,  -4.1806,  ..., -16.8486,  -8.7075,  10.4714],
         [-16.4274,  -4.7641,  -3.5933,  ...,  -8.5970,  -5.5153,   9.4022],
         ...,
         [-19.6225,  -3.1950,  -7.5266,  ..., -10.8723,  -7.7484,  11.7776],
         [-19.0637,  -3.4445,  -7.0576,  ..., -11.1194,  -7.3417,  11.3774],
         [-18.6820,  -3.8238,  -6.5146,  ..., -14.5016,  -9.043



 32%|███▏      | 87/269 [00:08<00:16, 11.17it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.9755,  -0.3237,  -8.7098,  ...,  -5.6281,  -4.4286,   9.5482],
         [-17.5982,  -1.3602, -12.3421,  ...,  -2.1863,   0.4649,  10.1920],
         [-17.3887,   6.6171,  -6.3843,  ...,  -7.1717,  -3.4868,   9.1127],
         ...,
         [-18.2182,  -2.5655, -12.6155,  ...,  -2.8274,  -2.5640,  10.3263],
         [-17.5307,  -0.0958,  -7.9605,  ...,  -8.0726,  -5.1387,  10.4236],
         [-17.1871,  -2.4724, -10.6962,  ...,  -3.6112,   0.5143,  10.4847]],

        [[-16.6786,   4.5958,  -8.0489,  ...,  -6.0693,  -2.7329,   8.7672],
         [-17.1384,  -3.7854, -10.9154,  ...,  -5.9778,  -0.1276,  10.4561],
         [-15.8250,   7.0866,  -7.1434,  ...,  -7.1196,  -2.8632,   7.9340],
         ...,
         [-17.8111,  -0.3812,  -9.9674,  ...,  -7.4284,  -4.3344,  10.3112],
         [-17.0699,  -1.6656,  -9.0199,  ...,  -5.6180,  -0.8624,  10.1064],
         [-16.6194,  -4.2521,  -9.9862,  ...,  -6.0607,   1.465



 33%|███▎      | 89/269 [00:08<00:16, 10.70it/s][A[A

 34%|███▍      | 91/269 [00:08<00:16, 10.69it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.3939,   1.1837,   1.5024,  ...,  -9.9122,  -9.2204,   7.1737],
         [-12.9105,   7.9091,  -2.0461,  ...,  -8.9149,  -5.0358,   5.6930],
         [-14.4498,   2.0315,   2.6086,  ..., -10.4054,  -8.1005,   7.6333],
         ...,
         [-13.9649,   6.4484,  -6.4097,  ...,  -6.1608,  -2.4941,   6.1313],
         [-12.2064,  11.1793,  -3.9578,  ...,  -8.4964,  -4.1154,   5.1964],
         [-14.3299,   6.3934,  -2.7376,  ...,  -9.4267,  -5.0965,   6.0404]],

        [[-15.3927,   1.3852,   1.0875,  ..., -10.1096,  -8.5802,   7.1636],
         [-13.8182,   7.4603,  -2.6081,  ...,  -9.9592,  -5.0459,   5.9126],
         [-14.3620,   1.4548,   3.1342,  ..., -10.7998,  -8.5143,   7.5565],
         ...,
         [-15.0083,   5.6927,  -3.3612,  ...,  -9.3960,  -5.4650,   6.7219],
         [-12.1158,  11.1345,  -3.7525,  ...,  -9.4319,  -4.2430,   5.2845],
         [-14.2540,   6.6238,  -2.6817,  ..., -10.2585,  -5.558



 35%|███▍      | 93/269 [00:08<00:16, 10.77it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.3271,  -3.4983,  -3.1234,  ..., -10.5117,  -6.6290,  10.2607],
         [-19.2303,   0.5531,  -7.9218,  ...,  -9.9696,  -8.9907,  12.1305],
         [-16.8983,  -4.4613,  -3.9533,  ...,  -9.4612,  -5.1917,   9.9545],
         ...,
         [-18.6892,  -4.1471, -11.2532,  ..., -10.9861,  -5.0732,  11.5898],
         [-18.8182,  -4.2273, -10.1841,  ..., -11.4006,  -5.4749,  11.3586],
         [-19.2793,   1.5417,  -7.4057,  ..., -10.4273,  -9.0340,  12.3480]],

        [[-16.5046,  -3.7224,  -2.7177,  ..., -10.0561,  -6.4963,   9.9683],
         [-18.7818,  -4.0325,  -7.3592,  ..., -11.5858, -11.6519,  12.1727],
         [-16.5904,  -4.6935,  -3.3667,  ...,  -9.6437,  -5.3540,   9.8041],
         ...,
         [-19.0118,  -4.1351,  -8.1020,  ..., -11.5024,  -7.0396,  11.5895],
         [-19.4869,  -4.7750,  -9.3243,  ..., -12.3256,  -5.8147,  11.5874],
         [-19.0112,  -3.6748,  -7.3627,  ..., -11.8507, -11.427



 35%|███▌      | 95/269 [00:08<00:15, 10.99it/s][A[A

 36%|███▌      | 97/269 [00:09<00:15, 11.10it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-1.5357e+01, -1.6249e-02, -4.9063e+00,  ..., -2.4110e+00,
          -5.7330e+00,  8.4389e+00],
         [-1.5856e+01,  1.2621e+00, -6.1298e+00,  ..., -1.0597e+00,
          -8.8581e-01,  8.3722e+00],
         [-1.6126e+01, -2.5039e+00, -4.2577e+00,  ..., -5.8955e+00,
          -3.9699e+00,  8.3172e+00],
         ...,
         [-1.7694e+01,  2.4668e+00, -9.3132e+00,  ..., -4.9265e-01,
          -4.0010e+00,  8.6493e+00],
         [-1.6039e+01,  2.2833e+00, -6.0604e+00,  ..., -4.2309e+00,
          -4.1842e+00,  8.0989e+00],
         [-1.7129e+01,  2.6409e+00, -6.5484e+00,  ..., -4.9799e-01,
          -4.0650e+00,  8.9095e+00]],

        [[-1.5529e+01,  1.7120e+00, -5.0066e+00,  ..., -1.8912e+00,
          -5.2818e+00,  8.1546e+00],
         [-1.7646e+01,  4.7697e-01, -8.3148e+00,  ..., -5.0639e+00,
          -6.4305e+00,  9.5881e+00],
         [-1.5700e+01,  4.6149e+00, -5.0217e+00,  ..., -3.8437e+00,
          -6.113



 37%|███▋      | 99/269 [00:09<00:14, 11.38it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.8985,   2.8487,  -3.6808,  ..., -10.6593,  -9.9763,   8.8862],
         [-17.8781,   2.6783,  -3.9909,  ...,  -9.9830,  -9.8102,   9.3652],
         [-18.2098,  -0.2109,  -4.6534,  ..., -12.7514,  -8.9451,   9.8961],
         ...,
         [-18.6439,   0.5942,  -4.3675,  ..., -11.9645, -10.2140,   9.9143],
         [-18.0408,  -2.1762,  -3.7311,  ..., -14.2001,  -9.2753,  10.4395],
         [-18.4040,   0.4868,  -3.7913,  ..., -10.9053, -10.7825,   9.7929]],

        [[-18.1300,   4.4236,  -5.3721,  ..., -10.4635,  -7.0092,   9.9305],
         [-17.3224,   2.6562,  -4.5074,  ...,  -9.7276,  -8.4237,   9.2421],
         [-18.0647,   3.4720,  -4.1041,  ..., -11.0109,  -6.8489,   9.5439],
         ...,
         [-18.1258,   1.0462,  -5.3552,  ..., -11.2364,  -7.6728,   9.5470],
         [-16.9955,  -3.1893,  -3.5705,  ..., -12.8210,  -8.1329,   9.2385],
         [-18.3402,  -0.4832,  -5.3733,  ..., -10.2541,  -8.186



 38%|███▊      | 101/269 [00:09<00:15, 11.12it/s][A[A

 38%|███▊      | 103/269 [00:09<00:14, 11.14it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-13.4025,   5.6801,  -3.1928,  ...,  -6.0500,  -4.4958,   6.7115],
         [-14.5393,   5.5772,  -6.7956,  ...,  -1.9420,  -2.3683,   7.6069],
         [-15.1370,   4.0438,  -5.5888,  ...,  -1.0831,  -3.7244,   7.7070],
         ...,
         [-15.3750,   2.1201,  -9.4646,  ...,  -0.8945,   0.7728,   7.2500],
         [-15.0964,   4.3091,  -5.2255,  ...,  -2.0889,  -2.7134,   7.4254],
         [-14.2133,   4.6973,  -6.3226,  ...,  -3.0597,  -1.5896,   6.7986]],

        [[-13.6183,   6.0414,  -2.8065,  ...,  -7.4082,  -5.0187,   7.0223],
         [-14.8580,   6.7911,  -5.4335,  ...,  -5.3589,  -4.7396,   8.2201],
         [-14.4481,   5.6489,  -3.2671,  ...,  -5.2811,  -5.4817,   7.5319],
         ...,
         [-15.4784,   3.8756,  -7.2345,  ...,  -4.4444,  -0.9535,   7.4848],
         [-15.4999,   5.8308,  -4.6037,  ...,  -5.6676,  -4.4821,   8.1701],
         [-13.5809,   8.0771,  -5.0314,  ...,  -5.7818,  -3.192



 39%|███▉      | 105/269 [00:09<00:15, 10.89it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.9442,   0.4875,  -4.5386,  ...,  -5.9967,  -7.2206,   8.9098],
         [-16.6406,  -1.9329,  -7.2551,  ...,  -8.5652,  -3.0238,   9.6289],
         [-19.5446,  -1.3132,  -6.7865,  ...,  -8.7417,  -9.0022,  10.3761],
         ...,
         [-16.8507,   2.6849,  -5.4936,  ...,  -4.2574,  -5.5963,   9.0014],
         [-15.8569,   9.7764,  -3.2937,  ...,  -9.1773,  -7.3559,   7.1117],
         [-17.2891,  -1.5515,  -6.8727,  ...,  -5.3488,  -4.0717,   9.5510]],

        [[-18.2001,  -2.1972,  -7.2016,  ...,  -9.8933,  -8.6395,   9.6901],
         [-17.8860,   0.8713,  -8.5603,  ...,  -6.9847,  -2.6403,   9.3931],
         [-18.7359,  -0.9433,  -7.0254,  ...,  -8.1817,  -7.4616,   9.5504],
         ...,
         [-17.6364,   1.0696,  -8.5566,  ...,  -6.9170,  -2.9002,   8.6588],
         [-16.9732,   7.6464,  -3.9932,  ..., -10.2820,  -7.8473,   7.8321],
         [-17.7132,   0.5457,  -8.5981,  ...,  -6.8259,  -2.552



 40%|███▉      | 107/269 [00:09<00:14, 10.93it/s][A[A

 41%|████      | 109/269 [00:10<00:14, 10.91it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.4916,  -0.4953,  -8.3852,  ...,  -9.2417,  -3.3604,  10.0207],
         [-16.3269,   1.0897,  -8.3943,  ...,  -9.9816,  -3.8629,   9.7705],
         [-17.8539,   0.6202,  -9.7523,  ...,  -9.9267,  -3.9915,  10.0036],
         ...,
         [-17.1737,   5.1250,  -8.8714,  ...,  -8.2103,  -3.4634,   9.6796],
         [-13.6399,   7.9369,  -7.0477,  ...,  -9.7951,  -3.0341,   6.6950],
         [-17.3019,   3.4934,  -9.7950,  ...,  -8.8549,  -2.7651,   9.8365]],

        [[-16.9068,  -2.1119,  -8.6277,  ..., -10.4079,  -2.9340,   9.0583],
         [-17.7212,  -1.6943, -12.7595,  ..., -10.0956,  -4.5805,   9.4705],
         [-16.6720,  -1.4642, -11.0828,  ..., -11.2712,  -3.3257,   8.8502],
         ...,
         [-15.4090,   2.6613,  -8.5772,  ...,  -9.6426,  -2.8189,   8.1137],
         [-13.5780,   5.3239,  -7.6539,  ..., -10.5059,  -3.4111,   6.7595],
         [-16.1073,   4.2405, -10.2783,  ..., -10.2467,  -3.204



 41%|████▏     | 111/269 [00:10<00:14, 11.07it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-14.2102,  -0.2395,  -5.7699,  ...,  -4.1553,  -5.5573,   7.0415],
         [-16.3996,  -2.9502,  -4.5945,  ...,  -4.5915,  -5.6052,   8.4317],
         [-17.8332,  -3.2230,  -8.9763,  ..., -10.2476,  -7.7328,  10.0600],
         ...,
         [-16.4832,  -1.2617, -10.0480,  ...,  -4.3413,  -6.1071,   8.3746],
         [-17.7634,  -2.0609,  -6.5702,  ...,  -6.7810,  -7.8223,   9.6604],
         [-16.0169,  -1.7188,  -8.1120,  ...,  -4.1318,  -7.3078,   8.7915]],

        [[-14.2128,  -0.2421,  -5.4467,  ...,  -3.9443,  -4.8916,   7.3918],
         [-16.3490,  -2.3771,  -2.2048,  ...,  -6.3879,  -6.3199,   7.6526],
         [-17.6143,  -3.1590,  -9.7713,  ...,  -8.9950,  -8.2059,   9.4428],
         ...,
         [-16.2747,  -1.5263, -10.8139,  ...,  -4.1082,  -7.0897,   8.2550],
         [-17.1099,  -2.1494,  -7.8522,  ...,  -7.6375,  -7.0380,   8.6339],
         [-15.6134,  -2.1626,  -8.6977,  ...,  -3.6777,  -6.997



 42%|████▏     | 113/269 [00:10<00:13, 11.15it/s][A[A

 43%|████▎     | 115/269 [00:10<00:13, 11.26it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.4511,   0.1219,  -6.6215,  ...,  -8.1063, -11.3677,   8.8457],
         [-18.2150,   0.7433,  -7.9720,  ...,  -3.9353,  -6.0759,   9.2531],
         [-17.2112,  -1.8037,  -4.6668,  ...,  -7.6352,  -9.3890,   8.8300],
         ...,
         [-17.1919,  -0.2772,  -7.9251,  ...,  -3.1700,  -5.8955,   9.1989],
         [-14.8201,  10.9312,  -6.0611,  ...,  -7.8679,  -5.9225,   6.7289],
         [-17.8675,   5.6593,  -8.8182,  ...,  -7.7021,  -6.4179,   9.6281]],

        [[-17.8443,   0.7141,  -8.3912,  ...,  -8.6562, -13.1325,   9.9266],
         [-18.1169,  -0.4343, -11.1104,  ...,  -5.7561,  -7.8880,  10.2433],
         [-12.9346,  -3.0071,  -5.8573,  ...,  -4.5975,  -2.2622,   6.6436],
         ...,
         [-17.9516,   1.5924,  -9.6305,  ...,  -3.7513,  -8.0072,  10.4000],
         [-17.8538,   5.3692,  -8.6212,  ...,  -8.1725,  -6.6865,   9.6698],
         [-17.0423,  -1.1418, -13.4423,  ...,  -4.6690,  -6.507



 43%|████▎     | 117/269 [00:10<00:13, 11.24it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-15.1091,  -0.3897,  -5.0438,  ...,  -9.6037,  -9.7433,   7.5993],
         [-15.5967,  -1.9280,  -5.3595,  ..., -11.1990,  -1.1188,   9.3987],
         [-17.4147,   1.4378,  -5.6909,  ...,  -8.9616,  -7.2654,   8.1971],
         ...,
         [-15.7860,  -2.3527,  -3.9235,  ...,  -5.8558,  -7.0606,   8.5407],
         [-13.3797,  10.3728,  -3.9980,  ...,  -9.0548,  -6.6178,   4.9700],
         [-14.9253,  -2.1717,  -3.6617,  ...,  -7.0022,  -4.8360,   8.4936]],

        [[-15.7350,   2.0893,  -7.0003,  ...,  -9.3574, -11.6839,   7.6312],
         [-15.5047,  -2.4254,  -4.7925,  ..., -10.6132,  -2.5814,   9.0521],
         [-16.7890,  -0.4083,  -7.0101,  ...,  -8.8929,  -6.2061,   7.7894],
         ...,
         [-14.8691,  -2.3401,  -4.8747,  ...,  -7.3376,  -5.7451,   8.1618],
         [-15.3428,   8.0834,  -3.5878,  ...,  -9.7461,  -6.8732,   6.9057],
         [-14.8691,  -2.4481,  -4.6810,  ...,  -5.5991,  -7.292



 44%|████▍     | 119/269 [00:10<00:13, 11.22it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.6958,   2.8463,  -6.4560,  ...,  -6.2145,  -5.6370,   8.8586],
         [-15.3700,  -2.0761, -13.3011,  ...,  -1.3409,   4.2343,   9.3424],
         [-18.8108,  -2.8756,  -4.0071,  ..., -13.9216,  -7.5064,  10.1067],
         ...,
         [-18.4111,   4.6133,  -9.1161,  ...,  -6.4589,  -5.3470,   9.6319],
         [-17.5994,   2.1448,  -6.5532,  ...,  -7.7555,  -6.6126,   8.6213],
         [-17.1438,  -2.2505, -12.8480,  ...,  -0.9427,   3.2534,  10.3222]],

        [[-17.5962,   1.1991,  -5.8727,  ...,  -5.7723,  -4.1332,   9.1045],
         [-15.4566,  -2.0382, -13.2519,  ...,  -2.0044,   4.0696,   9.3334],
         [-18.9061,  -3.2754,  -4.3255,  ..., -13.4530,  -6.4880,   9.8530],
         ...,
         [-18.4242,   4.6459,  -9.0621,  ...,  -6.1454,  -5.2408,   9.6497],
         [-17.8317,   4.5852,  -6.7727,  ...,  -7.4546,  -6.0971,   8.8158],
         [-18.2232,  -1.4176, -11.8852,  ...,  -1.5701,   0.537



 45%|████▍     | 121/269 [00:11<00:13, 10.66it/s][A[A

 46%|████▌     | 123/269 [00:11<00:13, 10.60it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.9376,  -2.5417, -10.7495,  ...,  -3.3462,  -4.7549,   9.1471],
         [-17.0746,  -4.0800, -13.5570,  ...,  -3.3889,  -1.3669,  11.3240],
         [-16.2383,  -2.3608,  -9.9309,  ...,  -1.7524,  -4.5196,   8.0056],
         ...,
         [-17.5868,   1.8116,  -8.2558,  ...,  -8.2964,  -4.4436,   9.8728],
         [-16.9487,   2.0682,  -7.8408,  ...,  -9.6998,  -6.1465,   8.9435],
         [-17.7662,   2.5224,  -8.9246,  ...,  -9.0317,  -6.0936,   9.4241]],

        [[-16.9132,  -2.4356, -10.5121,  ...,  -4.8119,  -4.6488,   8.9219],
         [-17.7267,  -3.9721, -13.0455,  ...,  -2.8782,  -6.0551,  10.7843],
         [-16.0811,  -2.7003,  -9.7802,  ...,  -2.2266,  -4.8908,   8.0573],
         ...,
         [-17.7062,   1.9997,  -8.3664,  ...,  -8.8874,  -4.9082,  10.1112],
         [-16.8994,   2.7802,  -7.9734,  ...,  -9.5685,  -6.2763,   8.9412],
         [-17.7585,   2.2074,  -8.3626,  ...,  -8.7042,  -4.877



 46%|████▋     | 125/269 [00:11<00:14, 10.05it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-16.6623,  -2.0018,  -4.9609,  ...,  -8.9344,  -7.2485,   9.1188],
         [-18.2891,   0.8668,  -4.6799,  ...,  -2.9640,  -4.7895,  11.1770],
         [-14.4496,  -3.2996,  -2.9536,  ...,  -7.2219,  -4.2623,   7.8469],
         ...,
         [-17.3107,  -0.6732,  -5.2492,  ...,  -2.1870,  -5.6902,  11.2576],
         [-13.3604,  10.9872,  -4.5051,  ...,  -7.2701,  -6.1205,   6.3182],
         [-17.6461,  -0.4662,  -5.0910,  ...,  -1.8893,  -6.2853,  11.3344]],

        [[-16.3770,  -3.4879,  -5.3193,  ...,  -8.8160,  -4.0474,   9.2977],
         [-18.0301,  -1.5416,  -4.9162,  ...,  -4.5944,  -3.4136,  10.9784],
         [-16.6918,  -3.7172,  -4.8353,  ...,  -8.5210,  -3.7258,   9.3465],
         ...,
         [-18.0017,  -0.4117,  -7.6058,  ...,  -9.0921,  -6.3554,  10.7357],
         [-18.2779,   3.7211,  -6.8971,  ...,  -9.0612,  -4.9723,  10.1172],
         [-18.5373,  -0.6996,  -6.2376,  ...,  -2.8881,  -4.530



 47%|████▋     | 127/269 [00:11<00:13, 10.70it/s][A[A

 48%|████▊     | 129/269 [00:11<00:12, 11.03it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-18.3613,   1.9754,  -8.9060,  ...,  -7.2130,  -4.9399,  10.1527],
         [-17.7463,   4.8462,  -7.9513,  ...,  -9.4248,  -6.8507,   9.9844],
         [-17.9970,   4.5457,  -8.7481,  ...,  -8.9854,  -6.5166,  10.1397],
         ...,
         [-18.2261,  -2.9594,  -6.5870,  ...,  -5.7558,  -4.4211,  10.3714],
         [-16.0448,   9.4236,  -5.7359,  ..., -10.2474,  -6.5836,   8.2611],
         [-17.4404,   3.9870,  -7.8873,  ...,  -8.4276,  -5.8190,  10.2064]],

        [[-17.0884,  -3.1625,  -5.6434,  ...,  -5.7549,  -5.9913,  10.1525],
         [-18.2772,   4.9044,  -8.2747,  ..., -10.1833,  -7.2013,  10.4586],
         [-18.2997,  -0.6334,  -7.0781,  ...,  -7.7024,  -5.4756,  11.0117],
         ...,
         [-18.3197,  -2.1298,  -5.9720,  ...,  -5.0478,  -5.0432,  10.5760],
         [-16.4788,   8.1565,  -5.9231,  ...,  -9.9368,  -6.5190,   8.7572],
         [-17.4013,   4.3277,  -7.8022,  ...,  -8.7853,  -5.714



 49%|████▊     | 131/269 [00:12<00:12, 11.27it/s][A[A

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.4638,  -1.0881,  -3.0700,  ...,  -7.9647, -10.1231,   9.5234],
         [-16.2392,  -0.2931,  -6.7300,  ...,  -9.6692,  -2.6942,   9.1590],
         [-18.6434,  -1.0434,  -6.0524,  ..., -13.4796,  -8.6406,   9.7167],
         ...,
         [-17.9559,  -1.7678,  -5.0045,  ..., -12.6818,  -7.7291,   9.4566],
         [-12.8882,  -1.5969,  -4.0898,  ..., -13.1281,  -6.4122,   7.0405],
         [-16.6169,  -2.6600,  -4.5844,  ..., -11.7985,  -8.3207,   9.6943]],

        [[-17.6876,  -0.9991,  -3.3056,  ...,  -8.8557, -10.5438,   9.8114],
         [-16.7776,  -1.9824,  -6.9332,  ...,  -9.5567,  -4.1853,   9.5292],
         [-19.0434,  -1.5887,  -4.4868,  ..., -13.2195, -10.2094,  10.2293],
         ...,
         [-16.6852,   0.2609,  -4.9644,  ...,  -4.6892,  -6.0541,   9.0287],
         [-16.2625,  -2.5734,  -5.6306,  ..., -11.6514,  -7.3293,   9.1208],
         [-15.3747,  -1.6309,  -5.7092,  ..., -12.2600,  -5.777



 49%|████▉     | 133/269 [00:12<00:11, 11.52it/s][A[A

 50%|█████     | 135/269 [00:12<00:12, 10.84it/s][A[A
  0%|          | 0/31 [00:12<?, ?it/s]
  0%|          | 0/36 [00:12<?, ?it/s]

DetrObjectDetectionOutput(loss=None, loss_dict=None, logits=tensor([[[-17.3799,  -0.6899,  -7.3343,  ...,  -9.0824,  -4.6747,  10.6513],
         [-18.2360,  -1.4384,  -8.2548,  ...,  -9.7290,  -4.3900,  11.1777],
         [-18.0677,   0.0465,  -8.5739,  ...,  -9.5549,  -5.1848,  11.1471],
         ...,
         [-17.1885,   5.1951,  -7.8788,  ...,  -7.9364,  -5.0623,  10.1157],
         [-15.2729,  11.7451,  -5.2781,  ...,  -9.9473,  -6.2565,   7.5780],
         [-17.6367,   4.8413,  -8.1154,  ...,  -8.3129,  -5.8120,  10.3899]],

        [[-18.5876,  -0.8709,  -7.2572,  ...,  -9.1576,  -5.5416,  10.7572],
         [-18.4655,  -0.9940,  -6.9411,  ...,  -9.6685,  -5.9337,  11.2879],
         [-18.8856,  -1.1597,  -5.3379,  ...,  -9.9241,  -6.4520,  11.2962],
         ...,
         [-17.2174,   5.8886,  -7.6019,  ...,  -7.8865,  -5.6073,   9.9750],
         [-15.5574,  11.1966,  -5.6650,  ...,  -9.7012,  -6.7694,   7.7707],
         [-19.3145,   0.3147,  -7.0568,  ...,  -9.9724,  -7.375




KeyboardInterrupt: 