In [77]:
import torch
from PIL import Image
import requests
import time
import torchvision
from image_processing_fast import BaseImageProcessorFast
from torchvision.transforms import v2

from transformers import AutoImageProcessor, AutoProcessor

In [88]:
# checkpoint = "SenseTime/deformable-detr"
checkpoint = "facebook/detr-resnet-50"
# checkpoint = "PekingU/rtdetr_r101vd"

device = "cuda"
load_dtype = torch.float32

In [79]:
path = "/home/ubuntu/models_implem/000000039769.jpg"
image_tensor = torchvision.io.read_image(path).to(device).unsqueeze(0)
# create a batch by repeating the image
image_tensor = image_tensor.repeat(2, 1, 1, 1)
print(image_tensor.shape)

torch.Size([2, 3, 480, 640])


In [80]:
config = AutoProcessor.from_pretrained(checkpoint)
config.to_dict()

{'_processor_class': None,
 'image_processor_type': 'DetrImageProcessor',
 'format': 'coco_detection',
 'do_resize': True,
 'size': {'shortest_edge': 800, 'longest_edge': 1333},
 'resample': <Resampling.BILINEAR: 2>,
 'do_rescale': True,
 'rescale_factor': 0.00392156862745098,
 'do_normalize': True,
 'do_convert_annotations': True,
 'image_mean': [0.485, 0.456, 0.406],
 'image_std': [0.229, 0.224, 0.225],
 'do_pad': True,
 'pad_size': None}

In [81]:

processor = AutoImageProcessor.from_pretrained(checkpoint, do_pad=False)
# optim_processor = processor(
#     image_mean=[0.485, 0.456, 0.406],
#     image_std=[0.229, 0.224, 0.225],
#     do_rescale=True,
#     do_resize=True,
#     do_normalize=True,
#     size=(640, 640),
#     dtype=torch.float32,
# )

optim_processor = BaseImageProcessorFast(**(processor.to_dict()))

In [92]:

start = time.time()
loading_time = 0
processing_time = 0
for i in range(1000):
    start_loadimage = time.time()
    image_tensor = v2.functional.to_dtype(torchvision.io.read_image(path).unsqueeze(0).to(device), load_dtype)
    image_tensor = torchvision.io.read_image(path).unsqueeze(0).to(device).to(load_dtype)
    loading_time += time.time() - start_loadimage
    start_process = time.time()
    images_processed_optim = optim_processor(image_tensor)
    processing_time += time.time() - start_process
end = time.time()
print("Fast time: ", end - start)
print("loading time: ", loading_time)
print("processing time: ", processing_time)

Fast time:  4.103736400604248
loading time:  3.7540950775146484
processing time:  0.3482944965362549


In [33]:
image_tensor

tensor([[[[140., 144., 146.,  ...,  94., 107., 102.],
          [138., 142., 139.,  ..., 103., 115.,  96.],
          [135., 150., 142.,  ..., 103., 108.,  93.],
          ...,
          [237., 225., 236.,  ..., 171., 181., 147.],
          [230., 226., 238.,  ..., 114., 103.,  89.],
          [238., 246., 238.,  ...,  74.,  74.,  73.]],

         [[ 25.,  25.,  24.,  ...,  16.,  13.,  10.],
          [ 22.,  26.,  20.,  ...,  11.,  17.,  13.],
          [ 22.,  33.,  23.,  ...,   8.,  19.,  10.],
          ...,
          [100.,  84.,  96.,  ...,  47.,  62.,  28.],
          [ 84.,  80.,  99.,  ...,  24.,   5.,   9.],
          [100., 109.,  96.,  ...,  13.,  25.,  17.]],

         [[ 56.,  67.,  73.,  ...,  38.,  39.,  33.],
          [ 57.,  49.,  48.,  ...,  36.,  42.,  31.],
          [ 42.,  59.,  53.,  ...,  32.,  39.,  26.],
          ...,
          [190., 196., 203.,  ..., 131., 144., 110.],
          [221., 213., 202.,  ...,  62.,  46.,  44.],
          [175., 191., 214.,  ...

In [31]:
image_tensor

tensor([[[[140., 144., 146.,  ...,  94., 107., 102.],
          [138., 142., 139.,  ..., 103., 115.,  96.],
          [135., 150., 142.,  ..., 103., 108.,  93.],
          ...,
          [237., 225., 236.,  ..., 171., 181., 147.],
          [230., 226., 238.,  ..., 114., 103.,  89.],
          [238., 246., 238.,  ...,  74.,  74.,  73.]],

         [[ 25.,  25.,  24.,  ...,  16.,  13.,  10.],
          [ 22.,  26.,  20.,  ...,  11.,  17.,  13.],
          [ 22.,  33.,  23.,  ...,   8.,  19.,  10.],
          ...,
          [100.,  84.,  96.,  ...,  47.,  62.,  28.],
          [ 84.,  80.,  99.,  ...,  24.,   5.,   9.],
          [100., 109.,  96.,  ...,  13.,  25.,  17.]],

         [[ 56.,  67.,  73.,  ...,  38.,  39.,  33.],
          [ 57.,  49.,  48.,  ...,  36.,  42.,  31.],
          [ 42.,  59.,  53.,  ...,  32.,  39.,  26.],
          ...,
          [190., 196., 203.,  ..., 131., 144., 110.],
          [221., 213., 202.,  ...,  62.,  46.,  44.],
          [175., 191., 214.,  ...

In [7]:
start = time.time()
loading_time = 0
processing_time = 0
for i in range(100):
    start_loadimage = time.time()
    image = Image.open(path)
    loading_time += time.time() - start_loadimage
    start_process = time.time()
    images_processed = processor(image, return_tensors="pt").to(device)
    processing_time += time.time() - start_process
end = time.time()
print("Slow time: ", end - start)
print("loading time: ", loading_time)
print("processing time: ", processing_time)

Slow time:  3.110658884048462
loading time:  0.01995372772216797
processing time:  3.0901899337768555


In [8]:
# print shapes
print(images_processed_optim["pixel_values"].shape)
print(images_processed["pixel_values"].shape)

torch.Size([1, 3, 800, 1066])
torch.Size([1, 3, 800, 1066])


In [9]:
# print max diff
diff = torch.abs(images_processed["pixel_values"] - images_processed_optim["pixel_values"])
# print relative difference and relative max difference
print(diff.max()/images_processed["pixel_values"].mean(), diff.median()/images_processed["pixel_values"].mean())

tensor(0.0520, device='cuda:0') tensor(0.0147, device='cuda:0')


In [10]:
assert torch.allclose(images_processed["pixel_values"], images_processed_optim["pixel_values"], atol=1e-4)

AssertionError: 