In [1]:
import tensorrt as trt
import json
import torch
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
logger = trt.Logger(trt.Logger.INFO)

# with open("/models/mobilenet_v2.trt", "rb") as f:
#     model_data = f.read()
# engine = runtime.deserialize_cuda_engine(model_data)

with open("/models/mobilenet_v2.trt", "rb") as f, trt.Runtime(logger) as runtime:
    try:
        meta_len = int.from_bytes(f.read(4), byteorder="little")  # read metadata length
        metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
    except UnicodeDecodeError:
        f.seek(0)  # engine file may lack embedded metadata
    model = runtime.deserialize_cuda_engine(f.read())

print('model: ', model)
context = model.create_execution_context()

[12/05/2024-02:47:56] [TRT] [I] [MemUsageChange] Init CUDA: CPU +229, GPU +0, now: CPU 301, GPU 2321 (MiB)
[12/05/2024-02:47:56] [TRT] [I] Loaded engine size: 5 MiB
[12/05/2024-02:47:58] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +158, GPU +212, now: CPU 466, GPU 2553 (MiB)
[12/05/2024-02:48:01] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +240, GPU +271, now: CPU 706, GPU 2824 (MiB)
[model:  <tensorrt.tensorrt.ICudaEngine object at 0x7f12942c70>
12/05/2024-02:48:01] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +5, now: CPU 0, GPU 5 (MiB)
[12/05/2024-02:48:01] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +0, now: CPU 700, GPU 2824 (MiB)
[12/05/2024-02:48:01] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 700, GPU 2824 (MiB)
[12/05/2024-02:48:01] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +12, now: CPU 0, GPU 17 (MiB)


In [3]:
print('model num_bindings', model.num_bindings)

model num_bindings 2


In [4]:
input_name = model.get_binding_name(0)
print('input_name', input_name)
input_shape = model.get_binding_shape(0)
print('input_shape', input_shape)


input_name images
input_shape (1, 3, 224, 224)


In [5]:
output_name = model.get_binding_name(1)
print('output_name', output_name)
output_shape = model.get_binding_shape(1)
print('output_shape', output_shape)

output_name output0
output_shape (1, 2)


In [6]:
input_sample = torch.rand(1, 3, 224, 224).to(dtype=torch.float16).to('cuda:0')
out = torch.from_numpy(np.empty((1,2), dtype=np.float16)).to('cuda:0')

In [7]:
out

tensor([[0., 2.]], device='cuda:0', dtype=torch.float16)

In [8]:
context.execute_v2([int(input_sample.data_ptr()), int(out.data_ptr())])

True

In [9]:
torch.cuda.empty_cache()

In [9]:
out

tensor([[0., 0.]], device='cuda:0', dtype=torch.float16)