In [None]:
import torch
import torchvision
import onnx

# Load the ImageNet-pretrained ResNet-18 and switch it to inference mode.
# `pretrained=True` is deprecated in torchvision (0.13+); the explicit weights
# enum below selects the same IMAGENET1K_V1 checkpoint without the warning.
model = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
)
model.eval()

# Dummy input with batch=1 handed to the exporter.
dummy_input = torch.randn(1, 3, 224, 224)

# Export the model to ONNX with a fixed batch=1 (dynamic_axes is deliberately
# not passed, so every dimension stays static).
torch.onnx.export(
    model,
    dummy_input,
    "resnet18_explicit.onnx",
    input_names=['input'],
    output_names=['output'],
    opset_version=17
)

# Reload the exported file and validate it.
onnx_model = onnx.load("resnet18_explicit.onnx")
onnx.checker.check_model(onnx_model)


def _print_value_infos(title, value_infos):
    """Print `title`, then each tensor's name and dims (symbolic name if set, else size)."""
    print(title)
    for info in value_infos:
        dims = [dim.dim_param or dim.dim_value for dim in info.type.tensor_type.shape.dim]
        print(info.name, dims)


# Show the graph's input/output names and shapes.
_print_value_infos("Inputs:", onnx_model.graph.input)
_print_value_infos("\nOutputs:", onnx_model.graph.output)

In [3]:
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Explicit-batch network flag, spelled out instead of the bare literal 1.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

# Set up the TensorRT builder, the network definition and the ONNX parser.
with trt.Builder(TRT_LOGGER) as builder, builder.create_network(EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    # Read the ONNX file and parse it into the TensorRT network.
    with open("./resnet18_explicit.onnx", "rb") as model_file:
        parsed_ok = parser.parse(model_file.read())
    if not parsed_ok:
        # parse() reports failure through its return value; surface the
        # parser's own messages instead of building from a broken network.
        parse_errors = "\n".join(str(parser.get_error(i)) for i in range(parser.num_errors))
        raise RuntimeError(f"Failed to parse ./resnet18_explicit.onnx:\n{parse_errors}")

    builder_config = builder.create_builder_config()
    # 1 << 32 bytes = 4 GiB workspace limit.
    builder_config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 32)
    builder_config.set_flag(trt.BuilderFlag.FP16)

    serialized_network = builder.build_serialized_network(network, builder_config)
    if serialized_network is None:
        # A failed build yields None; writing that would raise an opaque TypeError.
        raise RuntimeError("TensorRT failed to build the serialized engine")
    with open("resnet18_explicit.trt", "wb") as f:
        f.write(serialized_network)

In [4]:
import tensorrt as trt

# Logger object passed to TensorRT constructors; created with the WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)


In [5]:
def load_engine(engine_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        engine_path: Path to the serialized engine file; opened in binary mode.

    Returns:
        The engine object produced by ``deserialize_cuda_engine``.

    Raises:
        RuntimeError: If TensorRT could not deserialize the file contents.
    """
    with open(engine_path, "rb") as engine_file, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(engine_file.read())
    # deserialize_cuda_engine signals failure by returning None; fail here
    # rather than later with an AttributeError on the None engine.
    if engine is None:
        raise RuntimeError(f"Failed to deserialize TensorRT engine from {engine_path!r}")
    return engine

In [6]:
# Deserialize the engine file. The name `bacbone` (sic) is referenced by later
# cells, so it is kept as-is.
bacbone = load_engine("resnet18_explicit.trt")


In [7]:
# Display the engine object's repr as the cell output.
bacbone

<tensorrt.tensorrt.ICudaEngine at 0x2a673979e70>

In [8]:
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

In [9]:
def initialize_trt_bb(engine, input_shape=(1, 3, 224, 224), output_shape=(1, 1000), dtype=np.float32):
    """Create an execution context and allocate one input and one output device buffer.

    Args:
        engine: Engine object; only ``create_execution_context()`` is called on it.
        input_shape: Shape used to size the input device buffer.
        output_shape: Shape used to size the output device buffer.
        dtype: Element type used to size both buffers.

    Returns:
        Tuple ``(context, d_input_x, d_output_cls)``: the execution context and
        the two device allocations returned by ``cuda.mem_alloc``.
    """
    context = engine.create_execution_context()

    # Buffer size in bytes = element count * bytes per element.
    itemsize = np.dtype(dtype).itemsize
    d_input_x = cuda.mem_alloc(int(np.prod(input_shape)) * itemsize)
    d_output_cls = cuda.mem_alloc(int(np.prod(output_shape)) * itemsize)

    return context, d_input_x, d_output_cls

In [10]:
# Create the execution context and the input/output device buffers for the engine.
context_bacbone, d_input_x, d_output_bb  = initialize_trt_bb(bacbone)

In [11]:
def predictV2_bb(context, input_data_x, d_input_x, d_output_cls, output_shape=(1, 1000)):
    """Run one synchronous inference and return the output as a host array.

    Args:
        context: Execution context; ``execute_v2`` is called on it.
        input_data_x: Host input array. It is converted to contiguous float32
            before the upload.
        d_input_x: Device allocation receiving the input.
        d_output_cls: Device allocation the output is read back from.
        output_shape: Shape of the float32 host array the output is copied into.

    Returns:
        A float32 NumPy array of shape ``output_shape``.

    Raises:
        RuntimeError: If ``execute_v2`` reports failure.
    """
    # memcpy_htod copies the whole host buffer, so a float64 array would be
    # twice as large as a float32-sized device buffer. Enforce float32 here.
    host_input = np.ascontiguousarray(input_data_x, dtype=np.float32)
    cuda.memcpy_htod(d_input_x, host_input.ravel())

    # Bindings are the raw device addresses, input first, then output.
    succeeded = context.execute_v2(bindings=[
        int(d_input_x),
        int(d_output_cls)
    ])
    if not succeeded:
        raise RuntimeError("TensorRT execute_v2 reported failure")

    # Copy the result back to the host.
    output_data_cls = np.empty(output_shape, dtype=np.float32)
    cuda.memcpy_dtoh(output_data_cls, d_output_cls)

    return output_data_cls

In [12]:
# Random test input of shape (1, 3, 224, 224). np.random.rand yields float64,
# while the device buffers above are sized for float32 — cast before inference.
input_data1 = np.random.rand(1, 3, 224, 224)

In [20]:
import tensorrt as trt
import pycuda.autoinit
# Logger object passed to TensorRT constructors; created with the WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def load_engine(engine_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        engine_path: Path to the serialized engine file; opened in binary mode.

    Returns:
        The engine object produced by ``deserialize_cuda_engine``.

    Raises:
        RuntimeError: If TensorRT could not deserialize the file contents.
    """
    with open(engine_path, "rb") as engine_file, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(engine_file.read())
    # deserialize_cuda_engine signals failure by returning None; fail here
    # rather than later with an AttributeError on the None engine.
    if engine is None:
        raise RuntimeError(f"Failed to deserialize TensorRT engine from {engine_path!r}")
    return engine
# Deserialize the engine file. The name `bacbone` (sic) is used by the
# initialize_trt_bb call further down, so it is kept as-is.
bacbone = load_engine("resnet18_explicit.trt")
import numpy as np
import pycuda.driver as cuda

def initialize_trt_bb(engine, input_shape=(1, 3, 224, 224), output_shape=(1, 1000), dtype=np.float32):
    """Create an execution context and allocate one input and one output device buffer.

    Args:
        engine: Engine object; only ``create_execution_context()`` is called on it.
        input_shape: Shape used to size the input device buffer.
        output_shape: Shape used to size the output device buffer.
        dtype: Element type used to size both buffers.

    Returns:
        Tuple ``(context, d_input_x, d_output_cls)``: the execution context and
        the two device allocations returned by ``cuda.mem_alloc``.
    """
    context = engine.create_execution_context()

    # Buffer size in bytes = element count * bytes per element.
    itemsize = np.dtype(dtype).itemsize
    d_input_x = cuda.mem_alloc(int(np.prod(input_shape)) * itemsize)
    d_output_cls = cuda.mem_alloc(int(np.prod(output_shape)) * itemsize)

    return context, d_input_x, d_output_cls

# Assumes the engine (`bacbone`) has already been loaded above; creates the
# execution context and the input/output device buffers for it.
context_backbone, d_input_x, d_output_cls  = initialize_trt_bb(bacbone)

def predictV2_bb(context, input_data_x, d_input_x, d_output_cls, output_shape=(1, 1000)):
    """Run one synchronous inference and return the output as a host array.

    Args:
        context: Execution context; ``execute_v2`` is called on it.
        input_data_x: Host input array. It is converted to contiguous float32
            before the upload.
        d_input_x: Device allocation receiving the input.
        d_output_cls: Device allocation the output is read back from.
        output_shape: Shape of the float32 host array the output is copied into.

    Returns:
        A float32 NumPy array of shape ``output_shape``.

    Raises:
        RuntimeError: If ``execute_v2`` reports failure.
    """
    # memcpy_htod copies the whole host buffer, so a float64 array would be
    # twice as large as a float32-sized device buffer. Enforce float32 here.
    host_input = np.ascontiguousarray(input_data_x, dtype=np.float32)
    cuda.memcpy_htod(d_input_x, host_input.ravel())

    # Bindings are the raw device addresses, input first, then output.
    succeeded = context.execute_v2(bindings=[
        int(d_input_x),
        int(d_output_cls)
    ])
    if not succeeded:
        raise RuntimeError("TensorRT execute_v2 reported failure")

    # Copy the result back to the host.
    output_data_cls = np.empty(output_shape, dtype=np.float32)
    cuda.memcpy_dtoh(output_data_cls, d_output_cls)

    return output_data_cls

# Example usage: random input, cast to float32 to match the float32-sized
# device buffers before it is uploaded.
input_data1 = np.random.rand(1, 3, 224, 224)
output = predictV2_bb(context_backbone, input_data1.astype(np.float32), d_input_x, d_output_cls)

In [19]:
# Display the array returned by predictV2_bb as the cell output.
output

array([[-1.47167969e+00,  9.35546875e-01,  1.23828125e+00,
         1.25195312e+00,  1.09863281e+00, -3.87695312e-01,
         8.90136719e-01,  2.30102539e-01, -1.23535156e+00,
        -5.39550781e-01,  8.82812500e-01,  2.30859375e+00,
         1.06640625e+00,  2.36523438e+00,  2.43359375e+00,
         8.34472656e-01,  1.35546875e+00,  1.21765137e-01,
         1.21386719e+00,  1.68652344e+00,  5.99609375e-01,
         1.81347656e+00,  1.49707031e+00,  1.48632812e+00,
         2.54638672e-01,  2.91992188e-01,  4.65332031e-01,
         6.77734375e-01,  3.10974121e-02, -1.01269531e+00,
        -6.25488281e-01,  1.25195312e+00, -4.53125000e-01,
         8.95507812e-01,  2.41601562e+00, -5.42968750e-01,
        -3.57910156e-01, -4.17480469e-02,  1.42089844e+00,
         1.82739258e-01,  2.30664062e+00,  9.45800781e-01,
         1.76757812e+00,  6.50390625e-01,  1.56835938e+00,
         5.69824219e-01,  2.07617188e+00, -3.13232422e-01,
        -4.28466797e-02,  1.66381836e-01,  1.88964844e+0