In [2]:
import os
import sys
import logging
import argparse

import numpy as np
import tensorrt as trt
import utils.calibrator as calibrator

In [3]:
ROOT = "/home/jhpark/KISTI_Dockerimage/3dunet"
MODEL_NAME     = "model"
MODEL_PATH     = os.path.join(ROOT, MODEL_NAME+'.onnx')
MODEL_PB_PATH  = os.path.join(ROOT, 'model/config.pbtxt')

# Model Input details
MODEL_INPUT_NAME  = ["PET", "CT"]
MODEL_INPUT_SHAPE = [(-1,1,128,128,160),(-1,1,128,128,160)]
MODEL_OUTPUT_NAME = "Segmentation"

In [4]:
TRT_DATATYPE        = trt.DataType.INT8
TRT_CALIB_DATASET   = '/projects2/pi/jhpark/small_processed/test_samples'
TRT_MAX_BATCH_SIZE  = 1
TRT_ENGINE_PATH     = os.path.join(ROOT, f'engine/{MODEL_NAME}.engine')

In [5]:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# We first load all custom plugins shipped with TensorRT,
# some of them will be needed during inference
trt.init_libnvinfer_plugins(TRT_LOGGER, '')

# TRT engine placeholder
trt_engine = None

# Display requested engine settings to stdout
print("TensorRT inference engine settings:")
print("  * Inference precision - {}".format(TRT_DATATYPE))
print("  * Max batch size - {}\n".format(TRT_MAX_BATCH_SIZE))


TensorRT inference engine settings:
  * Inference precision - DataType.INT8
  * Max batch size - 1



In [6]:
cfg = {
    "precision":"int8"
}

In [7]:
context = trt.IExecutionContext

In [8]:
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser =trt.OnnxParser(network, TRT_LOGGER)

config = builder.create_builder_config()
profile = builder.create_optimization_profile()
profile.set_shape_input("PET", min=[-1,1,128,128,160], opt=[-1,1,128,128,160], max=[1,1,128,128,160]) 
profile.set_shape_input("CT", min=[-1,1,128,128,160], opt=[-1,1,128,128,160], max=[1,1,128,128,160])
config.add_optimization_profile(profile)

with open(MODEL_PATH, "rb") as f:
    parser.parse(f.read())

inputs = [network.get_input(i) for i in range(network.num_inputs)]
outputs = [network.get_output(i) for i in range(network.num_outputs)]    

if cfg["precision"] == "fp16":
    
    if builder.platform_has_fast_fp16:
        print("FP16 is not supported natively")
    else:
        config.set_flag(trt.BuilderFlag.FP16)
elif cfg["precision"] == "int8":

    if not builder.platform_has_fast_int8:
        print("INT is not supported natively")

    else:
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = calibrator.SSDEntropyCalibrator(data_dir=TRT_CALIB_DATASET, cache_file=TRT_ENGINE_PATH+'INT8CacheFile')

engine = builder.build_serialized_network(network, config)

#with open(TRT_ENGINE_PATH, "wb") as f:
#    print("Serializing engine to file: {:}".format(TRT_ENGINE_PATH))
#    f.write(engine)

[07/21/2022-17:11:49] [TRT] [W] onnx2trt_utils.cpp:369: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[07/21/2022-17:11:49] [TRT] [E] 4: [network.cpp::validate::2997] Error Code 4: Internal Error (PET: dynamic input is missing dimensions in profile 0.)
[07/21/2022-17:11:49] [TRT] [E] 2: [builder.cpp::buildSerializedNetwork::636] Error Code 2: Internal Error (Assertion engine != nullptr failed. )
