In [None]:
# Enable the IPython autoreload extension; mode 2 re-imports all modules
# before executing each cell, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the
# last one (this is why later cells assign to `_` to suppress output).
InteractiveShell.ast_node_interactivity = 'all'

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation and numerical warnings — confirm that is intended.
warnings.simplefilter('ignore')

# NOTE(review): `gc` is not used in the visible cells.
import gc

from os import path
import sys
# Add the parent directory to the import path; presumably this is where the
# `src` package imported below lives — verify against the repository layout.
sys.path.append(path.abspath('..'))

In [None]:
import numpy as np
import torch
import cv2
from PIL import Image

import pycuda.autoinit

from src.transforms import torch_preprocessing, trt_preprocessing
import src.common as common
from src.interface import TRTModel

In [None]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    Evaluated in a numerically stable form: the exponential is only ever
    taken of a non-positive number, so large-magnitude inputs cannot
    overflow (the naive formula overflows in ``exp(-x)`` for very negative x).

    Args:
        x: scalar, NumPy array, or any array-like (e.g. a list) of numbers.

    Returns:
        NumPy array with the same shape as ``x``; a NumPy scalar when ``x``
        is a scalar.
    """
    values = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(values))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) — algebraically equal.
    result = np.where(values >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Unwrap 0-d arrays so scalar input still yields a scalar.
    return result if result.ndim else result[()]

In [None]:
# Runtime settings
DEVICE = 'cuda:0'
BATCH_SIZE = 1

# Model artefacts, relative to the notebook directory
TORCH_FILE = '../models/resnet34.pth'
TRT_FILE = '../models/resnet34.engine'
TRT_FILE_FP16 = '../models/resnet34_fp16.engine'

In [None]:
image_path = '../data/dog.jpg'
image_bgr = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of an opaque indexing error.
if image_bgr is None:
    raise FileNotFoundError(f'cannot read image: {image_path}')
# OpenCV decodes to BGR channel order; convert to RGB. cvtColor returns a
# new contiguous array, unlike the negative-stride view from `[..., ::-1]`.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
print(image.shape)
Image.fromarray(image)

## Torch инференс

In [None]:
# NOTE(review): torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints from a trusted source. Recent PyTorch releases may
# also need `weights_only=False` to load a fully pickled model — confirm
# against the installed version.
# map_location puts the stored tensors directly on DEVICE, so the load does
# not depend on the device the checkpoint was saved from.
torch_model = torch.load(TORCH_FILE, map_location=DEVICE)
# Assigning the result keeps the module repr out of the cell output.
torch_model = torch_model.to(DEVICE).eval()

In [None]:
# Preprocess the image, move it to DEVICE, and concatenate BATCH_SIZE copies
# of it along the first dimension.
torch_input_tensor = torch.cat(
    [torch_preprocessing(image).to(DEVICE)] * BATCH_SIZE
)
print(torch_input_tensor.shape)

In [None]:
# Reference forward pass without gradient tracking; the result is converted
# to a NumPy array for the comparison cells further down.
with torch.no_grad():
    torch_output_tensor = (
        torch_model(torch_input_tensor).detach().cpu().numpy()
    )

In [None]:
%%timeit
with torch.no_grad():
    torch_output_tensor = torch_model(torch_input_tensor).cpu().detach().numpy()

## TensorRT инференс

In [None]:
# Prepare the data: preprocess the image for TensorRT and concatenate
# BATCH_SIZE copies of it along the first axis.
trt_input_tensor = np.concatenate([trt_preprocessing(image)] * BATCH_SIZE)
print(trt_input_tensor.shape)

In [None]:
# Load the fp32 model
trt_model = TRTModel(TRT_FILE)
# Run it once on the prepared batch; this result is the fp32 output used by
# the comparison cells.
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
%%timeit
# Time a single call of the fp32 TensorRT model on the prepared batch.
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
# Load the fp16 model
trt_model_fp16 = TRTModel(TRT_FILE_FP16)
# Run it once on the same batch; this result is the fp16 output used by the
# comparison cells.
trt_output_tensor_fp16 = trt_model_fp16(trt_input_tensor)

In [None]:
%%timeit
trt_output_tensor = trt_model_fp16(trt_input_tensor)

## Сравнение

In [None]:
# Largest absolute difference between the fp32 TensorRT output and the
# PyTorch output, reduced over axes 2 and 3; entry [0, 0] is displayed.
np.abs(trt_output_tensor - torch_output_tensor).max(axis=(2, 3))[0, 0]

In [None]:
# Compare logits: maximum absolute deviation of each TensorRT output from
# the PyTorch output (reduced over axes 2 and 3, entry [0, 0]).
fp32_logit_gap = np.abs(trt_output_tensor - torch_output_tensor).max(axis=(2, 3))[0, 0]
print(f'fp32: {fp32_logit_gap}')
fp16_logit_gap = np.abs(trt_output_tensor_fp16 - torch_output_tensor).max(axis=(2, 3))[0, 0]
print(f'fp16: {fp16_logit_gap}')

In [None]:
# Compare sigmoid outputs (not raw logits): maximum absolute deviation of
# each TensorRT result from the PyTorch result after applying sigmoid
# (reduced over axes 2 and 3, entry [0, 0]).
torch_probs = sigmoid(torch_output_tensor)
fp32_prob_gap = np.abs(sigmoid(trt_output_tensor) - torch_probs).max(axis=(2, 3))[0, 0]
print(f'fp32: {fp32_prob_gap}')
fp16_prob_gap = np.abs(sigmoid(trt_output_tensor_fp16) - torch_probs).max(axis=(2, 3))[0, 0]
print(f'fp16: {fp16_prob_gap}')