In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import warnings
warnings.simplefilter('ignore')

import gc

from os import path
import sys
sys.path.append(path.abspath('..'))

In [None]:
import numpy as np
import torch
import cv2
from PIL import Image

import pycuda.autoinit # ! Важно!

from src.transforms import torch_preprocessing, trt_preprocessing
import src.common as common
from src.interface import TRTModel

In [None]:
def softmax(x, axis=-1):
    """Numerically stable softmax along one axis.

    Args:
        x: Array-like of logits. For batched model output laid out as
            (batch, classes) the default axis normalizes each row separately.
        axis: Axis along which the probabilities sum to 1. Defaults to the
            last axis.

    Returns:
        np.ndarray with the same shape as ``x``; every slice along ``axis``
        sums to 1.
    """
    x = np.asarray(x)
    # A 0-d input has no axis to reduce over; fall back to a global reduction.
    if x.ndim == 0:
        axis = None
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large logits.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    # keepdims lets the sums broadcast back over the reduced axis. A single
    # global sum would normalize the whole batch together, so individual rows
    # would not sum to 1.
    return e_x / e_x.sum(axis=axis, keepdims=True)

In [None]:
# Device on which the PyTorch model and its input tensors are placed.
DEVICE = 'cuda:0'
# Serialized PyTorch model, read with torch.load further down.
TORCH_FILE = '../models/gernet_l.pth'

# Static-batch artifacts: the ONNX export and the TensorRT engines for
# FP32 / FP16 / INT8. Only the engine files are loaded in the cells of this
# notebook; the ONNX path is presumably consumed by a separate conversion step.
ONNX_FILE_STATIC = '../models/gernet_l_static.onnx'
TRT_FILE_STATIC = '../models/gernet_l_static.engine'
TRT_FILE_FP16_STATIC = '../models/gernet_l_fp16_static.engine'
TRT_FILE_INT8_STATIC = '../models/gernet_l_int8_static.engine'

# Dynamic-batch counterparts of the files above.
ONNX_FILE_DYNAMIC = '../models/gernet_l_dynamic.onnx'
TRT_FILE_DYNAMIC = '../models/gernet_l_dynamic.engine'
TRT_FILE_FP16_DYNAMIC = '../models/gernet_l_fp16_dynamic.engine'
TRT_FILE_INT8_DYNAMIC = '../models/gernet_l_int8_dynamic.engine'

In [None]:
# Load the test image. cv2.imread does not raise on a missing or unreadable
# file, it returns None, so fail here with a clear message instead of an
# opaque TypeError on the slice below.
image = cv2.imread('../data/dog.jpg')
if image is None:
    raise FileNotFoundError("Could not read image '../data/dog.jpg'")
# OpenCV decodes to BGR; reversing the last axis yields RGB for PIL and the
# preprocessing functions.
image = image[..., ::-1]
print(image.shape)
Image.fromarray(image)


In [None]:
# Загрузим торчовую модель
torch_model = torch.load(TORCH_FILE)
_ = torch_model.eval()
_ = torch_model.to(DEVICE)

## Статический размер батча

In [None]:
# Number of copies of the test image concatenated into one input batch
# for the static-batch engines.
BATCH_SIZE = 1

### Torch инференс

In [None]:
# Preprocess the image once, move it to the target device, then repeat the
# same tensor BATCH_SIZE times along the first axis to form the batch.
torch_input_tensor = torch.cat(
    [torch_preprocessing(image).to(DEVICE)] * BATCH_SIZE
)
print(torch_input_tensor.shape)

In [None]:
# Reference forward pass with autograd disabled. The result is copied to host
# memory and converted to a NumPy array so it can be compared with the
# TensorRT outputs below.
with torch.no_grad():
    torch_output_tensor = torch_model(torch_input_tensor).cpu().detach().numpy()

In [None]:
%%timeit
# The timed region covers the forward pass plus the copy to host and the NumPy
# conversion. The assignment is local to the timing run; the notebook-level
# torch_output_tensor keeps the value from the previous cell.
with torch.no_grad():
    torch_output_tensor = torch_model(torch_input_tensor).cpu().detach().numpy()

### TensorRT инференс

In [None]:
# Готовим тензора
trt_input_tensor = trt_preprocessing(image)
trt_input_tensor = np.concatenate([trt_input_tensor] * BATCH_SIZE)
print(trt_input_tensor.shape)

In [None]:
# инференс на fp32
trt_model = TRTModel(TRT_FILE_STATIC)
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the FP32 static engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor.
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
# инференс на fp16
trt_model_fp16 = TRTModel(TRT_FILE_FP16_STATIC)
trt_output_tensor_fp16 = trt_model_fp16(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the FP16 static engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor_fp16.
trt_output_tensor_fp16 = trt_model_fp16(trt_input_tensor)

In [None]:
# инференс на int8
trt_model_int8 = TRTModel(TRT_FILE_INT8_STATIC)
trt_output_tensor_int8 = trt_model_int8(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the INT8 static engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor_int8.
trt_output_tensor_int8 = trt_model_int8(trt_input_tensor)

### Сравнение

In [None]:
# Сравнение логитов
print(f'fp32: {list(np.abs(trt_output_tensor - torch_output_tensor).max(1))[0]}')
print(f'fp16: {list(np.abs(trt_output_tensor_fp16 - torch_output_tensor).max(1))[0]}')
print(f'int8: {list(np.abs(trt_output_tensor_int8 - torch_output_tensor).max(1))[0]}')

In [None]:
# Сравнение после активации
print(f'fp32: {list(np.abs(softmax(trt_output_tensor) - softmax(torch_output_tensor)).max(1))[0]}')
print(f'fp16: {list(np.abs(softmax(trt_output_tensor_fp16) - softmax(torch_output_tensor)).max(1))[0]}')
print(f'int8: {list(np.abs(softmax(trt_output_tensor_int8) - softmax(torch_output_tensor)).max(1))[0]}')

In [None]:
# Финальный предикт
print(f'fp32: {list(softmax(trt_output_tensor).argmax(1))[0]}')
print(f'fp16: {list(softmax(trt_output_tensor).argmax(1))[0]}')
print(f'int8: {list(softmax(trt_output_tensor).argmax(1))[0]}')

## Динамический размер батча

In [None]:
# Rebinds BATCH_SIZE for the dynamic-batch section; the input tensors in the
# following cells are rebuilt with this value.
BATCH_SIZE = 5

### Torch инференс

In [None]:
# Rebuild the PyTorch input for the current BATCH_SIZE: preprocess once, move
# to the target device, and repeat the tensor along the first axis.
torch_input_tensor = torch.cat(
    [torch_preprocessing(image).to(DEVICE)] * BATCH_SIZE
)
print(torch_input_tensor.shape)

In [None]:
# Reference forward pass on the larger batch with autograd disabled; the
# result is copied to host memory as a NumPy array and replaces the
# static-section torch_output_tensor.
with torch.no_grad():
    torch_output_tensor = torch_model(torch_input_tensor).cpu().detach().numpy()

In [None]:
%%timeit
# The timed region covers the forward pass plus the copy to host and the NumPy
# conversion. The assignment is local to the timing run; the notebook-level
# torch_output_tensor keeps the value from the previous cell.
with torch.no_grad():
    torch_output_tensor = torch_model(torch_input_tensor).cpu().detach().numpy()

### TensorRT инференс

In [None]:
# Готовим тензора
trt_input_tensor = trt_preprocessing(image)
trt_input_tensor = np.concatenate([trt_input_tensor] * BATCH_SIZE)
print(trt_input_tensor.shape)

In [None]:
# инференс на fp32
trt_model = TRTModel(TRT_FILE_DYNAMIC)
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the FP32 dynamic engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor.
trt_output_tensor = trt_model(trt_input_tensor)

In [None]:
# инференс на fp16
trt_model_fp16 = TRTModel(TRT_FILE_FP16_DYNAMIC)
trt_output_tensor_fp16 = trt_model_fp16(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the FP16 dynamic engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor_fp16.
trt_output_tensor_fp16 = trt_model_fp16(trt_input_tensor)

In [None]:
# инференс на int8
trt_model_int8 = TRTModel(TRT_FILE_INT8_DYNAMIC)
trt_output_tensor_int8 = trt_model_int8(trt_input_tensor)

In [None]:
%%timeit
# Times one full TRTModel call on the INT8 dynamic engine. The assignment is
# local to the timing run and does not replace the notebook-level
# trt_output_tensor_int8.
trt_output_tensor_int8 = trt_model_int8(trt_input_tensor)

### Сравнение

In [None]:
# Сравнение логитов
print(f'fp32: {list(np.abs(trt_output_tensor - torch_output_tensor).max(1))}')
print(f'fp16: {list(np.abs(trt_output_tensor_fp16 - torch_output_tensor).max(1))}')
print(f'int8: {list(np.abs(trt_output_tensor_int8 - torch_output_tensor).max(1))}')

In [None]:
# Сравнение после активации
print(f'fp32: {list(np.abs(softmax(trt_output_tensor) - softmax(torch_output_tensor)).max(1))}')
print(f'fp16: {list(np.abs(softmax(trt_output_tensor_fp16) - softmax(torch_output_tensor)).max(1))}')
print(f'int8: {list(np.abs(softmax(trt_output_tensor_int8) - softmax(torch_output_tensor)).max(1))}')

In [None]:
# Финальный предикт
print(f'fp32: {list(softmax(trt_output_tensor).argmax(1))}')
print(f'fp16: {list(softmax(trt_output_tensor).argmax(1))}')
print(f'int8: {list(softmax(trt_output_tensor).argmax(1))}')

## Ура, работает!