forked from PaddlePaddle/Paddle
Merge pull request PaddlePaddle#18 from cryoco/add-trt-int8-doc
add trt int8 tutorial and demos
Showing 8 changed files with 626 additions and 14 deletions.
`CMakeLists.txt` (the build configuration for the demo programs):

```cmake
project(cpp_inference_demo CXX C)
option(WITH_MKL        "Compile demo with MKL/OpenBlas support, default use MKL."      ON)
option(WITH_GPU        "Compile demo with GPU/CPU, default use CPU."                   OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
option(USE_TENSORRT    "Compile demo with TensorRT."                                   OFF)

# On MSVC, switch the runtime from /MD to /MT when building statically.
macro(safe_set_static_flag)
  foreach(flag_var
          CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
          CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()
endmacro()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
message(STATUS "CXX flags: ${CMAKE_CXX_FLAGS}")

if(NOT DEFINED PADDLE_LIB)
  message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
endif()
if(NOT DEFINED DEMO_NAME)
  message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()

# Headers of the inference library and its third-party dependencies.
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
include_directories("${PADDLE_LIB}/third_party/install/xxhash/include")
include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3")

if(USE_TENSORRT AND WITH_GPU)
  include_directories("${TENSORRT_ROOT}/include")
  link_directories("${TENSORRT_ROOT}/lib")
endif()

link_directories("${PADDLE_LIB}/third_party/install/zlib/lib")
link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")

add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)

if(WITH_MKL)
  include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
               ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
  if(EXISTS ${MKLDNN_PATH})
    include_directories("${MKLDNN_PATH}/include")
    set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
  endif()
else()
  set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()

# Note: libpaddle_inference_api.so/.a must be listed before libpaddle_fluid.so/.a
if(WITH_STATIC_LIB)
  set(DEPS
      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
  set(DEPS
      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
    ${MATH_LIB} ${MKLDNN_LIB}
    glog gflags protobuf z xxhash
    ${EXTERNAL_LIB})

if(WITH_GPU)
  if(USE_TENSORRT)
    set(DEPS ${DEPS}
        ${TENSORRT_ROOT}/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
    set(DEPS ${DEPS}
        ${TENSORRT_ROOT}/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
  set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${CUDA_LIB}/libcublas${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

target_link_libraries(${DEMO_NAME} ${DEPS})
```
## Paddle-TRT ResNet50 Image Classification Sample

This document is a hands-on demo of Paddle-TRT inference on a ResNet50 classification model. If you are new to Paddle-TRT, we recommend first visiting [this page](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) for an overview.

In this directory:

- `trt_fp32_test.cc` is the source of the sample program for FP32-precision inference with Paddle-TRT (the program's input is a fixed value; if you need to read data via OpenCV or by other means, you will have to modify the program accordingly). A rough sketch of the predictor configuration such a demo builds follows this list.
- `trt_gen_calib_table_test.cc` is the source of the sample program that generates the calibration table for offline (post-training) quantization.
- `trt_int8_test.cc` is the source of the sample program for Int8-precision inference with Paddle-TRT. Depending on whether its boolean flag `use_calib` is `true` or `false`, it either loads the offline calibration table for Int8 inference or loads an Int8 model produced by PaddleSlim.
- `CMakeLists.txt` is the build configuration file.
- `run_impl.sh` holds the configuration of the third-party libraries and the prebuilt inference library.
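As a hedged sketch only (not the demo source itself; it assumes the standard `paddle_inference_api.h` header shipped with the prebuilt library and hard-codes the model paths used below), the FP32 setup looks roughly like this:

```c++
#include <paddle_inference_api.h>

int main() {
  // Point the config at the downloaded FP32 model.
  paddle::AnalysisConfig config;
  config.SetModel("../ResNet50/model", "../ResNet50/params");
  config.EnableUseGpu(100 /* initial GPU memory pool, MB */, 0 /* device id */);
  // Arguments: workspace size, max batch size, min subgraph size,
  // precision, use_static, use_calib_mode.
  config.EnableTensorRtEngine(1 << 30, 1, 5,
                              paddle::AnalysisConfig::Precision::kFloat32,
                              false, false);
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... fill the inputs, call Run(), and print the first outputs.
  return 0;
}
```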
### Getting the models

First, download the required models from the following links:

[ResNet50 FP32 model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50.tar.gz)

[ResNet50 PaddleSlim quantized model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz)

The FP32 model is used for FP32-precision inference as well as for Int8 offline-calibration inference. The quantized model is produced by PaddleSlim, the model compression toolkit; see [here](https://paddlepaddle.github.io/PaddleSlim/quick_start/quant_aware_tutorial.html) for details on model quantization with PaddleSlim, and [here](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/docs/optimize/paddle_trt.rst#int8%E9%87%8F%E5%8C%96%E9%A2%84%E6%B5%8B) for an introduction to Int8 quantized inference with Paddle-TRT.
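For example, the models can be fetched and unpacked with `wget` (any download tool works; the archives should yield the `ResNet50` and `ResNet50_quant` directories referenced by the commands below):

```shell
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50.tar.gz
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar -xzf ResNet50.tar.gz
tar -xzf ResNet50_quant.tar.gz
```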
### Part 1: FP32 inference with TRT

1) **Edit `run_impl.sh`**

Open `run_impl.sh` and change the following settings:

```shell
# Select the FP32 inference demo
DEMO_NAME=trt_fp32_test

# This section uses TensorRT, so USE_TENSORRT must be turned ON
WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

# Root directory of the prebuilt inference library
LIB_DIR=/paddle/fluid_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is ON, set the corresponding CUDA, CUDNN
# and TENSORRT paths. Note that CUDA_LIB and CUDNN_LIB point at the lib64
# directory, while TENSORRT_ROOT points at the TensorRT root directory.
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
```
Run `sh run_impl.sh`; this creates a `build` directory.

2) **Run the sample**

```shell
# Enter the build directory
cd build
# Run the sample
./trt_fp32_test --model_file=../ResNet50/model --params_file=../ResNet50/params
```

When the run finishes, the program prints the first 20 values of the model's output to the screen, which indicates a successful run.
### Part 2: Int8 inference with TRT offline quantization

Int8 offline-quantized inference with TRT takes two steps: generating a calibration table, then loading that table and running Int8 inference. Note that offline-quantized Int8 inference still uses the ResNet50 FP32 model; the quantization scales recorded in the calibration table are used at runtime to convert FP32 to Int8 and thereby speed up inference.
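Conceptually, the calibration pass just runs ordinary inference over a number of batches while the quantization scales are collected. A hedged sketch of such a feeding loop (the input name `image`, the dummy data, and the batch count are assumptions for illustration; the actual demo uses its own fixed inputs):

```c++
#include <paddle_inference_api.h>

#include <vector>

// Feed several batches through an already-created predictor so that
// Int8 calibration statistics can be collected.
void FeedCalibrationBatches(paddle::PaddlePredictor* predictor) {
  const int batch = 1;
  std::vector<float> input(batch * 3 * 224 * 224, 1.0f);  // dummy data
  paddle::PaddleTensor t;
  t.name = "image";  // assumed input name
  t.shape = {batch, 3, 224, 224};
  t.dtype = paddle::PaddleDType::FLOAT32;
  t.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
  std::vector<paddle::PaddleTensor> outputs;
  for (int i = 0; i < 100; ++i) {  // more varied batches give better scales
    predictor->Run({t}, &outputs, batch);
  }
}
```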
#### Generating the calibration table

1) **Edit `run_impl.sh`**

Open `run_impl.sh` and change the following settings:

```shell
# Select the calibration-table generation demo
DEMO_NAME=trt_gen_calib_table_test

# This section uses TensorRT, so USE_TENSORRT must be turned ON
WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

# Root directory of the prebuilt inference library
LIB_DIR=/paddle/fluid_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is ON, set the corresponding CUDA, CUDNN
# and TENSORRT paths. Note that CUDA_LIB and CUDNN_LIB point at the lib64
# directory, while TENSORRT_ROOT points at the TensorRT root directory.
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
```
Run `sh run_impl.sh`; this creates a `build` directory.

2) **Run the sample**

```shell
# Enter the build directory
cd build
# Run the sample
./trt_gen_calib_table_test --model_file=../ResNet50/model --params_file=../ResNet50/params
```

When the run finishes, a file named `trt_calib_*` appears in the `_opt_cache` folder under the model directory `ResNet50`; this file is the calibration table.
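You can confirm the table was produced, for example from the `build` directory:

```shell
# List the generated calibration table (the exact file name varies by model).
ls ../ResNet50/_opt_cache/
```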
#### Loading the calibration table and running Int8 inference

1) **Edit `run_impl.sh` again**, switching to the Int8 inference demo:

```shell
# Select the Int8 inference demo
DEMO_NAME=trt_int8_test

# This section uses TensorRT, so USE_TENSORRT must be turned ON
WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

# Root directory of the prebuilt inference library
LIB_DIR=/paddle/fluid_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is ON, set the corresponding CUDA, CUDNN
# and TENSORRT paths. Note that CUDA_LIB and CUDNN_LIB point at the lib64
# directory, while TENSORRT_ROOT points at the TensorRT root directory.
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
```
Run `sh run_impl.sh`; this creates a `build` directory.

2) **Run the sample**

```shell
# Enter the build directory
cd build
# Run the sample; note that use_calib must be set to true here
./trt_int8_test --model_file=../ResNet50/model --params_file=../ResNet50/params --use_calib=true
```

When the run finishes, the program prints the first 20 values of the model's output to the screen, which indicates a successful run.
**Note**

If you look at the code of `trt_gen_calib_table_test` and `trt_int8_test`, you will find that generating the calibration table and loading it for Int8 inference use the same TensorRT configuration:

```c++
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5, AnalysisConfig::Precision::kInt8, false, true /*use_calib*/);
```
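In this version of the API, the arguments are, in order: the TRT workspace size (here 1 GB), the maximum batch size (`FLAGS_batch_size`), the minimum number of ops a subgraph must contain before it is offloaded to TRT (here 5), the inference precision, whether to cache the optimized engine information on disk (`use_static`), and whether calibration mode is enabled (`use_calib_mode`, the last argument).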
Paddle-TRT decides between generating and loading the calibration table by checking whether the `_opt_cache` folder under the model directory already contains a calibration-table file named `trt_calib_*` that corresponds to the current model. To avoid confusing the generation and loading passes at runtime, you can tell them apart by inspecting the logs.
Log output when generating the calibration table:

```
I0623 08:40:49.386909 107053 tensorrt_engine_op.h:159] This process is generating calibration table for Paddle TRT int8...
I0623 08:40:49.387279 107057 tensorrt_engine_op.h:352] Prepare TRT engine (Optimize model structure, Select OP kernel etc). This process may cost a lot of time.
I0623 08:41:13.784473 107053 analysis_predictor.cc:791] Wait for calib threads done.
I0623 08:41:14.419198 107053 analysis_predictor.cc:793] Generating TRT Calibration table data, this may cost a lot of time...
```
Log output when loading the calibration table for inference:

```
I0623 08:40:27.217701 107040 tensorrt_subgraph_pass.cc:258] RUN Paddle TRT int8 calibration mode...
I0623 08:40:27.217834 107040 tensorrt_subgraph_pass.cc:321] Prepare TRT engine (Optimize model structure, Select OP kernel etc). This process may cost a lot of time.
```
### Part 3: Loading a PaddleSlim Int8 quantized model with TRT

Here we use the ResNet50 PaddleSlim quantized model downloaded earlier. The difference from running Int8 inference with an offline calibration table is that a PaddleSlim quantized model already stores the quantization scales in the attributes of its ops, so no calibration table is needed; accordingly, set `use_calib` to false when running the sample.
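Assuming the demo simply forwards this flag into the last argument of the `EnableTensorRtEngine` call shown in the note above (a sketch, not the demo source), the effective configuration becomes:

```c++
// Calibration mode off: the PaddleSlim model carries its own scales.
config.EnableTensorRtEngine(1 << 30, FLAGS_batch_size, 5, AnalysisConfig::Precision::kInt8, false, false /*use_calib*/);
```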
1) **Edit `run_impl.sh`**

Open `run_impl.sh` and change the following settings:

```shell
# Select the Int8 inference demo
DEMO_NAME=trt_int8_test

# This section uses TensorRT, so USE_TENSORRT must be turned ON
WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

# Root directory of the prebuilt inference library
LIB_DIR=/paddle/fluid_inference_install_dir

# If WITH_GPU or USE_TENSORRT above is ON, set the corresponding CUDA, CUDNN
# and TENSORRT paths. Note that CUDA_LIB and CUDNN_LIB point at the lib64
# directory, while TENSORRT_ROOT points at the TensorRT root directory.
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5
```
Run `sh run_impl.sh`; this creates a `build` directory.

2) **Run the sample**

```shell
# Enter the build directory
cd build
# Run the sample; note that use_calib must be set to false here
./trt_int8_test --model_file=../ResNet50_quant/model --params_file=../ResNet50_quant/params --use_calib=false
```

When the run finishes, the program prints the first 20 values of the model's output to the screen, which indicates a successful run.
### Further reading
- [Paddle Inference Quick Start](https://paddle-inference.readthedocs.io/en/latest/introduction/quick_start.html)
- [Using the Paddle Inference C++ API](https://paddle-inference.readthedocs.io/en/latest/user_guides/cxx_api.html)
- [Using the Paddle Inference Python API](https://paddle-inference.readthedocs.io/en/latest/user_guides/inference_python_api.html)
`run_impl.sh` (the build script; this copy selects a `resnet50_trt_test` demo):

```shell
mkdir -p build
cd build
rm -rf *

# DEMO_NAME must match the demo source file name (<DEMO_NAME>.cc)
DEMO_NAME=resnet50_trt_test

WITH_MKL=ON
WITH_GPU=ON
USE_TENSORRT=ON

LIB_DIR=/paddle/build/fluid_inference_install_dir
CUDNN_LIB=/paddle/nvidia-downloads/cudnn_v7.6_cuda10.1/lib64
CUDA_LIB=/paddle/nvidia-downloads/cuda-10.1/lib64
TENSORRT_ROOT=/paddle/nvidia-downloads/TensorRT-6.0.1.5

cmake .. -DPADDLE_LIB=${LIB_DIR} \
         -DWITH_MKL=${WITH_MKL} \
         -DDEMO_NAME=${DEMO_NAME} \
         -DWITH_GPU=${WITH_GPU} \
         -DWITH_STATIC_LIB=OFF \
         -DUSE_TENSORRT=${USE_TENSORRT} \
         -DCUDNN_LIB=${CUDNN_LIB} \
         -DCUDA_LIB=${CUDA_LIB} \
         -DTENSORRT_ROOT=${TENSORRT_ROOT}

make -j
```