In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.compiler.tensorrt import trt_convert as trt
from tensorflow.python.saved_model import tag_constants
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

from tensorflow.keras.preprocessing import image

## Inference with the unoptimized (baseline) SavedModel

In [2]:
# Build ResNet50 with the 'imagenet' weights, then write the whole model to
# the 'resnet50_saved_model' directory so later cells can reload/convert it.
model = ResNet50(weights='imagenet')
model.save(filepath='resnet50_saved_model')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: resnet50_saved_model/assets


In [3]:
# Sanity check: reload the exported model from disk and classify one image.
model = tf.keras.models.load_model('resnet50_saved_model')

img_path = './data/img0.JPG'  # Siberian_husky
img = image.load_img(img_path, target_size=(224, 224))

# Image -> array -> add a leading batch axis -> ResNet50 input preprocessing.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
preds = model.predict(x)

# decode_predictions returns one list per sample in the batch, each a list of
# (class, description, probability) tuples; print the top 3 for our one sample.
print('{} - Predicted: {}'.format(img_path, decode_predictions(preds, top=3)[0]))

./data/img0.JPG - Predicted: [('n02110185', 'Siberian_husky', 0.5568136), ('n02109961', 'Eskimo_dog', 0.41662624), ('n02110063', 'malamute', 0.021314148)]


In [4]:
# Build a fixed batch of preprocessed images to feed the model.
batch_size = 8
batched_input = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)

# Fill every batch slot by cycling through the four sample files img0..img3.
for i in range(batch_size):
  img_path = './data/img%d.JPG' % (i % 4)
  img = image.load_img(img_path, target_size=(224, 224))
  x = image.img_to_array(img)
  x = np.expand_dims(x, axis=0)
  x = preprocess_input(x)
  batched_input[i, :] = x
batched_input = tf.constant(batched_input)
print('batched_input shape: ', batched_input.shape)

# Benchmarking throughput
N_warmup_run = 50
N_run = 1000
# Preallocate one slot per timed run: growing the array with np.append inside
# the loop would copy the whole array on every iteration.
elapsed_time = np.empty(N_run, dtype=np.float64)

# Untimed warm-up calls so one-off start-up costs do not skew the measurement.
for i in range(N_warmup_run):
  preds = model.predict(batched_input)

for i in range(N_run):
  # perf_counter is a monotonic, high-resolution clock meant for measuring
  # short intervals; time.time() can jump if the system clock is adjusted.
  start_time = time.perf_counter()
  preds = model.predict(batched_input)
  end_time = time.perf_counter()
  elapsed_time[i] = end_time - start_time
  if i % 50 == 0:
    # Mean latency over (at most) the 50 most recent timed runs.
    print('Step {}: {:4.1f}ms'.format(i, (elapsed_time[max(0, i - 49):i + 1].mean()) * 1000))

print('Throughput: {:.0f} images/s'.format(N_run * batch_size / elapsed_time.sum()))

batched_input shape:  (8, 224, 224, 3)
Step 0: 29.0ms
Step 50: 29.2ms
Step 100: 29.5ms
Step 150: 29.2ms
Step 200: 29.2ms
Step 250: 29.5ms
Step 300: 30.2ms
Step 350: 30.5ms
Step 400: 30.4ms
Step 450: 30.1ms
Step 500: 30.1ms
Step 550: 30.1ms
Step 600: 30.1ms
Step 650: 29.6ms
Step 700: 29.8ms
Step 750: 29.7ms
Step 800: 29.7ms
Step 850: 29.3ms
Step 900: 29.6ms
Step 950: 37.3ms
Throughput: 265 images/s


## TF-TRT FP32 model

In [3]:
# TF-TRT FP32 Model
# Convert the exported 'resnet50_saved_model' with TF-TRT at FP32 precision
# and write the result to its own SavedModel directory.
print('Converting to TF-TRT FP32...')

# Start from the library defaults and override only precision and workspace
# size (8e9 bytes).
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.FP32,
    max_workspace_size_bytes=8000000000,
)

converter = trt.TrtGraphConverterV2(
    input_saved_model_dir='resnet50_saved_model',
    conversion_params=conversion_params,
)
converter.convert()
converter.save(output_saved_model_dir='resnet50_saved_model_TFTRT_FP32')

print('Done Converting to TF-TRT FP32')

Converting to TF-TRT FP32...
INFO:tensorflow:Linked TensorRT version: (6, 0, 1)
INFO:tensorflow:Loaded TensorRT version: (6, 0, 1)
INFO:tensorflow:Could not find TRTEngineOp_0_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Assets written to: resnet50_saved_model_TFTRT_FP32/assets
Done Converting to TF-TRT FP32


## TF-TRT FP16 model

In [2]:
# Same conversion as the FP32 variant, but requesting FP16 precision and
# saving to a separate output directory.
print('Converting to TF-TRT FP16...')

# Library defaults with precision and workspace size (8e9 bytes) overridden.
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=trt.TrtPrecisionMode.FP16,
                                                               max_workspace_size_bytes=8000000000)
converter = trt.TrtGraphConverterV2(input_saved_model_dir='resnet50_saved_model',
                                    conversion_params=conversion_params)
converter.convert()
converter.save(output_saved_model_dir='resnet50_saved_model_TFTRT_FP16')

print('Done Converting to TF-TRT FP16')

Converting to TF-TRT FP16...
INFO:tensorflow:Linked TensorRT version: (6, 0, 1)
INFO:tensorflow:Loaded TensorRT version: (6, 0, 1)
INFO:tensorflow:Could not find TRTEngineOp_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: resnet50_saved_model_TFTRT_FP16/assets
Done Converting to TF-TRT FP16


## Benchmarking

In [5]:
# Input batch shared by the TF-TRT benchmarks below.
batch_size = 8
batched_input = np.zeros((batch_size, 224, 224, 3), dtype=np.float32)

# Populate each batch slot, cycling over the four sample files img0..img3.
for i in range(batch_size):
  img_path = './data/img%d.JPG' % (i % 4)
  img = image.load_img(img_path, target_size=(224, 224))
  # Image -> array -> leading batch axis -> ResNet50 input preprocessing.
  x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
  batched_input[i, :] = x

# Wrap the filled NumPy buffer as a TensorFlow constant for inference.
batched_input = tf.constant(batched_input)
print('batched_input shape: ', batched_input.shape)

def benchmark_tftrt(input_saved_model, n_warmup_run=50, n_run=1000, report_every=50):
    """Benchmark inference latency/throughput of a SavedModel.

    Loads the SavedModel with the SERVING tag, takes its 'serving_default'
    signature and calls it repeatedly on the module-level `batched_input`
    tensor. Prints a rolling mean latency every `report_every` timed runs and
    the overall throughput at the end.

    Args:
        input_saved_model: Path of the SavedModel directory to load.
        n_warmup_run: Number of untimed calls made before measuring.
        n_run: Number of timed calls. Must be >= 1.
        report_every: Print the mean latency of the most recent
            `report_every` timed runs each time the run index is a multiple
            of this value. Must be >= 1.

    Raises:
        ValueError: If `n_run` or `report_every` is smaller than 1.

    Note:
        Reads the global `batched_input`; the images-per-call figure used for
        throughput is taken from its leading dimension so the result always
        matches the batch that was actually fed.
    """
    if n_run < 1:
        raise ValueError('n_run must be >= 1, got {}'.format(n_run))
    if report_every < 1:
        raise ValueError('report_every must be >= 1, got {}'.format(report_every))

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    # Untimed warm-up calls so one-off start-up costs do not skew the numbers.
    for _ in range(n_warmup_run):
        infer(batched_input)

    # Preallocate one slot per run: np.append in the loop would copy the whole
    # array on every iteration.
    elapsed_time = np.empty(n_run, dtype=np.float64)

    for i in range(n_run):
        # perf_counter is monotonic and high-resolution; time.time() is a wall
        # clock that may jump if the system time is adjusted.
        # NOTE(review): only the `infer` call itself is timed; its outputs are
        # never read back on the host. Confirm this captures the full
        # execution time on the target device.
        start_time = time.perf_counter()
        infer(batched_input)
        elapsed_time[i] = time.perf_counter() - start_time
        if i % report_every == 0:
            recent = elapsed_time[max(0, i + 1 - report_every):i + 1]
            print('Step {}: {:4.1f}ms'.format(i, recent.mean() * 1000))

    total_time = elapsed_time.sum()
    n_images = n_run * int(batched_input.shape[0])
    # Guard against a zero total (e.g. a clock too coarse for a trivial call).
    throughput = n_images / total_time if total_time > 0 else float('inf')
    print('Throughput: {:.0f} images/s'.format(throughput))

batched_input shape:  (8, 224, 224, 3)


In [6]:
benchmark_tftrt('resnet50_saved_model_TFTRT_FP32')

Step 0:  6.8ms
Step 50:  6.8ms
Step 100:  6.8ms
Step 150:  6.8ms
Step 200:  6.8ms
Step 250:  6.8ms
Step 300:  6.8ms
Step 350:  6.8ms
Step 400:  6.9ms
Step 450:  6.9ms
Step 500:  6.8ms
Step 550:  6.8ms
Step 600:  6.9ms
Step 650:  6.9ms
Step 700:  6.9ms
Step 750:  6.9ms
Step 800:  6.8ms
Step 850:  6.9ms
Step 900:  6.9ms
Step 950:  6.9ms
Throughput: 1168 images/s


In [7]:
benchmark_tftrt('resnet50_saved_model_TFTRT_FP16')

Step 0:  6.8ms
Step 50:  6.8ms
Step 100:  6.9ms
Step 150:  6.9ms
Step 200:  6.9ms
Step 250:  6.9ms
Step 300:  6.9ms
Step 350:  6.9ms
Step 400:  6.9ms
Step 450:  6.9ms
Step 500:  6.9ms
Step 550:  6.9ms
Step 600:  6.9ms
Step 650:  6.9ms
Step 700:  6.9ms
Step 750:  6.9ms
Step 800:  6.9ms
Step 850:  6.9ms
Step 900:  6.9ms
Step 950:  6.9ms
Throughput: 1163 images/s
