##### Copyright 2018 The TensorFlow Authors.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build TVM module, deploy on Tensorflow Serving and Infer

This guide:
- Downloads the MobileNet model and builds it using the TVM Relay frontend.
- Deploys the compiled model on TensorFlow Serving.
- Performs inference through the REST interface.


In [4]:
import os
import numpy as np

import tvm
from tvm import relay
import os.path
import tarfile,sys

# Tensorflow imports
import tensorflow as tf
from tensorflow.core.framework import graph_pb2
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_util

## Download Mobilenet Model

In [5]:
def untar(fname):
    """Extract a ``.tgz`` archive into a directory named after the archive.

    The destination is ``"./" + <fname without its final extension>``, so it
    is resolved relative to the current working directory.

    Parameters
    ----------
    fname : str
        Path of the archive. Only names ending in ``"tgz"`` are extracted;
        any other name is reported on stdout and left untouched.

    Returns
    -------
    None
        Progress is reported through ``print`` only.
    """
    file_tar, _ = os.path.splitext(fname)
    print(file_tar)
    if fname.endswith("tgz"):
        # The context manager closes the archive even when extraction fails
        # part-way (the previous explicit close() was skipped on error).
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall trusts the member paths stored in the
            # archive; only use this on archives from a trusted source.
            tar.extractall(path="./" + file_tar)
        print("Extracted in Current Directory")
    else:
        print("Not a tar.gz file")


def get_workload(path):
    """Fetch a model archive, unpack it and name the frozen graph inside it.

    Parameters
    ----------
    path : str
        URL of the archive to download.

    Returns
    -------
    str
        ``<stem>/<stem>_frozen.pb`` where ``<stem>`` is the archive's file
        name without its final extension.
    """
    from mxnet.gluon.utils import download

    # "." is passed as the download destination.
    download(path, ".")

    archive = os.path.basename(path)
    untar(archive)

    stem = os.path.splitext(archive)[0]
    return stem + "/" + stem + "_frozen.pb"




In [6]:
# Archive stem of the MobileNet v1 release to fetch.
name = 'mobilenet_v1_1.0_224'
dload_path = 'http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/{}.tgz'.format(name)

# Download + unpack; the returned value is the relative path of the frozen graph.
model_name = get_workload(dload_path)

mobilenet_v1_1.0_224
Extracted in Current Directory


## Compile the model on TVM and export the same

In [7]:
import tvm.relay.testing.tf as tf_testing

def import_into_tvm(graph_def, input_data, input_node, num_output=1, layout="NHWC"):
    """Convert a TensorFlow GraphDef to Relay via ``relay.frontend.from_tensorflow``.

    Parameters
    ----------
    graph_def : object
        The TensorFlow graph definition handed to the Relay frontend.
    input_data : array-like
        Sample input; only its ``.shape`` is read, to describe the input node.
    input_node : str
        Name of the graph's input node (used as the key of the shape dict).
    num_output : int, optional
        Currently unused; kept so existing callers keep working.
    layout : str, optional
        Forwarded to the frontend as ``layout=``. The previous body read an
        undefined global ``layout`` and raised ``NameError`` on every call;
        it is now an explicit keyword defaulting to ``"NHWC"``.

    Returns
    -------
    tuple
        ``(sym, params)`` exactly as returned by the frontend.
    """
    shape_dict = {input_node: input_data.shape}

    sym, params = relay.frontend.from_tensorflow(graph_def, layout=layout, shape=shape_dict)
    return sym, params


tf.reset_default_graph()

# Read the frozen GraphDef. tf.gfile.GFile replaces the deprecated
# tf.gfile.FastGFile, and the file is closed as soon as it has been parsed
# instead of staying open for the whole compilation.
with tf.gfile.GFile(os.path.join("./", model_name), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# The return value is not needed; `graph` is reserved for the build output below.
tf.import_graph_def(graph_def, name='')
# Call the utility to import the graph definition into default graph.
graph_def = tf_testing.ProcessGraphDefParam(graph_def)

in_shape = (1, 224, 224, 3)
shape_dict = {'input': in_shape}
# Not passed to the frontend; kept as a record of the input dtype.
dtype_dict = {'input': "float32"}

sym, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build_module.build(
        sym, target="llvm", params=params)

# Write the three artifacts the deployment section below expects:
# model.so, model.json and model.params.
lib.export_library("model.so")
with open("model.json", "w") as fo:
    fo.write(graph)
with open("model.params", "wb") as fo:
    # Relay's own serializer; avoids importing nnvm just to save parameters.
    fo.write(relay.save_param_dict(params))


Instructions for updating:
Use tf.gfile.GFile.




## Deploy TVM Model on TF Serving

You may use the template below as the model config file for the deployment.

model_config_list: {
  config: {
    name: "tvm_test", # Some name of the model
    base_path: "path to model", # Inside this folder, a subfolder named "1" contains model.json, model.so and model.params
    model_platform: "tvm"
  },
}

Finally, start the model server with the command below (replace the `--model_config_file` path with the location of your own config file).

tensorflow_model_server --port=6003  --rest_api_port=6004 --model_config_file=/data/srk/Serving/models/model_config.txt

The output should look like the following:


2019-04-06 15:04:16.584957: I tensorflow_serving/model_servers/server_core.cc:462] Adding/updating models.
2019-04-06 15:04:16.585035: I tensorflow_serving/model_servers/server_core.cc:559]  (Re-)adding model: tvm_test
2019-04-06 15:04:16.685308: I tensorflow_serving/core/basic_manager.cc:739] Successfully reserved resources to load servable {name: tvm_test version: 1}
2019-04-06 15:04:16.685365: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: tvm_test version: 1}
2019-04-06 15:04:16.685380: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: tvm_test version: 1}

[15:04:17] tensorflow_serving/servables/tvm/tvm_loader.cc:73: Input:input
[15:04:17] tensorflow_serving/servables/tvm/tvm_loader.cc:80: Output:InceptionV3/Predictions/Reshape_1
2019-04-06 15:04:17.052067: I tensorflow_serving/core/loader_harness.cc:86] Successfully loaded servable version {name: tvm_test version: 1}
2019-04-06 15:04:17.053701: I tensorflow_serving/model_servers/server.cc:331] Running gRPC ModelServer at 0.0.0.0:6003 ...
[evhttp_server.cc : 237] RAW: Entering the event loop ...
2019-04-06 15:04:17.054633: I tensorflow_serving/model_servers/server.cc:351] Exporting HTTP/REST API at:localhost:6004 ...




In [8]:
# We can see the model metadata as below: this queries the REST metadata
# endpoint of the `tvm_test` model on port 6004.

!curl http://localhost:6004/v1/models/tvm_test/metadata

{
"model_spec":{
 "name": "tvm",
 "signature_name": "",
 "version": "1"
}
,
"metadata": {"signature_def": {
 "signature_def": {
  "serving_default": {
   "inputs": {
    "input": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "1",
        "name": ""
       },
       {
        "size": "299",
        "name": ""
       },
       {
        "size": "299",
        "name": ""
       },
       {
        "size": "3",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "input"
    }
   },
   "outputs": {
    "InceptionV3/Predictions/Reshape_1": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "1",
        "name": ""
       },
       {
        "size": "1001",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "InceptionV3/Predictions/Reshape_1"
    }
   },
   "method_name": ""
  }
 }
}

In [15]:
# Infer using the REST interface
import json

import requests

# Random float32 batch of shape (1, 224, 224, 3) used as a smoke-test input.
input_batch = np.random.uniform(size=(1, 224, 224, 3)).astype('float32')

# Serialize the request body; the array is converted to nested lists for JSON.
data = json.dumps({"signature_name": "serving_default", "instances": input_batch.tolist()})
print('Data: {} ... {}'.format(data[:50], data[-52:]))

headers = {"content-type": "application/json"}
# The timeout keeps the cell from hanging indefinitely if the server is unreachable.
json_response = requests.post('http://localhost:6004/v1/models/tvm_test:predict',
                              data=data, headers=headers, timeout=60)
print("Json Resp:", json_response)
# Fail with the HTTP error itself rather than a later KeyError on 'predictions'.
json_response.raise_for_status()
predictions = json_response.json()['predictions']
print("Predictions:", len(predictions[0]))


Data: {"signature_name": "serving_default", "instances": ... 33990479, 0.742600679397583, 0.7655767202377319]]]]}
Json Resp: <Response [200]>
Predictions: 1001
