# Setup: install and launch TensorFlow Serving (don't run)

In [None]:
# Register the TensorFlow Serving APT repository, then install the model
# server package and the tensorflow-serving-api Python package.
# NOTE(review): these commands write under /etc/apt and invoke apt, so they
# presumably need root privileges — confirm before running.
url = "https://storage.googleapis.com/tensorflow-serving-apt"
src = "stable tensorflow-model-server tensorflow-model-server-universal"
# Write the APT source entry; IPython interpolates {url} and {src} before
# handing the line to the shell.
!echo 'deb {url} {src}' > /etc/apt/sources.list.d/tensorflow-serving.list
# Download the repository's public key and pipe it into apt-key.
!curl '{url}/tensorflow-serving.release.pub.gpg' | apt-key add -
# Refresh the package index, then install the server package.
!apt update -q && apt-get install -y tensorflow-model-server
# Install/upgrade the Python package quietly; note the version is not pinned.
%pip install -q -U tensorflow-serving-api

In [None]:
# Launch TensorFlow Serving with gRPC on port 8500 and the REST API on port 8501.
# NOTE(review): this is a plain shell command (no `!` prefix), so it is
# presumably meant to be run in a terminal rather than in the kernel — confirm.
#
# model_base_path must have a folder structure with versions underneath,
# i.e. one sub-folder per version such as `<model_base_path>/01`,
# `<model_base_path>/02`. Anything else won't work.
# NOTE(review): the path below is absolute and machine-specific; adjust it
# to your own checkout before running.
tensorflow_model_server \
     --port=8500 \
     --rest_api_port=8501 \
     --model_name=my-rl-model \
     --model_base_path=/home/amitaharoni/workspace/homl3/ch18/data/01-policy # >my_server.log 2>&1

# Code

## Setup

In [10]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorrt
import tensorflow as tf
from pathlib import Path
import numpy as np
import pandas as pd

# For JSON
import json
import requests

# For gRPC
import grpc
from tensorflow_serving.apis.predict_pb2 import PredictRequest
from tensorflow_serving.apis import prediction_service_pb2_grpc


In [6]:
# Model input: one row of four values. expand_dims adds the leading batch
# axis, and .numpy() converts the tensor to a NumPy array for serialization.
x = tf.expand_dims(
    tf.constant([0.0273956, -0.00611216, 0.03585979, 0.0197368]),
    axis=0,
).numpy()

## JSON

In [7]:

# The available signatures can be inspected from a shell with saved_model_cli:
#   saved_model_cli show --dir ch18/data/01-policy/01
#   saved_model_cli show --dir ch18/data/01-policy/01 --tag_set serve
#   saved_model_cli show --dir ch18/data/01-policy/01 --tag_set serve --signature_def serving_default
#
# Serialize the REST request body: the signature to call plus the input
# rows as nested lists under 'instances'.
request_json = json.dumps(
    dict(signature_name='serving_default', instances=x.tolist())
)

request_json

'{"signature_name": "serving_default", "instances": [[0.027395600453019142, -0.006112160161137581, 0.035859789699316025, 0.019736800342798233]]}'

In [3]:
# POST the JSON payload to the REST predict endpoint of the served model.
server_url = 'http://localhost:8501/v1/models/my-rl-model:predict'
# An explicit timeout keeps the cell from hanging indefinitely when the
# server is not running; 10 s matches the timeout used for the gRPC call.
http_response = requests.post(server_url, data=request_json, timeout=10.0)
# Fail loudly on a 4xx/5xx status instead of decoding an error body.
http_response.raise_for_status()
# `response` holds the decoded JSON body (read by the next cell); the HTTP
# object keeps its own name so `response` is never bound to two kinds of thing.
response = http_response.json()

In [5]:
# Convert the 'predictions' entry of the JSON reply into a NumPy array and
# display it rounded to two decimals.
y_proba = np.asarray(response['predictions'])
np.round(y_proba, 2)

array([[0.46]])

## gRPC

In [9]:
# Build the gRPC PredictRequest: name the served model and signature, then
# attach the input array as a tensor proto.
request = PredictRequest()
spec = request.model_spec
spec.name = 'my-rl-model'
spec.signature_name = 'serving_default'
# NOTE(review): 'dense_input' is presumably the input name reported by
# saved_model_cli for the serving_default signature — confirm.
input_tensor = tf.make_tensor_proto(x)
request.inputs['dense_input'].CopyFrom(input_tensor)

In [11]:
# Open an unencrypted gRPC channel to the server, wrap it in a prediction
# service stub, and issue the Predict call with a 10-second deadline.
# The channel is left open on purpose here so `channel` and
# `predict_service` can be reused; call channel.close() when finished.
channel = grpc.insecure_channel(target='localhost:8500')
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
response = predict_service.Predict(request=request, timeout=10.0)

In [12]:
# Display the reply message returned by Predict (rendered in protobuf text form).
response

model_spec {
  name: "my-rl-model"
  version {
    value: 1
  }
  signature_name: "serving_default"
}
outputs {
  key: "dense_1"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 1
      }
    }
    float_val: 0.464128375
  }
}

In [22]:
# Pull the output tensor keyed 'dense_1' out of the reply and convert the
# tensor proto into a NumPy array for display.
output_protos = response.outputs['dense_1']
y_proba = tf.make_ndarray(output_protos)
y_proba

array([[0.46412838]], dtype=float32)