In [1]:
import numpy as np
import tritonclient.grpc as grpcclient

In [2]:
# --- Triton endpoint and model selection -----------------------------------
# gRPC address of the Triton server (host:port).
TRITON_SERVER_URL = "172.25.4.42:8001"
# Name and version of the deployed model to query.
MODEL_NAME = "flair-ner-english-fast"
MODEL_VERSION = "1"

# --- Example inputs ----------------------------------------------------------
# One short and one longer sentence to send through the NER model.
SHORT_EXAMPLE_SENTENCE = "NVIDIA is located in California."
LONG_EXAMPLE_SENTENCE = "NVIDIA was founded by Jensen Huang, Chris Malachowsky, and Curtis Priem in 1993."

In [3]:
# Open a gRPC connection to the Triton server (verbose logging disabled).
triton_client = grpcclient.InferenceServerClient(
    url=TRITON_SERVER_URL,
    verbose=False,
)

# Metadata for the specific model/version this notebook targets.
model_metadata = triton_client.get_model_metadata(
    model_name=MODEL_NAME,
    model_version=MODEL_VERSION,
)

# The config response wraps the actual model configuration in `.config`.
model_config = triton_client.get_model_config(
    model_name=MODEL_NAME,
    model_version=MODEL_VERSION,
).config

# Tensor names exactly as declared in the model configuration.
input_names = [tensor.name for tensor in model_config.input]
output_names = [tensor.name for tensor in model_config.output]

In [4]:
# Show the sentence being sent, then pack its UTF-8 bytes into a 1-D uint8
# array, which is the form handed to the Triton input tensor below.
# (Swap in LONG_EXAMPLE_SENTENCE here to try the longer input.)
print(SHORT_EXAMPLE_SENTENCE)
encoded_sentence = SHORT_EXAMPLE_SENTENCE.encode("utf-8")
sample_for_inference = np.frombuffer(encoded_sentence, dtype=np.uint8)

NVIDIA is located in California.


In [5]:
# Describe the single input tensor: its name, the shape of the encoded
# sentence, and the UINT8 datatype matching the uint8 numpy array.
inputs = [grpcclient.InferInput("INPUT__0", sample_for_inference.shape, "UINT8")]

# Attach the encoded sentence bytes as the tensor payload.
inputs[0].set_data_from_numpy(sample_for_inference)

# Request every output declared in the model config; class_count=0 asks for
# the raw tensor rather than a top-k classification result.
outputs = [
    grpcclient.InferRequestedOutput(output_name, class_count=0)
    for output_name in output_names
]

# Pin the request to MODEL_VERSION so inference runs against the same model
# version whose metadata/config were fetched above, instead of whatever
# version the server would pick by default.
response = triton_client.infer(
    MODEL_NAME,
    inputs,
    model_version=MODEL_VERSION,
    outputs=outputs,
)

In [6]:
# The output tensor holds the rendered prediction as raw bytes; decode it
# explicitly as UTF-8 to get the text back.
sentence = bytes(response.as_numpy("OUTPUT__0")).decode("utf-8")

# The rendered result separates the tokenized sentence from the entity list
# with a right arrow (U+2192). It is written as an escape so the separator
# cannot be corrupted into mojibake by a mis-encoded copy of the notebook.
sentence.split("\u2192")

['Sentence: "NVIDIA was founded by Jensen Huang , Chris Malachowsky , and Curtis Priem in 1993 ." ',
 ' ["NVIDIA"/ORG, "Jensen Huang"/PER, "Chris Malachowsky"/PER, "Curtis Priem"/PER]']