In [1]:
import numpy as np
import tritonclient.grpc as grpcclient
import ast

In [2]:
# Triton gRPC endpoint and the deployed model this notebook queries.
TRITON_SERVER_URL = "172.22.4.42:8001"
MODEL_NAME = "ner-english-fast"
MODEL_VERSION = "1"
# How many times the sample sentences are repeated to build the request load.
MULTIPLIER = 256
# Read the samples inside a context manager so the file handle is closed
# deterministically instead of being left open for the life of the kernel.
with open("strings_list.txt", "r") as samples_file:
    SAMPLE_TEXTS = samples_file.read()
# Drop blank lines first, then repeat: this yields exactly the same list as
# filtering the already-repeated list, but filters MULTIPLIER times fewer items.
STRING_LIST = [
    sentence for sentence in SAMPLE_TEXTS.split("\n") if len(sentence) > 0
] * MULTIPLIER

In [3]:
class ClientDecoder:
    """Send sentences to a Triton-hosted model over gRPC and decode its reply.

    Each request carries the sentences as an ``(n, 1)`` BYTES tensor named
    ``INPUT_0`` and asks for ``OUTPUT_0``. The output is decoded from bytes
    and parsed with ``ast.literal_eval``; it is expected to be a dict literal
    (the batched path merges the per-request results with ``dict.update``).
    """

    def __init__(self, triton_server_url, model_name, model_version):
        """Connect to the server and fetch the model's metadata and config.

        Args:
            triton_server_url: ``host:port`` of the Triton gRPC endpoint.
            model_name: Name of the model to query.
            model_version: Version used for the metadata/config lookups and
                for every inference request.
        """
        self.triton_client = grpcclient.InferenceServerClient(
            url=triton_server_url, verbose=False
        )

        # Stored for inspection; not read anywhere else in this class.
        self.model_metadata = self.triton_client.get_model_metadata(
            model_name=model_name, model_version=model_version
        )

        self.model_config = self.triton_client.get_model_config(
            model_name=model_name, model_version=model_version
        ).config
        self.model_name = model_name
        # Kept so inference targets the same version whose config
        # (max_batch_size) drives the chunking in submit().
        self.model_version = model_version

    def _infer_batch(self, sentences):
        """Run a single inference request and return the parsed ``OUTPUT_0``."""
        # Build the object array directly: going through a fixed-width unicode
        # array first would pad every entry to the longest sentence's length.
        sentence_obj = np.array(sentences, dtype=object).reshape(-1, 1)
        triton_inputs = [
            grpcclient.InferInput("INPUT_0", sentence_obj.shape, "BYTES")
        ]
        triton_inputs[0].set_data_from_numpy(sentence_obj)

        triton_outputs = [grpcclient.InferRequestedOutput("OUTPUT_0")]

        response = self.triton_client.infer(
            self.model_name,
            triton_inputs,
            model_version=self.model_version,
            outputs=triton_outputs,
        )
        # OUTPUT_0 is read as a single bytes value holding a Python literal.
        return ast.literal_eval(response.as_numpy("OUTPUT_0").tolist().decode())

    def submit(self, sentence_list):
        """Run inference on ``sentence_list`` and return the decoded results.

        Inputs longer than the model's ``max_batch_size`` are split into
        consecutive chunks of at most that size, sent one request per chunk,
        and the per-chunk dicts are merged. Because the merge uses
        ``dict.update``, identical keys from different chunks collapse into a
        single entry (the last one wins).

        Args:
            sentence_list: Sequence of sentences (strings) to process.

        Returns:
            The parsed model output; an empty dict for empty input.
        """
        # Nothing to send: skip the round trip to the server.
        if len(sentence_list) == 0:
            return {}

        max_batch_size = self.model_config.max_batch_size
        # max_batch_size <= 0 means no usable chunk size (it would make
        # range() fail with a zero step), so send everything in one request.
        if max_batch_size <= 0 or len(sentence_list) <= max_batch_size:
            return self._infer_batch(sentence_list)

        results = {}
        for start in range(0, len(sentence_list), max_batch_size):
            chunk = sentence_list[start : start + max_batch_size]
            results.update(self._infer_batch(chunk))
        return results

In [4]:
# Build the client from the notebook's configuration constants.
client_decoder = ClientDecoder(
    triton_server_url=TRITON_SERVER_URL,
    model_name=MODEL_NAME,
    model_version=MODEL_VERSION,
)

In [5]:
# Send the full sentence list to the model and keep the decoded results.
responses = client_decoder.submit(sentence_list=STRING_LIST)

In [6]:
# Show the decoded results returned by submit() as this cell's output.
responses

{"NVIDIA is founded by Jensen Huang , Chris Malachowsky and Curtis Priem . '": [{'entity_group': 'ORG',
   'start': 0,
   'word': 'NVIDIA',
   'end': 6,
   'score': 99},
  {'entity_group': 'PER',
   'start': 21,
   'word': 'Jensen Huang',
   'end': 33,
   'score': 99},
  {'entity_group': 'PER',
   'start': 35,
   'word': 'Chris Malachowsky',
   'end': 52,
   'score': 99},
  {'entity_group': 'PER',
   'start': 57,
   'word': 'Curtis Priem',
   'end': 69,
   'score': 99}],
 "At the time , there were more than two dozen graphics chips companies , a number that would soar to 70 three years later . '": [],
 "By 2006 , NVIDIA was the only independent still operating . '": [{'entity_group': 'ORG',
   'start': 9,
   'word': 'NVIDIA',
   'end': 15,
   'score': 99}],
 'NVIDIA \'s reaches its first strategic partnership with SGS-Thomson Microelectronics to manufacture the company \'s single-chip graphical-user interface accelerator . "': [{'entity_group': 'ORG',
   'start': 0,
   'word': 'NVIDIA'