# Example of using Triton Server Wrapper in Jupyter notebook

## Triton server setup with custom model

Install dependencies

In [None]:
# Install packages through the interpreter that backs this kernel
# (sys.executable), so pip targets the same environment the later cells run in.
import sys
!{sys.executable} -m pip install numpy
# NOTE(review): cupy is not imported by any visible cell of this notebook, and
# pytriton (imported in the next cell) is not installed here — confirm both
# are intentional.
!{sys.executable} -m pip install cupy-cuda12x --extra-index-url=https://pypi.ngc.nvidia.com

Required imports:

In [None]:
import numpy as np

from pytriton.decorators import batch
from pytriton.model_config import ModelConfig, Tensor
from pytriton.triton import Triton

Define inference callable:

In [None]:
@batch
def _add_sub(**inputs):
    """Compute the sum and the difference of two input batches.

    Exactly two keyword inputs are expected; they are taken in the order in
    which they were passed, the first being the left operand. Any other number
    of inputs makes the unpacking raise ``ValueError``.

    Returns:
        A dict with the key ``"add"`` (first + second) and the key ``"sub"``
        (first - second).
    """
    first, second = inputs.values()
    return {"add": first + second, "sub": first - second}

Instantiate the Triton wrapper class and load the model with the defined callable:

In [None]:
# Single wrapper instance reused by every bind / run / stop cell below.
triton = Triton()

In [None]:
# Register the callable under the model name "AddSub": two unnamed float32
# inputs and the named float32 outputs "add" and "sub", all with shape (-1,).
triton.bind(
    model_name="AddSub",
    infer_func=_add_sub,
    inputs=[Tensor(dtype=np.float32, shape=(-1,)) for _ in range(2)],
    outputs=[
        Tensor(name=label, dtype=np.float32, shape=(-1,))
        for label in ("add", "sub")
    ],
    config=ModelConfig(max_batch_size=128),
)

Run the Triton server with the defined model inference callable:

In [None]:
# Start the server for the model bound in the previous cell.
# NOTE(review): the following cells send requests from this same kernel, so
# run() is expected to return while the server keeps serving — confirm.
triton.run()

## Example inference performed with ModelClient calling triton server

In [None]:
from pytriton.client import ModelClient
# Two separate all-ones float32 arrays of shape (batch_size, 1), used as the
# pair of inputs for every inference request in this notebook.
batch_size = 2
a_batch = np.ones(shape=(batch_size, 1), dtype=np.float32)
b_batch = np.ones_like(a_batch)

In [None]:
# Send both batches to the "AddSub" model on localhost in one request; the
# client is closed as soon as the response has been received.
with ModelClient("localhost", "AddSub") as client:
    result_batch = client.infer_batch(a_batch, b_batch)

# Print every returned output as a plain nested list, one line per output.
for output_name in result_batch:
    data_batch = result_batch[output_name]
    print(f"{output_name}: {data_batch.tolist()}")

## Set up the Triton server again with a modified inference callable

Stop triton server

In [None]:
# Stop the running server before the callable is redefined and bound again in
# the cells that follow.
triton.stop()

Redefine inference callable

In [None]:
@batch
def _add_sub(**inputs):
    """Modified callable: scaled sum and scaled difference of two batches.

    Deliberately reuses the name ``_add_sub`` so that the bind call that
    follows picks up this version. Exactly two keyword inputs are expected,
    taken in the order they were passed; any other count makes the unpacking
    raise ``ValueError``.

    Returns:
        A dict with ``"add"`` set to (first + second) * 2 and ``"sub"`` set to
        (first - second) * 3.
    """
    lhs, rhs = inputs.values()
    return {
        "add": (lhs + rhs) * 2,
        "sub": (lhs - rhs) * 3,
    }

Load model again

In [None]:
# Bind the redefined callable under the same model name and with the same
# tensor signature as before: two unnamed float32 inputs, outputs "add"/"sub".
triton.bind(
    model_name="AddSub",
    infer_func=_add_sub,
    inputs=[Tensor(dtype=np.float32, shape=(-1,)) for _ in range(2)],
    outputs=[
        Tensor(name=label, dtype=np.float32, shape=(-1,))
        for label in ("add", "sub")
    ],
    config=ModelConfig(max_batch_size=128),
)

Run the Triton server with the new model inference callable:

In [None]:
# Start the server again, this time with the redefined callable that was bound
# in the previous cell.
triton.run()

## The same inference performed with modified inference callable

In [None]:
# Repeat the same request with the same inputs; only the server-side callable
# has changed since the first inference.
with ModelClient("localhost", "AddSub") as client:
    result_batch = client.infer_batch(a_batch, b_batch)

# Print every returned output as a plain nested list, one line per output.
for output_name in result_batch:
    data_batch = result_batch[output_name]
    print(f"{output_name}: {data_batch.tolist()}")

Stop server at the end

In [None]:
# Final shutdown of the server started by the most recent triton.run() call.
triton.stop()