## Step 1: Prepare Environment

Import the required libraries and configure logging levels.

In [0]:
import logging

import mlflow
import model_navigator as nav
import torch
from pyspark.ml.functions import predict_batch_udf
from pyspark.sql.types import ArrayType, FloatType
from sentence_transformers import SentenceTransformer

from stnavigator import SentenceTransformerNavigator
from utility import benchmarkGPU, generate_1M_data

# Adjust logging levels: only errors from the py4j bridge and from
# sentence_transformers are let through, to keep cell output readable.
logging.getLogger('py4j').setLevel(logging.ERROR)
logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
# Turn off MLflow autologging for this notebook.
mlflow.autolog(disable=True)

INFO:py4j.clientserver:Received command c on object id p0


## Step 2: Load public data

In this notebook we will explore a dataset of fine food reviews, loaded from a public blob storage container.

In [0]:
# Public CSV with the fine food reviews sample used as the benchmark input.
reviews_csv_url = "wasbs://publicwasb@mmlspark.blob.core.windows.net/fine_food_reviews_1k.csv"

# NOTE(review): `spark` is not defined in this section; presumably it is the
# session object injected by the notebook runtime -- confirm.
df_max = generate_1M_data(spark, reviews_csv_url)

## Step 3: Set prediction model

Define the embedding function: a `SentenceTransformerNavigator` model wrapped with NVIDIA Model Navigator so that inference runs with TensorRT (TRT) acceleration.

In [0]:
def predict_batch_fn():
    """Build the batch-encoding callable handed to ``predict_batch_udf``.

    Loads the ``intfloat/e5-large-v2`` model through
    ``SentenceTransformerNavigator``, wraps it in a Model Navigator module
    named ``e5-large-v2``, moves it to CUDA when available (CPU otherwise)
    and then calls ``nav.load_optimized()``.

    Returns:
        A function ``predict(inputs)`` that encodes one batch with gradient
        tracking disabled and returns whatever ``encode`` produces.
    """
    # Pick the accelerator if one is visible to torch, else fall back to CPU.
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")

    # Construct in eval mode, wrap for Model Navigator, then place on the device.
    encoder = nav.Module(
        SentenceTransformerNavigator("intfloat/e5-large-v2").eval(),
        name="e5-large-v2",
    ).to(target)
    nav.load_optimized()

    def predict(inputs):
        # `inputs` must expose `.tolist()`; presumably a NumPy array of
        # strings supplied by Spark -- TODO confirm.
        with torch.no_grad():
            return encoder.encode(inputs.tolist(), convert_to_tensor=False, show_progress_bar=True)

    return predict

# Register the model factory as a Spark batch-prediction UDF that returns an
# array of floats per row and is configured with a batch size of 10.
encode = predict_batch_udf(
    predict_batch_fn,
    return_type=ArrayType(FloatType()),
    batch_size=10,
)

## Step 4: Benchmark different scales of input data

We will print the duration of each stage of the experiment (SentenceTransformer embedding accelerated with NVIDIA TensorRT, followed by RAPIDS KNN).

For example: [100, 1000, 10000, 100000] rows of text data

In [0]:
# Row counts to benchmark, one run per entry.
specified_values = [100, 1000, 10000, 100000]

# Banner for the benchmark output; a plain string, since it has no placeholders.
print("********  Test TRT E5 with IVFlat KNN  ************")
print()

# Run the benchmark helper once per scale, passing the row limit and the
# embedding UDF built in the previous step.
for lim in specified_values:
    benchmarkGPU(df_max, lim, encode)


********  Test TRT E5 with IVFlat KNN  ************

Scale 100 rows
+---------------+--------------+
|Benchmark Name |Duration (sec)|
+---------------+--------------+
|Embeddings     |25.47         |
|KNN            |13.71         |
|All stages     |39.18         |
+---------------+--------------+

Scale 1000 rows
