In [1]:
# Copyright 2021 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# 1.Overview

In this notebook, we want to provide a tutorial about how to make inference using HugeCTR trained WDL model. And we can collect the inference benchmark by Triton performance analyzer tool.

1. Overview
2. Generate the WDL deployment Configuration
3. Load Models on the Triton Server
4. Prepare Inference Input Data 
5. Inference Benchmarm by Triton Performance Tool

# 2. Generate the WDL Deployment Configuration

## 2.1 Generate related model folders

In [2]:
# define some data folder to store the model related files
# Standard Libraries
import os
from time import time
import re
import shutil
import glob
import warnings

BASE_DIR = "wdl_infer"
model_folder  = os.path.join(BASE_DIR, "model")
wdl_model_repo= os.path.join(model_folder, "wdl")
wdl_version =os.path.join(wdl_model_repo, "1")

if os.path.isdir(model_folder):
    shutil.rmtree(model_folder)
os.makedirs(model_folder)

if os.path.isdir(wdl_model_repo):
    shutil.rmtree(wdl_model_repo)
os.makedirs(wdl_model_repo)

if os.path.isdir(wdl_version):
    shutil.rmtree(wdl_version)
os.makedirs(wdl_version)

## 2.2 Copy WDL model files and configuration to model repository

In [3]:
!cp -r wdl_train/wdl0_sparse_2000.model $wdl_version/
!cp -r wdl_train/wdl1_sparse_2000.model $wdl_version/
!cp -r wdl_train/wdl_dense_2000.model $wdl_version/
!cp wdl_train/wdl_infer.json $wdl_version/
!ls -l $wdl_version

total 5840
drwxr-xr-x 2 root root    4096 Jun 28 05:48 wdl0_sparse_2000.model
drwxr-xr-x 2 root root    4096 Jun 28 05:48 wdl1_sparse_2000.model
-rw-r--r-- 1 root root 5963780 Jun 28 05:48 wdl_dense_2000.model
-rw-r--r-- 1 root root    3158 Jun 28 05:48 wdl_infer.json


## 2.3 Generate the Triton configuration for deploying WDL

In [14]:
%%writefile wdl_infer/model/wdl/config.pbtxt
name: "wdl"
backend: "hugectr"
max_batch_size:1,
input [
   {
    name: "DES"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "CATCOLUMN"
    data_type: TYPE_INT64
    dims: [ -1 ]
  },
  {
    name: "ROWINDEX"
    data_type: TYPE_INT32
    dims: [ -1 ]
  }
]
output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ -1 ]
  }
]
instance_group [
  {
    count: 1
    kind : KIND_GPU
    gpus:[0]
  }
]

parameters [
  {
  key: "config"
  value: { string_value: "/wdl_infer/model/wdl/1/wdl.json" }
  },
  {
  key: "gpucache"
  value: { string_value: "true" }
  },
  {
  key: "hit_rate_threshold"
  value: { string_value: "0.8" }
  },
  {
  key: "gpucacheper"
  value: { string_value: "0.5" }
  },
  {
  key: "label_dim"
  value: { string_value: "1" }
  },
  {
  key: "slots"
  value: { string_value: "28" }
  },
  {
  key: "cat_feature_num"
  value: { string_value: "28" }
  },
 {
  key: "des_feature_num"
  value: { string_value: "13" }
  },
  {
  key: "max_nnz"
  value: { string_value: "2" }
  },
  {
  key: "embedding_vector_size"
  value: { string_value: "128" }
  },
  {
  key: "embeddingkey_long_type"
  value: { string_value: "true" }
  }
]

Overwriting wdl_infer/model/wdl/config.pbtxt


## 2.4 Generate the Hugectr Backend parameter server configuration for deploying wdl

In [5]:
%%writefile wdl_infer/model/ps.json
{
    "supportlonglong":true,
    "models":[
        {
            "model":"wdl",
            "sparse_files":["wdl_infer/model/wdl/1/wdl0_sparse_2000.model", "wdl_train/model/wdl/1/wdl1_sparse_2000.model"],
            "dense_file":"wdl_infer/model/wdl/1/wdl_dense_2000.model",
            "network_file":"wdl_infer/model/wdl/1/wdl.json"
        }
    ]  
}

Writing wdl_infer/model/ps.json


# 3.Deploy WDL on Triton Server 

# 4. Prepare Inference Input Data

### 4.1 Read validation data

In [20]:
!ls -l wdl_train/val

total 760208
-rw-r--r-- 1 root root 125742229 Jun 28 06:11 0.57bb2e5fca5f4dc1aace4ce9ad531a2d.parquet
-rw-r--r-- 1 root root 142891704 Jun 28 06:48 0.db790513beb041569f47f5cdcb047f9b.parquet
-rw-r--r-- 1 root root        59 Jun 28 06:48 _file_list.txt
-rw-r--r-- 1 root root     27701 Jun 28 06:48 _metadata
-rw-r--r-- 1 root root      1537 Jun 28 06:48 _metadata.json
drwxr-xr-x 2 root root      4096 Jun 28 06:46 temp-parquet-after-conversion
-rw-r--r-- 1 root root 509766965 Jun 25 08:07 test.txt


In [21]:
import pandas as pd
df = pd.read_parquet("wdl_train/val/0.db790513beb041569f47f5cdcb047f9b.parquet")

In [22]:
df.head()

Unnamed: 0,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,...,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,-0.008003,-0.544749,-0.487016,-0.157301,-0.224758,-0.206385,-0.064249,-0.28181,-0.687732,-0.470383,...,1,666,5,0,0,0,32831,8098,10,28
1,0.032786,-0.475468,1.229957,-0.088692,-0.131771,-0.206385,-0.064249,-0.271376,0.46906,-0.470383,...,2,666,10,0,147138,0,22292,3629,12,5
2,-0.059432,1.911341,-0.272394,-0.088692,8.887988,1.442828,1.953991,-0.279201,0.541359,1.386036,...,0,666,10,69747,76381,207280,0,3602,73,22
3,-0.055886,0.884347,-0.272394,-0.157301,-0.06978,-0.206385,-0.064249,-0.148777,-0.326234,-0.470383,...,3,575,12,20820,99774,20188,56495,7153,10,27
4,-0.057659,-0.465958,0.371471,-0.157301,-0.224758,-0.206385,-0.064249,-0.279201,0.035263,-0.470383,...,0,666,2,69747,76381,207280,0,5421,12,22


In [23]:
df.head(200000).to_csv('wdl_infer/infer_test.txt', sep='\t', index=False,header=True)

## 4.2 Follow the Triton requirements to generate input data with json format

In [13]:
%%writefile wdl_infer/criteo2predict.py
import argparse
import sys
import numpy as np
import pandas as pd
import json
import pickle

def parse_config(src_config):
    try:
        with open(src_config, 'r') as data_json:
            j_data = json.load(data_json)
            dense_dim = j_data["dense"]
            categorical_dim = j_data["categorical"]
            slot_size = j_data["slot_size"]
        assert(categorical_dim == np.sum(slot_size))
        return dense_dim, categorical_dim, slot_size
    except:
        print("Invalid data configuration file!")

def convert(src_csv, src_config, dst, batch_size, segmentation):
    dense_dim, categorical_dim, slot_size = parse_config(src_config)
    slot_size_array=[249058, 19561, 14212, 6890, 18592, 4, 6356, 1254, 52, \
                     226170, 80508, 72308, 11, 2169, 7597, 61, 4, 923, 15, 249619, 168974, 243480, 68212, 9169, 75, 34, 278018, 415262]
    offset = np.insert(np.cumsum(slot_size_array), 0, 0)[:-1]
    total_columns = 1 + dense_dim + categorical_dim
    df = pd.read_csv(src_csv,  sep='\t', nrows=batch_size)
    cols = df.columns
    slot_num = len(slot_size)
    row_ptrs = [0 for _ in range(batch_size*slot_num + 1)]
    for i in range(1, len(row_ptrs)):
        row_ptrs[i] = row_ptrs[i-1] + slot_size[(i-1)%slot_num]
    label_df =  pd.DataFrame(df['label'].values.reshape(1,batch_size))
    dense_df = pd.DataFrame(df[['I'+str(i+1) for i in range(dense_dim)]].values.reshape(1, batch_size*dense_dim))
    embedding_columns_df = pd.DataFrame(df[['C'+str(i+1) for i in range(categorical_dim)]].values.reshape(1, batch_size*categorical_dim))
    row_ptrs_df = pd.DataFrame(np.array(row_ptrs).reshape(1, batch_size*slot_num + 1))
    with open(dst, 'w') as dst_txt:
        dst_txt.write("{\n\"data\":[\n{\n")
        dst_txt.write("\"DES\":")
        dst_txt.write(','.join('%s' %id for id in dense_df.values.tolist()))
        dst_txt.write(",\n\"CATCOLUMN\":")
        dst_txt.write(','.join('%s' %id for id in (embedding_columns_df.values.reshape(-1,26)+offset).reshape(1,-1).tolist()))
        dst_txt.write(",\n\"ROWINDEX\":")
        dst_txt.write(','.join('%s' %id for id in row_ptrs_df.values.tolist()))
        dst_txt.write("\n}\n]\n}")

if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(description='Convert Preprocessed Criteo Data to Inference Format')
    arg_parser.add_argument('--src_csv_path', type=str, required=True)
    arg_parser.add_argument('--src_config_path', type=str, required=True)
    arg_parser.add_argument('--dst_path', type=str, required=True)
    arg_parser.add_argument('--batch_size', type=int, default=128)
    arg_parser.add_argument('--segmentation', type=str, default=' ')
    args = arg_parser.parse_args()
    src_csv_path = args.src_csv_path
    segmentation = args.segmentation
    src_config_path = args.src_config_path
    dst_path = args.dst_path
    batch_size = args.batch_size
    convert(src_csv_path, src_config_path, dst_path, batch_size, segmentation)

Writing wdl_infer/criteo2predict.py


## 4.3 Define inference input data format

In [17]:
 %%writefile wdl_infer/wdl_input_format.json
{
    "dense": 13,
    "categorical": 28,
    "slot_size": [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
}

Writing wdl_infer/wdl_input_format.json


In [18]:
batchsize=1
!python3 wdl_infer/criteo2predict.py --src_csv_path=wdl_infer/infer_test.txt --src_config_path=wdl_infer/wdl_input_format.json --dst_path wdl_infer/$batchsize".json" --batch_size=$batchsize --segmentation=','

Traceback (most recent call last):
  File "wdl_infer/criteo2predict.py", line 59, in <module>
    convert(src_csv_path, src_config_path, dst_path, batch_size, segmentation)
  File "wdl_infer/criteo2predict.py", line 34, in convert
    embedding_columns_df = pd.DataFrame(df[['C'+str(i+1) for i in range(categorical_dim)]].values.reshape(1, batch_size*categorical_dim))
  File "/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py", line 2912, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py", line 1254, in _get_listlike_indexer
    self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
  File "/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py", line 1304, in _validate_read_indexer
    raise KeyError(f"{not_found} not in index")
KeyError: "['C27', 'C28'] not in index"
