In [2]:
import hugectr
from hugectr.tools import DataGeneratorParams, DataGenerator

# Describe a synthetic Parquet dataset and write it under ./data_parquet.
# Each sample has 1 label + 10 dense features + 4 sparse slots, i.e. 15 columns per row.
data_generator_params = DataGeneratorParams(
  format = hugectr.DataReaderType_t.Parquet,
  label_dim = 1,
  dense_dim = 10,
  num_slot = 4,
  i64_input_key = True,
  # one entry per slot (num_slot = 4), each set to 1
  nnz_array = [1, 1, 1, 1],
  # file lists for the training and evaluation splits
  source = "./data_parquet/file_list.txt",
  eval_source = "./data_parquet/file_list_test.txt",
  # one entry per slot; the training script passes the same array to its data reader
  slot_size_array = [10000, 10000, 10000, 10000],
  check_type = hugectr.Check_t.Non,
  # sparse keys are drawn from a power-law distribution
  dist_type = hugectr.Distribution_t.PowerLaw,
  power_law_type = hugectr.PowerLaw_t.Short,
  # 16 training files and 4 evaluation files, 40960 samples in each
  num_files = 16,
  eval_num_files = 4,
  num_samples_per_file = 40960)
data_generator = DataGenerator(data_generator_params)
data_generator.generate()

[HCTR][07:46:25.518][INFO][RK0][main]: Generate Parquet dataset
[HCTR][07:46:25.518][INFO][RK0][main]: train data folder: ./data_parquet, eval data folder: ./data_parquet, slot_size_array: 10000, 10000, 10000, 10000, nnz array: 1, 1, 1, 1, #files for train: 16, #files for eval: 4, #samples per file: 40960, Use power law distribution: 1, alpha of power law: 1.3
[HCTR][07:46:25.518][INFO][RK0][main]: ./data_parquet exist
[HCTR][07:46:25.518][INFO][RK0][main]: ./data_parquet exist
[HCTR][07:46:25.518][INFO][RK0][main]: ./data_parquet/train exist
[HCTR][07:46:25.518][INFO][RK0][main]: ./data_parquet/train/gen_0.parquet
[HCTR][07:46:25.663][INFO][RK0][main]: ./data_parquet/train/gen_1.parquet
[HCTR][07:46:25.811][INFO][RK0][main]: ./data_parquet/train/gen_2.parquet
[HCTR][07:46:25.937][INFO][RK0][main]: ./data_parquet/train/gen_3.parquet
[HCTR][07:46:26.071][INFO][RK0][main]: ./data_parquet/train/gen_4.parquet
[HCTR][07:46:26.194][INFO][RK0][main]: ./data_parquet/train/gen_5.parquet
[HCTR][

In [3]:
!pwd

/workspace/merlin/hugectr_inference_backend/hps_backend/examples


In [4]:
!mkdir hps_model

mkdir: cannot create directory ‘hps_model’: File exists


In [5]:
%%writefile fix_meta_json_path.py

import json
file_path_train = './data_parquet/train/_metadata.json'
file_path_val   = './data_parquet/val/_metadata.json'
def fix_meta_json_path(file_path):
    """Prefix every ``file_name`` in a ``_metadata.json`` with ``gen_``, in place.

    The JSON file is expected to hold a ``file_stats`` list of dicts, each with a
    ``file_name`` key. Every entry is rewritten to ``gen_<file_name>`` and the
    file is saved back to the same path. Each (possibly updated) entry is printed.

    The rewrite is idempotent: entries that already start with ``gen_`` are left
    untouched, so re-running the script does not produce ``gen_gen_...`` names.

    Args:
        file_path: path of the ``_metadata.json`` file to patch.

    Raises:
        OSError: if the file cannot be read or written.
        KeyError: if ``file_stats`` or ``file_name`` is missing.
    """
    prefix = "gen_"

    with open(file_path) as f:
        data = json.load(f)

    for item in data['file_stats']:
        # Only add the prefix once; a second run must be a no-op.
        if not item['file_name'].startswith(prefix):
            item['file_name'] = prefix + item['file_name']
        print(item)

    with open(file_path, 'w') as f:
        json.dump(data, f)
fix_meta_json_path(file_path_train)
fix_meta_json_path(file_path_val)

Overwriting fix_meta_json_path.py


In [6]:
!python3 fix_meta_json_path.py

{'file_name': 'gen_0.parquet', 'num_rows': 40960}
{'file_name': 'gen_1.parquet', 'num_rows': 40960}
{'file_name': 'gen_2.parquet', 'num_rows': 40960}
{'file_name': 'gen_3.parquet', 'num_rows': 40960}
{'file_name': 'gen_4.parquet', 'num_rows': 40960}
{'file_name': 'gen_5.parquet', 'num_rows': 40960}
{'file_name': 'gen_6.parquet', 'num_rows': 40960}
{'file_name': 'gen_7.parquet', 'num_rows': 40960}
{'file_name': 'gen_8.parquet', 'num_rows': 40960}
{'file_name': 'gen_9.parquet', 'num_rows': 40960}
{'file_name': 'gen_10.parquet', 'num_rows': 40960}
{'file_name': 'gen_11.parquet', 'num_rows': 40960}
{'file_name': 'gen_12.parquet', 'num_rows': 40960}
{'file_name': 'gen_13.parquet', 'num_rows': 40960}
{'file_name': 'gen_14.parquet', 'num_rows': 40960}
{'file_name': 'gen_15.parquet', 'num_rows': 40960}
{'file_name': 'gen_0.parquet', 'num_rows': 40960}
{'file_name': 'gen_1.parquet', 'num_rows': 40960}
{'file_name': 'gen_2.parquet', 'num_rows': 40960}
{'file_name': 'gen_3.parquet', 'num_rows': 4

In [7]:
import pandas as pd

In [8]:
df = pd.read_parquet("./data_parquet/train/gen_0.parquet")
df.head()

Unnamed: 0,_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14
0,0.565468,0.552473,0.880794,0.419513,0.776548,0.606234,0.528292,0.97861,0.240545,0.944232,0.828531,135,32,1,5
1,0.965076,0.891845,0.294612,0.117228,0.919538,0.855993,0.228353,0.402679,0.377469,0.890093,0.428936,11,28,264,4
2,0.196433,0.943692,0.506197,0.306889,0.598248,0.853011,0.245981,0.503549,0.609469,0.741337,0.568564,1,11,15,9
3,0.718024,0.367026,0.87309,0.375533,0.101961,0.530518,0.467618,0.102497,0.451634,0.022515,0.689176,15,0,597,6
4,0.537616,0.227445,0.333675,0.550674,0.348952,0.707912,0.655634,0.638966,0.940576,0.218253,0.037498,3571,0,1,9


In [9]:
import os
from time import time
import re
import shutil
import glob
import warnings

# Directory layout used by the later cells:
#   /hps_demo/embedding/hps_infer/1   <- sparse model folders are copied here
BASE_DIR = "/hps_demo"
embedding_folder = os.path.join(BASE_DIR, "embedding")
wdl_embedding_repo = os.path.join(embedding_folder, "hps_infer")
wdl_version = os.path.join(wdl_embedding_repo, "1")

# Start from an empty tree on every run. Removing the top-level folder also
# removes everything below it, so the nested folders never need their own
# existence check; makedirs on the deepest path recreates all three levels.
if os.path.isdir(embedding_folder):
    shutil.rmtree(embedding_folder)
os.makedirs(wdl_version)

In [11]:
!tree -l $BASE_DIR

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m

3 directories, 0 files


In [12]:
%%writefile hps_model_train.py

import hugectr
from mpi4py import MPI

## Network built by this script (single GPU, Parquet input):
## Inputs: 1 label, 10 dense features, two sparse inputs ("data1", "data2") with 2 slots each
## Embeddings: one hash embedding table per sparse input (vector size 16 and 32), each reshaped to 2-D
## Head: concat(reshape1, reshape2, dense) -> InnerProduct(1024) -> ReLU -> InnerProduct(1) -> BinaryCrossEntropyLoss
## Note: the dense features go straight into the concat; there is no bottom MLP on them.

# solver, data reader and optimizer, then the model object that layers are added to
solver = hugectr.CreateSolver(model_name = "hps_train",
                              max_eval_batches = 1,
                              batchsize_eval = 1024,
                              batchsize = 1024,
                              lr = 0.001,
                              vvgpu = [[0]],
                              i64_input_key = True,
                              repeat_dataset = True,
                              use_cuda_graph = True)
# same file lists and slot_size_array as used when the dataset was generated
reader = hugectr.DataReaderParams(data_reader_type = hugectr.DataReaderType_t.Parquet,
                                  source = ["./data_parquet/file_list.txt"],
                                  eval_source = "./data_parquet/file_list_test.txt",
                                  check_type = hugectr.Check_t.Non,
                                  slot_size_array = [10000, 10000, 10000, 10000])
optimizer = hugectr.CreateOptimizer(optimizer_type = hugectr.Optimizer_t.Adam)
model = hugectr.Model(solver, reader, optimizer)

# network layers

# https://nvidia-merlin.github.io/HugeCTR/master/api/python_interface.html?highlight=model#input-layer
# see the "data_reader_sparse_param_array" parameter: the 4 sparse slots of the dataset
# are split into two sparse inputs, 2 slots for "data1" and 2 slots for "data2"
model.add(hugectr.Input(label_dim = 1, label_name = "label",
                        dense_dim = 10, dense_name = "dense",
                        data_reader_sparse_param_array = 
                        [hugectr.DataReaderSparseParam("data1", [1, 1], True, 2),
                        hugectr.DataReaderSparseParam("data2", [1, 1], True, 2)]))

# sparse embedding layers for the categorical features, one per sparse input
# https://nvidia-merlin.github.io/HugeCTR/master/api/python_interface.html?highlight=model#sparseembedding
# sparse layers should be defined after the Input layer, but before any Dense layer
# for embedding_type, check https://nvidia-merlin.github.io/HugeCTR/master/api/hugectr_layer_book.html#embedding-types-detail
model.add(hugectr.SparseEmbedding(embedding_type = hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash, 
                            workspace_size_per_gpu_in_mb = 4,
                            embedding_vec_size = 16,
                            combiner = "sum",
                            sparse_embedding_name = "sparse_embedding1",
                            bottom_name = "data1",
                            optimizer = optimizer))
model.add(hugectr.SparseEmbedding(embedding_type = hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash, 
                            workspace_size_per_gpu_in_mb = 8,
                            embedding_vec_size = 32,
                            combiner = "sum",
                            sparse_embedding_name = "sparse_embedding2",
                            bottom_name = "data2",
                            optimizer = optimizer))
# reshape each embedding output; leading_dim equals slots x embedding_vec_size
# (2 x 16 = 32 for sparse_embedding1, 2 x 32 = 64 for sparse_embedding2)
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.Reshape,
                            bottom_names = ["sparse_embedding1"],
                            top_names = ["reshape1"],
                            leading_dim=32))                            
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.Reshape,
                            bottom_names = ["sparse_embedding2"],
                            top_names = ["reshape2"],
                            leading_dim=64))

# join both reshaped embeddings with the raw dense features
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.Concat,
                            bottom_names = ["reshape1", "reshape2", "dense"], top_names = ["concat1"]))

# FC layer + ReLU + FC + binary cross entropy
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.InnerProduct,
                            bottom_names = ["concat1"],
                            top_names = ["fc1"],
                            num_output=1024))
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.ReLU,
                            bottom_names = ["fc1"],
                            top_names = ["relu1"]))
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.InnerProduct,
                            bottom_names = ["relu1"],
                            top_names = ["fc2"],
                            num_output=1))
model.add(hugectr.DenseLayer(layer_type = hugectr.Layer_t.BinaryCrossEntropyLoss,
                            bottom_names = ["fc2", "label"],
                            top_names = ["loss"]))

# compile, dump the network graph, train, then export predictions and labels
# all outputs go to ./hps_model, which must already exist
model.compile()
model.summary()
model.graph_to_json("./hps_model/hps_train.json")
# 1100 iterations in total; evaluation and the snapshot both happen at iteration 1000
model.fit(max_iter = 1100, display = 200, eval_interval = 1000, snapshot = 1000, snapshot_prefix = "./hps_model/hps_train")
# NOTE(review): this runs after fit() has finished all 1100 iterations, while the
# file names carry the snapshot step 1000 - confirm which weights the export reflects
model.export_predictions("./hps_model/hps_train_pred_" + str(1000), "./hps_model/hps_train_label_" + str(1000))

Overwriting hps_model_train.py


In [13]:
!python3 hps_model_train.py

HugeCTR Version: 3.6
[HCTR][07:48:08.807][INFO][RK0][main]: Initialize model: hps_train
[HCTR][07:48:08.807][INFO][RK0][main]: Global seed is 617217921
[HCTR][07:48:08.968][INFO][RK0][main]: Device to NUMA mapping:
  GPU 0 ->  node 3
[HCTR][07:48:10.454][INFO][RK0][main]: Start all2all warmup
[HCTR][07:48:10.456][INFO][RK0][main]: End all2all warmup
[HCTR][07:48:10.457][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][07:48:10.458][INFO][RK0][main]: Device 0: NVIDIA A100-SXM4-80GB
[HCTR][07:48:10.458][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][07:48:10.459][INFO][RK0][main]: Vocabulary size: 40000
[HCTR][07:48:10.459][INFO][RK0][main]: max_vocabulary_size_per_gpu_=21845
[HCTR][07:48:10.461][DEBUG][RK0][tid #140051467990784]: file_name_ ./data_parquet/val/gen_0.parquet file_total_rows_ 40960
[HCTR][07:48:10.461][DEBUG][RK0][tid #140051581523712]: file_name_ ./data_parquet/train/gen_0.parquet file_total_rows_ 40960
[HCTR][07:48:10.465][INFO][RK0][main]: max_vocabulary

In [14]:
!tree -l $BASE_DIR

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m

3 directories, 0 files


In [15]:
!tree -l hps_model

[01;34mhps_model[00m
├── hps_train.json
├── hps_train0_opt_sparse_1000.model
├── [01;34mhps_train0_sparse_1000.model[00m
│   ├── emb_vector
│   └── key
├── hps_train1_opt_sparse_1000.model
├── [01;34mhps_train1_sparse_1000.model[00m
│   ├── emb_vector
│   └── key
├── hps_train_dense_1000.model
├── hps_train_label_1000
├── hps_train_opt_dense_1000.model
├── hps_train_pred_1000
└── infer_test.csv

2 directories, 12 files


In [16]:
!cp -r ./hps_model/hps_train0_sparse_1000.model /hps_demo/embedding/hps_infer/1
!cp -r ./hps_model/hps_train1_sparse_1000.model /hps_demo/embedding/hps_infer/1
!tree -l /hps_demo

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m
            ├── [01;34mhps_train0_sparse_1000.model[00m
            │   ├── emb_vector
            │   └── key
            └── [01;34mhps_train1_sparse_1000.model[00m
                ├── emb_vector
                └── key

5 directories, 4 files


In [31]:
# prepare infer_test file

In [17]:
df = pd.read_parquet("./data_parquet/val/gen_0.parquet")
df.head()

Unnamed: 0,_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14
0,0.369659,0.562382,0.268092,0.270976,0.425515,0.436015,0.794521,0.969748,0.11885,0.31795,0.050502,1,434,1026,13
1,0.105705,0.476559,0.835602,0.103531,0.256015,0.835396,0.476131,0.92322,0.89987,0.008765,0.691802,0,0,23,9
2,0.818356,0.25572,0.89625,0.023801,0.751817,0.846724,0.261466,0.645097,0.173824,0.348452,0.533557,2,11,184,7
3,0.201031,0.303372,0.502298,0.366995,0.75415,0.27013,0.811643,0.322071,0.037592,0.338294,0.373525,2,0,10,0
4,0.923416,0.532531,0.791524,0.313665,0.763071,0.649208,0.176048,0.956767,0.2191,0.629382,0.667392,50,9,1,46


In [18]:
# Readable names for the 15 positional columns (_col0 .. _col14) of the parquet file:
# one label, ten continuous features I1..I10, four categorical features C1..C4.
LABEL_COLUMNS = ['label']
CONTINUOUS_COLUMNS = ["I" + str(idx) for idx in range(1, 11)]
CATEGORICAL_COLUMNS = ["C" + str(idx) for idx in range(1, 5)]

# Final order: label first, then continuous, then categorical.
cols = [*LABEL_COLUMNS, *CONTINUOUS_COLUMNS, *CATEGORICAL_COLUMNS]
cols

['label',
 'I1',
 'I2',
 'I3',
 'I4',
 'I5',
 'I6',
 'I7',
 'I8',
 'I9',
 'I10',
 'C1',
 'C2',
 'C3',
 'C4']

In [19]:
# set_axis returns a relabelled frame; rebinding the result (instead of passing
# inplace=True) keeps this cell working on pandas >= 2.0, where the `inplace`
# argument of set_axis no longer exists.
df = df.set_axis(cols, axis=1)
df.head()

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,C1,C2,C3,C4
0,0.369659,0.562382,0.268092,0.270976,0.425515,0.436015,0.794521,0.969748,0.11885,0.31795,0.050502,1,434,1026,13
1,0.105705,0.476559,0.835602,0.103531,0.256015,0.835396,0.476131,0.92322,0.89987,0.008765,0.691802,0,0,23,9
2,0.818356,0.25572,0.89625,0.023801,0.751817,0.846724,0.261466,0.645097,0.173824,0.348452,0.533557,2,11,184,7
3,0.201031,0.303372,0.502298,0.366995,0.75415,0.27013,0.811643,0.322071,0.037592,0.338294,0.373525,2,0,10,0
4,0.923416,0.532531,0.791524,0.313665,0.763071,0.649208,0.176048,0.956767,0.2191,0.629382,0.667392,50,9,1,46


In [20]:
df.to_csv('./hps_model/infer_test.csv', sep=',', index=False,header=True)

In [21]:
%%writefile hps_train2predict.py

# validation
from hugectr.inference import InferenceParams, CreateInferenceSession
import hugectr
import pandas as pd
import numpy as np
import sys
from mpi4py import MPI

def demo_inference(model_name, network_file, dense_file, embedding_file_list, data_file,enable_cache):
    """Run one HugeCTR inference call on the rows of a CSV file and print the result.

    The CSV must contain the columns ``I1``..``I10`` (continuous features) and
    ``C1``..``C4`` (categorical features). Continuous values are flattened
    row by row into one list. Categorical values are cast to int64, shifted by a
    per-column offset (0, 10000, 20000, 30000) and flattened the same way.

    Args:
        model_name: model name passed to ``InferenceParams``.
        network_file: path of the network JSON passed to ``CreateInferenceSession``.
        dense_file: path of the dense model file.
        embedding_file_list: list of sparse model paths.
        data_file: path of the comma-separated input file.
        enable_cache: forwarded as ``use_gpu_embedding_cache``.

    Returns:
        None. The prediction output is printed.
    """
    CATEGORICAL_COLUMNS=["C" + str(x) for x in range(1, 5)]
    CONTINUOUS_COLUMNS=["I" + str(x) for x in range(1, 11)]

    # Cumulative offsets [0, 10000, 20000, 30000]: each categorical column is
    # moved into its own disjoint key range before lookup.
    emb_size = [10000, 10000, 10000, 10000]
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]

    test_df = pd.read_csv(data_file,sep=',')

    # NOTE(review): row_ptrs is fixed at 15 entries, while dense_features and
    # embedding_columns below are built from every row of data_file - confirm
    # this matches the CSR layout and batch size predict() expects.
    row_ptrs = list(range(0,15))

    dense_features = list(test_df[CONTINUOUS_COLUMNS].values.flatten())
    # Keep the result of the cast: astype returns a new frame, so calling it
    # without using the return value leaves the key dtype unchanged.
    categorical_keys = test_df[CATEGORICAL_COLUMNS].astype(np.int64)
    embedding_columns = list((categorical_keys + shift).values.flatten())

    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(model_name = model_name,
                                max_batchsize = 64,
                                hit_rate_threshold = 0.9,
                                dense_model_file = dense_file,
                                sparse_model_files = embedding_file_list,
                                device_id = 0,
                                use_gpu_embedding_cache = enable_cache,
                                cache_size_percentage = 0.9,
                                i64_input_key = True,
                                use_mixed_precision = False
                                )
    inference_session = CreateInferenceSession(network_file, inference_params)
    # TODO: check VCSR example for hugectr inference
    # https://gitlab-master.nvidia.com/dl/hugectr/hugectr_inference_backend/-/blob/main/docs/architecture.md#vcsr-example
    output = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    print("HPS demo multi-embedding table inference result is {}".format(output))

# def demo_lookup(model_name, network_file, dense_file, embedding_file_list, data_file,enable_cache):
#     # CATEGORICAL_COLUMNS=["C1_C2","C3_C4"]+["C" + str(x) for x in range(1, 5)]
#     CATEGORICAL_COLUMNS=["C" + str(x) for x in range(1, 5)]
#     CONTINUOUS_COLUMNS=["I" + str(x) for x in range(1, 11)]
#     LABEL_COLUMNS = ['label']
    
#     emb_size = [10000, 10000, 10000, 10000]
#     shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]
#     test_df = pd.read_csv(data_file,sep=',')
#     config_file = network_file
    
# #     row_ptrs = list(range(0,21))+list(range(0,261))
#     row_ptrs = list([0,2,4])
    
#     dense_features =  list(test_df[CONTINUOUS_COLUMNS].values.flatten())
#     test_df[CATEGORICAL_COLUMNS].astype(np.int64)
#     embedding_columns = list((test_df[CATEGORICAL_COLUMNS]+shift).values.flatten())

#     # create parameter server, embedding cache and inference session
#     inference_params = InferenceParams(model_name = model_name,
#                                 max_batchsize = 64,
#                                 hit_rate_threshold = 0.9,
#                                 dense_model_file = dense_file,
#                                 sparse_model_files = embedding_file_list,
#                                 device_id = 0,
#                                 use_gpu_embedding_cache = enable_cache,
#                                 cache_size_percentage = 0.9,
#                                 i64_input_key = True,
#                                 use_mixed_precision = False
#                                 )
#     inference_session = CreateInferenceSession(config_file, inference_params)
#     # TODO: check VSCR example for hugectr inference
#     # https://gitlab-master.nvidia.com/dl/hugectr/hugectr_inference_backend/-/blob/main/docs/architecture.md#vcsr-example
#     output = inference_session.predict(dense_features, embedding_columns, row_ptrs)
#     print("HPS demo multi-embedding table inference result is {}".format(output))
    
if __name__ == "__main__":
    # Positional arguments, in order:
    #   1 model name, 2 network JSON, 3 dense model file,
    #   4 comma-separated list of sparse model paths, 5 CSV data file
    if len(sys.argv) < 6:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: {} <model_name> <network_json> <dense_model> "
                 "<sparse_model[,sparse_model...]> <data_csv>".format(sys.argv[0]))

    model_name = sys.argv[1]
    network_file = sys.argv[2]
    dense_file = sys.argv[3]
    embedding_file_list = sys.argv[4].split(',')
    print(embedding_file_list)
    data_file = sys.argv[5]

    # The last argument is forwarded as use_gpu_embedding_cache; pass False to
    # run the same inference without the GPU embedding cache.
    demo_inference(model_name, network_file, dense_file, embedding_file_list, data_file, True)


Overwriting hps_train2predict.py


In [22]:
!python hps_train2predict.py \
    "hps_train" \
    "./hps_model/hps_train.json" \
    "./hps_model/hps_train_dense_1000.model" \
    "./hps_model/hps_train0_sparse_1000.model,./hps_model/hps_train1_sparse_1000.model" \
    "./hps_model/infer_test.csv"

['./hps_model/hps_train0_sparse_1000.model', './hps_model/hps_train1_sparse_1000.model']
[HCTR][08:00:50.397][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][08:00:50.397][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][08:00:50.397][INFO][RK0][main]: Creating ParallelHashMap CPU database backend...
[HCTR][08:00:50.397][INFO][RK0][main]: Created parallel (16 partitions) blank database backend in local memory!
[HCTR][08:00:50.397][INFO][RK0][main]: Volatile DB: initial cache rate = 1
[HCTR][08:00:50.397][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0
[HCTR][08:00:50.406][INFO][RK0][main]: Table: hps_et.hps_train.sparse_embedding1; cached 18502 / 18502 embeddings in volatile database (ParallelHashMap); load: 18403 / 18446744073709551615 (0.00%).
[HCTR][08:00:50.410][INFO][RK0][main]: Table: hps_et.hps_train.sparse_embedding2; cached 18471 / 18471 embeddings in volatile database (ParallelHashMap); load: 18432 / 