In [1]:
import hugectr
from hugectr.tools import DataGeneratorParams, DataGenerator

# Describe a synthetic Parquet dataset: 1 label column, 10 dense features and
# 4 categorical slots (slot size 10000, one key per slot) with power-law keys.
data_generator_params = DataGeneratorParams(
    format=hugectr.DataReaderType_t.Parquet,
    label_dim=1,
    dense_dim=10,
    num_slot=4,
    i64_input_key=True,
    nnz_array=[1] * 4,
    source="./data_parquet/file_list.txt",
    eval_source="./data_parquet/file_list_test.txt",
    slot_size_array=[10000] * 4,
    check_type=hugectr.Check_t.Non,
    dist_type=hugectr.Distribution_t.PowerLaw,
    power_law_type=hugectr.PowerLaw_t.Short,
    num_files=16,
    eval_num_files=4,
    num_samples_per_file=40960,
)

# Generate the training and evaluation files on disk.
data_generator = DataGenerator(data_generator_params)
data_generator.generate()

[HCTR][05:04:36.012][INFO][RK0][main]: Generate Parquet dataset
[HCTR][05:04:36.012][INFO][RK0][main]: train data folder: ./data_parquet, eval data folder: ./data_parquet, slot_size_array: 10000, 10000, 10000, 10000, nnz array: 1, 1, 1, 1, #files for train: 16, #files for eval: 4, #samples per file: 40960, Use power law distribution: 1, alpha of power law: 1.3
[HCTR][05:04:36.012][INFO][RK0][main]: ./data_parquet exist
[HCTR][05:04:36.012][INFO][RK0][main]: ./data_parquet exist
[HCTR][05:04:36.012][INFO][RK0][main]: ./data_parquet/train exist
[HCTR][05:04:36.012][INFO][RK0][main]: ./data_parquet/train/gen_0.parquet
[HCTR][05:04:37.520][INFO][RK0][main]: ./data_parquet/train/gen_1.parquet
[HCTR][05:04:37.638][INFO][RK0][main]: ./data_parquet/train/gen_2.parquet
[HCTR][05:04:37.769][INFO][RK0][main]: ./data_parquet/train/gen_3.parquet
[HCTR][05:04:37.931][INFO][RK0][main]: ./data_parquet/train/gen_4.parquet
[HCTR][05:04:38.059][INFO][RK0][main]: ./data_parquet/train/gen_5.parquet
[HCTR][

In [2]:
# Print the working directory; the relative paths used in this notebook
# (./data_parquet, hps_model, ...) are resolved against it.
!pwd

/workspace/merlin/hugectr_inference_backend/hps_backend/examples


In [3]:
# Create the output directory for the trained model files.
# -p keeps the cell re-runnable: no error when the directory already exists.
!mkdir -p hps_model

In [4]:
%%writefile fix_meta_json_path.py

import json
file_path_train = './data_parquet/train/_metadata.json'
file_path_val   = './data_parquet/val/_metadata.json'

# Prefix carried by the generated data files (gen_0.parquet, gen_1.parquet, ...).
FILE_PREFIX = "gen_"


def fix_meta_json_path(file_path):
    """Prefix every ``file_name`` entry of a ``_metadata.json`` file with ``gen_``.

    The file is rewritten in place. Entries that already start with the prefix
    are left untouched, so running the script more than once does not produce
    names such as ``gen_gen_0.parquet``.

    Args:
        file_path: path of the JSON metadata file; it must contain a
            ``file_stats`` list whose items have a ``file_name`` key.
    """
    with open(file_path) as f:
        data = json.load(f)

    for item in data['file_stats']:
        if not item['file_name'].startswith(FILE_PREFIX):
            item['file_name'] = "gen_{}".format(item['file_name'])
        print(item)

    with open(file_path, 'w') as f:
        json.dump(data, f)


# Only touch the dataset when executed as a script, not when imported.
if __name__ == "__main__":
    fix_meta_json_path(file_path_train)
    fix_meta_json_path(file_path_val)

Overwriting fix_meta_json_path.py


In [5]:
# Run the script written by the previous cell; it rewrites the file names in
# the train and val _metadata.json files and prints every updated entry.
!python3 fix_meta_json_path.py

{'file_name': 'gen_0.parquet', 'num_rows': 40960}
{'file_name': 'gen_1.parquet', 'num_rows': 40960}
{'file_name': 'gen_2.parquet', 'num_rows': 40960}
{'file_name': 'gen_3.parquet', 'num_rows': 40960}
{'file_name': 'gen_4.parquet', 'num_rows': 40960}
{'file_name': 'gen_5.parquet', 'num_rows': 40960}
{'file_name': 'gen_6.parquet', 'num_rows': 40960}
{'file_name': 'gen_7.parquet', 'num_rows': 40960}
{'file_name': 'gen_8.parquet', 'num_rows': 40960}
{'file_name': 'gen_9.parquet', 'num_rows': 40960}
{'file_name': 'gen_10.parquet', 'num_rows': 40960}
{'file_name': 'gen_11.parquet', 'num_rows': 40960}
{'file_name': 'gen_12.parquet', 'num_rows': 40960}
{'file_name': 'gen_13.parquet', 'num_rows': 40960}
{'file_name': 'gen_14.parquet', 'num_rows': 40960}
{'file_name': 'gen_15.parquet', 'num_rows': 40960}
{'file_name': 'gen_0.parquet', 'num_rows': 40960}
{'file_name': 'gen_1.parquet', 'num_rows': 40960}
{'file_name': 'gen_2.parquet', 'num_rows': 40960}
{'file_name': 'gen_3.parquet', 'num_rows': 4

In [6]:
import pandas as pd

In [7]:
# Peek at the first generated training file to see the column layout.
train_sample_path = "./data_parquet/train/gen_0.parquet"
df = pd.read_parquet(train_sample_path)
df.head()

Unnamed: 0,_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14
0,0.814841,0.520009,0.797189,0.222827,0.078717,0.614582,0.395775,0.567154,0.466539,0.775148,0.139989,9,1,2,52
1,0.619443,0.167564,0.716365,0.586061,0.908427,0.058091,0.241871,0.683993,0.159354,0.260729,0.820689,1,7,0,87
2,0.02729,0.554788,0.128592,0.35647,0.681377,0.321617,0.09858,0.840157,0.391385,0.724311,0.544364,2088,1,2,2
3,0.756215,0.546706,0.642247,0.640887,0.630262,0.664023,0.061477,0.012776,0.363288,0.551708,0.330753,0,9165,0,3
4,0.604962,0.065089,0.626631,0.364093,0.446031,0.244077,0.310744,0.514325,0.32086,0.329368,0.519373,1,39,39,2


In [8]:
import os
from time import time
import re
import shutil
import glob
import warnings

# Directory layout used for the embedding files:
#   BASE_DIR/embedding/hps_infer/1
BASE_DIR = "/hps_demo"
embedding_folder = os.path.join(BASE_DIR, "embedding")
wdl_embedding_repo = os.path.join(embedding_folder, "hps_infer")
wdl_version = os.path.join(wdl_embedding_repo, "1")

# Start from a clean tree. Removing the top-level embedding folder also removes
# everything below it, so no separate checks for the nested directories are
# needed, and makedirs creates the intermediate directories in one call.
if os.path.isdir(embedding_folder):
    shutil.rmtree(embedding_folder)
os.makedirs(wdl_version)

In [9]:
# Show the directory tree that was just created under BASE_DIR
# (-l makes tree follow symbolic links to directories).
!tree -l $BASE_DIR

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m

3 directories, 0 files


In [10]:
%%writefile hps_model_train.py

import hugectr
from mpi4py import MPI

# Network built by this script:
#   inputs : 10 dense features + 4 categorical slots, exposed as two sparse
#            inputs ("data1", "data2") of 2 slots each
#   sparse : one embedding table per sparse input, each reshaped to one vector
#   dense  : concat(reshape1, reshape2, dense) -> FC(1024) -> ReLU -> FC(1)
#            -> BinaryCrossEntropyLoss against the label

# Iteration at which the snapshot is written; also used as the suffix of the
# exported prediction / label files.
SNAPSHOT_ITER = 1000

# --- solver, data reader, optimizer -----------------------------------------
solver = hugectr.CreateSolver(
    model_name="hps_train",
    max_eval_batches=1,
    batchsize_eval=1024,
    batchsize=1024,
    lr=0.001,
    vvgpu=[[0]],
    i64_input_key=True,
    repeat_dataset=True,
    use_cuda_graph=True,
)
reader = hugectr.DataReaderParams(
    data_reader_type=hugectr.DataReaderType_t.Parquet,
    source=["./data_parquet/file_list.txt"],
    eval_source="./data_parquet/file_list_test.txt",
    check_type=hugectr.Check_t.Non,
    slot_size_array=[10000, 10000, 10000, 10000],
)
optimizer = hugectr.CreateOptimizer(optimizer_type=hugectr.Optimizer_t.Adam)
model = hugectr.Model(solver, reader, optimizer)

# --- input layer --------------------------------------------------------------
# https://nvidia-merlin.github.io/HugeCTR/master/api/python_interface.html?highlight=model#input-layer
# See the "data_reader_sparse_param_array" parameter: the 4 sparse features are
# split into two sparse inputs with 2 slots each.
sparse_inputs = [
    hugectr.DataReaderSparseParam("data1", [1, 1], True, 2),
    hugectr.DataReaderSparseParam("data2", [1, 1], True, 2),
]
model.add(hugectr.Input(
    label_dim=1,
    label_name="label",
    dense_dim=10,
    dense_name="dense",
    data_reader_sparse_param_array=sparse_inputs,
))

# --- sparse embedding layers --------------------------------------------------
# https://nvidia-merlin.github.io/HugeCTR/master/api/python_interface.html?highlight=model#sparseembedding
# Sparse layers must be added after the Input layer and before any dense layer.
# Embedding types: https://nvidia-merlin.github.io/HugeCTR/master/api/hugectr_layer_book.html#embedding-types-detail
embedding_tables = [
    # (sparse input, embedding name, workspace size in MB, vector size)
    ("data1", "sparse_embedding1", 4, 16),
    ("data2", "sparse_embedding2", 8, 32),
]
for sparse_input, table_name, workspace_mb, vec_size in embedding_tables:
    model.add(hugectr.SparseEmbedding(
        embedding_type=hugectr.Embedding_t.DistributedSlotSparseEmbeddingHash,
        workspace_size_per_gpu_in_mb=workspace_mb,
        embedding_vec_size=vec_size,
        combiner="sum",
        sparse_embedding_name=table_name,
        bottom_name=sparse_input,
        optimizer=optimizer,
    ))

# --- reshape each embedding output (2 slots x vector size) -------------------
reshape_layers = [
    # (embedding name, reshaped name, leading dimension)
    ("sparse_embedding1", "reshape1", 32),
    ("sparse_embedding2", "reshape2", 64),
]
for table_name, reshaped_name, flat_dim in reshape_layers:
    model.add(hugectr.DenseLayer(
        layer_type=hugectr.Layer_t.Reshape,
        bottom_names=[table_name],
        top_names=[reshaped_name],
        leading_dim=flat_dim,
    ))

# --- concat + FC -> ReLU -> FC -> binary cross entropy -----------------------
model.add(hugectr.DenseLayer(
    layer_type=hugectr.Layer_t.Concat,
    bottom_names=["reshape1", "reshape2", "dense"],
    top_names=["concat1"],
))
model.add(hugectr.DenseLayer(
    layer_type=hugectr.Layer_t.InnerProduct,
    bottom_names=["concat1"],
    top_names=["fc1"],
    num_output=1024,
))
model.add(hugectr.DenseLayer(
    layer_type=hugectr.Layer_t.ReLU,
    bottom_names=["fc1"],
    top_names=["relu1"],
))
model.add(hugectr.DenseLayer(
    layer_type=hugectr.Layer_t.InnerProduct,
    bottom_names=["relu1"],
    top_names=["fc2"],
    num_output=1,
))
model.add(hugectr.DenseLayer(
    layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,
    bottom_names=["fc2", "label"],
    top_names=["loss"],
))

# --- compile, train, export ---------------------------------------------------
model.compile()
model.summary()
model.graph_to_json("./hps_model/hps_train.json")
model.fit(
    max_iter=1100,
    display=200,
    eval_interval=1000,
    snapshot=SNAPSHOT_ITER,
    snapshot_prefix="./hps_model/hps_train",
)
model.export_predictions(
    "./hps_model/hps_train_pred_" + str(SNAPSHOT_ITER),
    "./hps_model/hps_train_label_" + str(SNAPSHOT_ITER),
)

Overwriting hps_model_train.py


In [11]:
# Train the model by running the script written by the previous cell; the
# graph JSON, snapshots and exported predictions are written under ./hps_model.
!python3 hps_model_train.py

HugeCTR Version: 3.6
[HCTR][05:49:03.277][INFO][RK0][main]: Initialize model: hps_train
[HCTR][05:49:03.277][INFO][RK0][main]: Global seed is 4101819403
[HCTR][05:49:03.443][INFO][RK0][main]: Device to NUMA mapping:
  GPU 0 ->  node 3
[HCTR][05:49:04.896][INFO][RK0][main]: Start all2all warmup
[HCTR][05:49:04.898][INFO][RK0][main]: End all2all warmup
[HCTR][05:49:04.899][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][05:49:04.900][INFO][RK0][main]: Device 0: NVIDIA A100-SXM4-80GB
[HCTR][05:49:04.900][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][05:49:04.901][INFO][RK0][main]: Vocabulary size: 40000
[HCTR][05:49:04.901][INFO][RK0][main]: max_vocabulary_size_per_gpu_=21845
[HCTR][05:49:04.902][DEBUG][RK0][tid #140382482462464]: file_name_ ./data_parquet/train/gen_0.parquet file_total_rows_ 40960
[HCTR][05:49:04.903][DEBUG][RK0][tid #140382461163264]: file_name_ ./data_parquet/val/gen_0.parquet file_total_rows_ 40960
[HCTR][05:49:04.905][INFO][RK0][main]: max_vocabular

In [12]:
# BASE_DIR is unchanged by training: the training script uses ./hps_model as
# its snapshot prefix, so nothing has been written under BASE_DIR yet.
!tree -l $BASE_DIR

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m

3 directories, 0 files


In [13]:
# List the files produced by training (graph JSON, dense / sparse snapshots,
# exported predictions and labels).
!tree -l hps_model

[01;34mhps_model[00m
├── hps_train.json
├── hps_train0_opt_sparse_1000.model
├── [01;34mhps_train0_sparse_1000.model[00m
│   ├── emb_vector
│   └── key
├── hps_train1_opt_sparse_1000.model
├── [01;34mhps_train1_sparse_1000.model[00m
│   ├── emb_vector
│   └── key
├── hps_train_dense_1000.model
├── hps_train_label_1000
├── hps_train_opt_dense_1000.model
└── hps_train_pred_1000

2 directories, 11 files


In [14]:
# Copy both trained sparse embedding tables into the version directory created
# earlier. The paths come from the notebook variables (wdl_version, BASE_DIR)
# instead of repeating the absolute path, so the location is defined in one place.
!cp -r ./hps_model/hps_train0_sparse_1000.model $wdl_version
!cp -r ./hps_model/hps_train1_sparse_1000.model $wdl_version
!tree -l $BASE_DIR

[01;34m/hps_demo[00m
└── [01;34membedding[00m
    └── [01;34mhps_infer[00m
        └── [01;34m1[00m
            ├── [01;34mhps_train0_sparse_1000.model[00m
            │   ├── emb_vector
            │   └── key
            └── [01;34mhps_train1_sparse_1000.model[00m
                ├── emb_vector
                └── key

5 directories, 4 files


In [44]:
%%writefile hps_train2predict.py

# validation
from hugectr.inference import InferenceParams, CreateInferenceSession
import hugectr
import pandas as pd
import numpy as np
import sys
from mpi4py import MPI

def demo_inference(model_name, network_file, dense_file, embedding_file_list, data_file, enable_cache):
    """Read samples from ``data_file`` and run them through a HugeCTR inference session.

    Args:
        model_name: forwarded to ``InferenceParams`` as ``model_name``.
        network_file: network graph JSON passed to ``CreateInferenceSession``.
        dense_file: forwarded as ``dense_model_file``.
        embedding_file_list: list of paths forwarded as ``sparse_model_files``.
        data_file: comma-separated file with a header row that contains at
            least the columns I1..I10 (dense) and C1..C4 (categorical).
        enable_cache: forwarded as ``use_gpu_embedding_cache``.

    The prediction output is printed; nothing is returned.
    """
    categorical_columns = ["C" + str(x) for x in range(1, 5)]
    continuous_columns = ["I" + str(x) for x in range(1, 11)]

    # Per-slot key offsets: each slot is shifted by the combined size of the
    # slots before it, i.e. [0, 10000, 20000, 30000].
    emb_size = [10000, 10000, 10000, 10000]
    shift = np.insert(np.cumsum(emb_size), 0, 0)[:-1]

    test_df = pd.read_csv(data_file, sep=',')

    # NOTE(review): row_ptrs is hard-coded, while dense_features and
    # embedding_columns below are built from every row of data_file. Confirm
    # the layout predict() expects for the number of samples actually passed.
    # An earlier variant of this line, kept for reference:
    # row_ptrs = list(range(0,21))+list(range(0,261))
    row_ptrs = [0, 2, 4]

    dense_features = list(test_df[continuous_columns].values.flatten())
    # astype returns a new frame, so the cast only takes effect when its result
    # is used; the keys must be 64-bit integers because i64_input_key is True.
    categorical_keys = test_df[categorical_columns].astype(np.int64)
    embedding_columns = list((categorical_keys + shift).values.flatten())

    # create parameter server, embedding cache and inference session
    inference_params = InferenceParams(
        model_name=model_name,
        max_batchsize=64,
        hit_rate_threshold=0.9,
        dense_model_file=dense_file,
        sparse_model_files=embedding_file_list,
        device_id=0,
        use_gpu_embedding_cache=enable_cache,
        cache_size_percentage=0.9,
        i64_input_key=True,
        use_mixed_precision=False,
    )
    inference_session = CreateInferenceSession(network_file, inference_params)
    # TODO: check VCSR example for hugectr inference
    # https://gitlab-master.nvidia.com/dl/hugectr/hugectr_inference_backend/-/blob/main/docs/architecture.md#vcsr-example
    output = inference_session.predict(dense_features, embedding_columns, row_ptrs)
    print("HPS demo multi-embedding table inference result is {}".format(output))

def demo_lookup(model_name, network_file, dense_file, embedding_file_list, data_file, enable_cache):
    """Run the prediction demo; performs exactly the same steps as ``demo_inference``.

    This entry point takes the same arguments and does the same work as
    ``demo_inference`` (load ``data_file``, create the inference session, call
    ``predict`` and print the result), so it delegates to it instead of
    carrying a second copy of the code.

    Args:
        model_name: model name for the inference parameters.
        network_file: network graph JSON.
        dense_file: dense model file.
        embedding_file_list: list of sparse model paths.
        data_file: CSV file with the samples to run.
        enable_cache: whether to use the GPU embedding cache.
    """
    return demo_inference(model_name, network_file, dense_file, embedding_file_list, data_file, enable_cache)
    
if __name__ == "__main__":
    # Five positional arguments are required; fail with a usage message
    # instead of an IndexError when any of them is missing.
    if len(sys.argv) < 6:
        sys.exit(
            "usage: {} <model_name> <network_json> <dense_model> "
            "<sparse_model[,sparse_model...]> <csv_file>".format(sys.argv[0])
        )

    model_name = sys.argv[1]
    network_file = sys.argv[2]
    dense_file = sys.argv[3]
    # The sparse model files arrive as a single comma-separated argument.
    embedding_file_list = sys.argv[4].split(',')
    print(embedding_file_list)
    data_file = sys.argv[5]

    # The last argument enables the GPU embedding cache; pass False to run
    # the same demo without it.
    demo_inference(model_name, network_file, dense_file, embedding_file_list, data_file, True)


Overwriting hps_train2predict.py


In [31]:
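# Prepare the inference test file: load one validation Parquet file, rename its columns, and save it as CSV for hps_train2predict.py.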

In [32]:
# Load the first validation file; it becomes the inference test set below.
val_sample_path = "./data_parquet/val/gen_0.parquet"
df = pd.read_parquet(val_sample_path)
df.head()

Unnamed: 0,_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14
0,0.85722,0.776533,0.619641,0.513418,0.289615,0.244357,0.957589,0.764655,0.080346,0.247009,0.753515,0,5,0,7
1,0.120284,0.602983,0.954893,0.740286,0.731098,0.448716,0.557691,0.257979,0.871001,0.031287,0.210629,20,2,18,2
2,0.91218,0.090513,0.36364,0.750713,0.547341,0.031661,0.994557,0.702622,0.395151,0.520261,0.747921,6,5,8,22
3,0.189186,0.312756,0.235713,0.248728,0.056102,0.872351,0.658739,0.233019,0.18673,0.749987,0.343642,199,28,2,0
4,0.892929,0.740022,0.132553,0.956464,0.322804,0.746096,0.120569,0.745465,0.08563,0.608585,0.762991,9,5,7,1


In [33]:
# Column names in file order: the label, dense features I1..I10, then
# categorical features C1..C4.
LABEL_COLUMNS = ['label']
CONTINUOUS_COLUMNS = ["I{}".format(idx) for idx in range(1, 11)]
CATEGORICAL_COLUMNS = ["C{}".format(idx) for idx in range(1, 5)]

cols = [*LABEL_COLUMNS, *CONTINUOUS_COLUMNS, *CATEGORICAL_COLUMNS]
cols

['label',
 'I1',
 'I2',
 'I3',
 'I4',
 'I5',
 'I6',
 'I7',
 'I8',
 'I9',
 'I10',
 'C1',
 'C2',
 'C3',
 'C4']

In [34]:
# Rename the generated _col* columns to label / I1..I10 / C1..C4.
# set_axis returns a new frame; its `inplace` argument was deprecated in
# pandas 1.5 and removed in 2.0, so assign the result instead.
df = df.set_axis(cols, axis=1)
df.head()

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,C1,C2,C3,C4
0,0.85722,0.776533,0.619641,0.513418,0.289615,0.244357,0.957589,0.764655,0.080346,0.247009,0.753515,0,5,0,7
1,0.120284,0.602983,0.954893,0.740286,0.731098,0.448716,0.557691,0.257979,0.871001,0.031287,0.210629,20,2,18,2
2,0.91218,0.090513,0.36364,0.750713,0.547341,0.031661,0.994557,0.702622,0.395151,0.520261,0.747921,6,5,8,22
3,0.189186,0.312756,0.235713,0.248728,0.056102,0.872351,0.658739,0.233019,0.18673,0.749987,0.343642,199,28,2,0
4,0.892929,0.740022,0.132553,0.956464,0.322804,0.746096,0.120569,0.745465,0.08563,0.608585,0.762991,9,5,7,1


In [35]:
# Save the renamed frame as a comma-separated file with a header row and
# without the index column (comma separator and header are the to_csv defaults).
df.to_csv("./hps_model/infer_test.csv", index=False)

In [45]:
# Run the inference script with python3, the interpreter the other script
# cells in this notebook use.
# Arguments: model name, network JSON, dense model, comma-separated sparse
# models, CSV test file.
!python3 hps_train2predict.py \
    "hps_train" \
    "./hps_model/hps_train.json" \
    "./hps_model/hps_train_dense_1000.model" \
    "./hps_model/hps_train0_sparse_1000.model,./hps_model/hps_train1_sparse_1000.model" \
    "./hps_model/infer_test.csv"

['./hps_model/hps_train0_sparse_1000.model', './hps_model/hps_train1_sparse_1000.model']
[HCTR][08:22:51.768][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][08:22:51.768][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][08:22:51.768][INFO][RK0][main]: Creating ParallelHashMap CPU database backend...
[HCTR][08:22:51.769][INFO][RK0][main]: Created parallel (16 partitions) blank database backend in local memory!
[HCTR][08:22:51.769][INFO][RK0][main]: Volatile DB: initial cache rate = 1
[HCTR][08:22:51.769][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0
[HCTR][08:22:51.782][INFO][RK0][main]: Table: hps_et.hps_train.sparse_embedding1; cached 18502 / 18502 embeddings in volatile database (ParallelHashMap); load: 18502 / 18446744073709551615 (0.00%).
[HCTR][08:22:51.788][INFO][RK0][main]: Table: hps_et.hps_train.sparse_embedding2; cached 18471 / 18471 embeddings in volatile database (ParallelHashMap); load: 18471 / 