Enable Triton for SageMaker MME mode (#4181)
* SM updates and use personal repo

* Entrypoint changes and input request parsing

* Modify serve script, separate function and add return code

* Add MME APIs

* Add GET API

* Add validation tests and fixes

* Revert build changes and remove references to personal repo

* Cleanup

* Add mutex for list update and address comments
nskool committed May 9, 2022
1 parent cdfaf4f commit cb0c444
Showing 6 changed files with 769 additions and 9 deletions.
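
For orientation, the new endpoints follow SageMaker's multi-model contract (load, list, describe, invoke, unload). The sketch below is distilled from the new test file, not new API surface, and assumes a locally running MME container with SAGEMAKER_BIND_TO_PORT at its default of 8080; the model name and path are the ones the tests use:

import json
import requests

BASE = "http://localhost:8080/models"
headers = {"Content-Type": "application/json"}

# Load a model (a second load of the same name returns 409)
body = {"model_name": "sm_mme_model_1", "url": "/opt/ml/models/123456789abcdefghi/model"}
requests.post(BASE, data=json.dumps(body), headers=headers)

requests.get(BASE)                         # list loaded models
requests.get(BASE + "/sm_mme_model_1")     # describe a single model
requests.delete(BASE + "/sm_mme_model_1")  # unload (404 if not loaded)
# Inference: POST BASE + "/<model_name>/invoke" with a Triton inference request body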
1 change: 1 addition & 0 deletions build.py
@@ -1012,6 +1012,7 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
if 'sagemaker' in endpoints:
df += '''
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
'''
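
The added multi-models label is what SageMaker checks to decide whether a container can host multiple models. As a quick sanity check, the labels of a built image can be read back with the docker-py client; this is a sketch only, and the image tag is a placeholder:

import docker  # pip install docker

client = docker.from_env()
image = client.images.get("tritonserver:sagemaker")  # hypothetical local tag
assert image.labels.get("com.amazonaws.sagemaker.capabilities.multi-models") == "true"
assert image.labels.get("com.amazonaws.sagemaker.capabilities.accept-bind-to-port") == "true"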

31 changes: 25 additions & 6 deletions docker/sagemaker/serve
@@ -26,7 +26,20 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/
SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_SINGLE_MODEL_REPO}"
SAGEMAKER_MULTI_MODEL_REPO=/opt/ml/models/

SAGEMAKER_MODEL_REPO=${SAGEMAKER_SINGLE_MODEL_REPO}
is_mme_mode=false

if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
is_mme_mode=true
echo "Triton is running in SageMaker MME mode"
fi
fi

SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_MODEL_REPO}"
if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-port=${SAGEMAKER_BIND_TO_PORT}"
fi
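
Setting SAGEMAKER_MULTI_MODEL=true is the only switch needed to point the server at /opt/ml/models instead of /opt/ml/model. A minimal sketch of launching the container that way; the image tag and host path are assumptions, not part of this change:

import subprocess

subprocess.run([
    "docker", "run", "--rm", "-p", "8080:8080",
    "-e", "SAGEMAKER_MULTI_MODEL=true",        # flips serve into MME mode
    "-v", "/local/model/repo:/opt/ml/models",  # hypothetical host repository
    "tritonserver:sagemaker",                  # hypothetical image tag
    "serve",
], check=True)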
@@ -51,22 +64,28 @@ fi
if [ -n "$SAGEMAKER_TRITON_LOG_ERROR" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-error=${SAGEMAKER_TRITON_LOG_ERROR}"
fi
if [ -n "$SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE}"
fi
if [ -n "$SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE}"
fi

if [ -f "${SAGEMAKER_SINGLE_MODEL_REPO}/config.pbtxt" ]; then
if [ "${is_mme_mode}" = false ] && [ -f "${SAGEMAKER_MODEL_REPO}/config.pbtxt" ]; then
echo "ERROR: Incorrect directory structure."
echo " Model directory needs to contain the top level folder"
exit 1
fi

if [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ -d "${SAGEMAKER_SINGLE_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ "${is_mme_mode}" = false ] && [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ -d "${SAGEMAKER_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
else
echo "ERROR: Directory with provided SAGEMAKER_TRITON_DEFAULT_MODEL_NAME ${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} does not exist"
exit 1
fi
else
MODEL_DIRS=(`find "${SAGEMAKER_SINGLE_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n"`)
elif [ "${is_mme_mode}" = false ]; then
MODEL_DIRS=(`find "${SAGEMAKER_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n"`)
case ${#MODEL_DIRS[@]} in
0) echo "ERROR: No model found in model repository";
exit 1
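In single-model mode the script still rejects a config.pbtxt at the repository root and otherwise auto-discovers model folders. For illustration only, a Python restatement of that layout check under the same assumptions:

from pathlib import Path

# Expected single-model layout (config.pbtxt inside a model folder, not at the root):
#   /opt/ml/model/mymodel/config.pbtxt   <- "mymodel" is a hypothetical name
#   /opt/ml/model/mymodel/1/model.onnx
repo = Path("/opt/ml/model")
if (repo / "config.pbtxt").is_file():
    raise SystemExit("ERROR: config.pbtxt found at repo root; wrap the model in its own folder")
model_dirs = [p.name for p in repo.iterdir() if p.is_dir()]
print("discovered models:", model_dirs)
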
222 changes: 222 additions & 0 deletions qa/L0_sagemaker/sagemaker_multi_model_test.py
@@ -0,0 +1,222 @@
#!/usr/bin/python
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import time
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as httpclient


class SageMakerMultiModelTest(tu.TestResultCollector):
def setUp(self):

SAGEMAKER_BIND_TO_PORT = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
self.url_mme_ = "http://localhost:{}/models".format(SAGEMAKER_BIND_TO_PORT)

# model_1 setup
self.model1_name = "sm_mme_model_1"
self.model1_url = "/opt/ml/models/123456789abcdefghi/model"

self.model1_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
self.model1_expected_output0_data_ = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
self.model1_expected_output1_data_ = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

self.model1_expected_result_ = {
"model_name": "sm_mme_model_1",
"model_version": "1",
"outputs": [
{"name": "OUTPUT0", "datatype": "INT32", "shape": [1, 16], "data": self.model1_expected_output0_data_},
{"name": "OUTPUT1", "datatype": "INT32", "shape": [1, 16], "data": self.model1_expected_output1_data_},
],
}

# model_2 setup
self.model2_name = "sm_mme_model_2"
self.model2_url = "/opt/ml/models/987654321ihgfedcba/model"

# Output is same as input since this is an identity model
self.model2_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7]

def test_sm_0_environment_variables_set(self):
self.assertEqual(
os.getenv("SAGEMAKER_MULTI_MODEL"), "true", "Variable SAGEMAKER_MULTI_MODEL must be set to true"
)

def test_sm_1_model_load(self):
# Load model_1
request_body = {"model_name": self.model1_name, "url": self.model1_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # wait for model to load
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Load the same model again, expect a 409
request_body = {"model_name": self.model1_name, "url": self.model1_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # wait for model to load
self.assertEqual(r.status_code, 409, "Expected status code 409, received {}".format(r.status_code))

# Load model_2
request_body = {"model_name": self.model2_name, "url": self.model2_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # wait for model to load
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

def test_sm_2_model_list(self):
r = requests.get(self.url_mme_)
time.sleep(3)
expected_response_1 = {
"models": [
{"modelName": self.model1_name, "modelUrl": self.model1_url},
{"modelName": self.model2_name, "modelUrl": self.model2_url},
]
}
expected_response_2 = {
"models": [
{"modelName": self.model2_name, "modelUrl": self.model2_url},
{"modelName": self.model1_name, "modelUrl": self.model1_url},
]
}

# Returned list response's order is not deterministic
self.assertIn(
r.json(),
[expected_response_1, expected_response_2],
"Expected one of {}, received: {}".format([expected_response_1, expected_response_2], r.json()),
)

def test_sm_3_model_get(self):
get_url = "{}/{}".format(self.url_mme_, self.model1_name)
r = requests.get(get_url)
time.sleep(3)
expected_response = {"modelName": self.model1_name, "modelUrl": self.model1_url}
self.assertEqual(
r.json(), expected_response, "Expected response: {}, received: {}".format(expected_response, r.json())
)

def test_sm_4_model_invoke(self):
# Invoke model_1
inputs = []
outputs = []
inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))

# Initialize the data
input_data = np.array(self.model1_input_data_, dtype=np.int32)
input_data = np.expand_dims(input_data, axis=0)
inputs[0].set_data_from_numpy(input_data, binary_data=False)
inputs[1].set_data_from_numpy(input_data, binary_data=False)

outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=False))
outputs.append(httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
request_body, _ = httpclient.InferenceServerClient.generate_request_body(inputs, outputs=outputs)

headers = {"Content-Type": "application/json"}
invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model1_name)
r = requests.post(invoke_url, data=request_body, headers=headers)
r.raise_for_status()

self.assertEqual(
self.model1_expected_result_,
r.json(),
"Expected response : {}, received: {}".format(self.model1_expected_result_, r.json()),
)

# Invoke model_2
inputs = []
outputs = []
inputs.append(
httpclient.InferInput(
"INPUT0",
[1, 8],
"FP32",
)
)
input_data = np.array(self.model2_input_data_, dtype=np.float32)
input_data = np.expand_dims(input_data, axis=0)
inputs[0].set_data_from_numpy(input_data, binary_data=True)

outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=True))

request_body, header_length = httpclient.InferenceServerClient.generate_request_body(inputs, outputs=outputs)

invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model2_name)
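# For binary tensor payloads the JSON inference header length is carried in the
# Content-Type, so the server can split the JSON header from the binary data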
headers = {
"Content-Type": "application/vnd.sagemaker-triton.binary+json;json-header-size={}".format(header_length)
}
r = requests.post(invoke_url, data=request_body, headers=headers)

header_length_prefix = "application/vnd.sagemaker-triton.binary+json;json-header-size="
header_length_str = r.headers["Content-Type"][len(header_length_prefix) :]
result = httpclient.InferenceServerClient.parse_response_body(r.content, header_length=int(header_length_str))

# Extract the output tensor from the parsed response
output_data = result.as_numpy("OUTPUT0")

for i in range(8):
self.assertEqual(output_data[0][i], input_data[0][i], "Tensor Value Mismatch")

def test_sm_5_model_unload(self):
# Unload model_1
unload_url = "{}/{}".format(self.url_mme_, self.model1_name)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Unload model_2
unload_url = "{}/{}".format(self.url_mme_, self.model2_name)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Unload a non-loaded model, expect a 404
unload_url = "{}/sm_non_loaded_model".format(self.url_mme_)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 404, "Expected status code 404, received {}".format(r.status_code))


if __name__ == "__main__":
unittest.main()
66 changes: 65 additions & 1 deletion qa/L0_sagemaker/test.sh
100644 → 100755
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -55,6 +55,8 @@ rm -f *.log
rm -f *.out

SAGEMAKER_TEST=sagemaker_test.py
SAGEMAKER_MULTI_MODEL_TEST=sagemaker_multi_model_test.py
MULTI_MODEL_UNIT_TEST_COUNT=6
UNIT_TEST_COUNT=9
CLIENT_LOG="./client.log"

@@ -363,6 +365,68 @@ fi
kill $SERVER_PID
wait $SERVER_PID

# MME begin
# Prepare model repository

ln -s `pwd`/models /opt/ml/models
# Model path will be of the form /opt/ml/models/<hash>/model
MODEL1_PATH="models/123456789abcdefghi/model"
MODEL2_PATH="models/987654321ihgfedcba/model"
mkdir -p "${MODEL1_PATH}"
mkdir -p "${MODEL2_PATH}"

cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \
rm -r ${MODEL1_PATH}/2 && rm -r ${MODEL1_PATH}/3 && \
sed -i "s/onnx_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt

cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \
sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt
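
# Resulting layout, with model names taken from the rewritten config.pbtxt files:
#   /opt/ml/models/123456789abcdefghi/model -> sm_mme_model_1 (from onnx_int32_int32_int32)
#   /opt/ml/models/987654321ihgfedcba/model -> sm_mme_model_2 (from onnx_zero_1_float32, an identity model)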

# Start server with 'serve' script
export SAGEMAKER_MULTI_MODEL=true
export SAGEMAKER_TRITON_LOG_VERBOSE=true

serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID directly, since $! returns the serve script's PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
kill $SERVER_PID || true
cat $SERVER_LOG
exit 1
fi

# API tests in default setting
set +e
python $SAGEMAKER_MULTI_MODEL_TEST SageMakerMultiModelTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed\n***"
cat $CLIENT_LOG
RET=1
else
check_test_results $TEST_RESULT_FILE $MULTI_MODEL_UNIT_TEST_COUNT
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
fi
fi
set -e

unset SAGEMAKER_MULTI_MODEL

unlink /opt/ml/models
rm -rf /opt/ml/models

kill $SERVER_PID
wait $SERVE_PID

# MME end


unlink /opt/ml/model
rm -rf /opt/ml/model

