Enable Triton for SageMaker MME mode #4181

Merged: 18 commits, May 9, 2022

1 change: 1 addition & 0 deletions build.py
@@ -1011,6 +1011,7 @@ def create_dockerfile_linux(ddir, dockerfile_name, argmap, backends, repoagents,
if 'sagemaker' in endpoints:
df += '''
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
'''

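The new com.amazonaws.sagemaker.capabilities.multi-models=true label is what SageMaker uses to recognize the image as capable of backing a multi-model endpoint. A minimal sketch of producing such an image, assuming build.py's --endpoint and --backend flags behave as in the current repo (the exact invocation is illustrative, not part of this PR):

# Hypothetical build command; only the SageMaker endpoint matters here, since the
# 'sagemaker' in endpoints branch above is what emits both capability labels.
./build.py --endpoint=http --endpoint=sagemaker --backend=onnxruntime
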
31 changes: 25 additions & 6 deletions docker/sagemaker/serve
@@ -26,7 +26,20 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/
SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_SINGLE_MODEL_REPO}"
SAGEMAKER_MULTI_MODEL_REPO=/opt/ml/models/

SAGEMAKER_MODEL_REPO=${SAGEMAKER_SINGLE_MODEL_REPO}
is_mme_mode=false

if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
is_mme_mode=true
echo "Triton is running in SageMaker MME mode"
fi
fi

SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_MODEL_REPO}"
if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-port=${SAGEMAKER_BIND_TO_PORT}"
fi
@@ -51,22 +64,28 @@ fi
if [ -n "$SAGEMAKER_TRITON_LOG_ERROR" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-error=${SAGEMAKER_TRITON_LOG_ERROR}"
fi
if [ -n "$SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE}"
fi
if [ -n "$SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE}"
fi

if [ -f "${SAGEMAKER_SINGLE_MODEL_REPO}/config.pbtxt" ]; then
if [ "${is_mme_mode}" = false ] && [ -f "${SAGEMAKER_MODEL_REPO}/config.pbtxt" ]; then
echo "ERROR: Incorrect directory structure."
echo " Model directory needs to contain the top level folder"
exit 1
fi

if [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ -d "${SAGEMAKER_SINGLE_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ "${is_mme_mode}" = false ] && [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
if [ -d "${SAGEMAKER_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
else
echo "ERROR: Directory with provided SAGEMAKER_TRITON_DEFAULT_MODEL_NAME ${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} does not exist"
exit 1
fi
else
MODEL_DIRS=(`find "${SAGEMAKER_SINGLE_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n"`)
elif [ "${is_mme_mode}" = false ]; then
MODEL_DIRS=(`find "${SAGEMAKER_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n"`)
case ${#MODEL_DIRS[@]} in
0) echo "ERROR: No model found in model repository";
exit 1
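
Taken together, the serve changes switch the model repository to /opt/ml/models/ and skip the single-model directory checks whenever SAGEMAKER_MULTI_MODEL=true. A minimal local sketch of exercising that path, assuming a hypothetical image tag tritonserver:sagemaker and a host directory laid out as /opt/ml/models/<hash>/model like the tests below:

# SageMaker sets these variables itself in production; this only mirrors what
# the serve script reacts to.
docker run --rm -p 8080:8080 \
  -e SAGEMAKER_MULTI_MODEL=true \
  -e SAGEMAKER_BIND_TO_PORT=8080 \
  -v $PWD/models:/opt/ml/models \
  tritonserver:sagemaker serve
# With SAGEMAKER_MULTI_MODEL=true the script starts tritonserver with
# --model-repository=/opt/ml/models/ and logs "Triton is running in SageMaker MME mode".
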
222 changes: 222 additions & 0 deletions qa/L0_sagemaker/sagemaker_multi_model_test.py
@@ -0,0 +1,222 @@
#!/usr/bin/python
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../common")

import json
import os
import time
import unittest

import numpy as np
import requests
import test_util as tu
import tritonclient.http as httpclient


class SageMakerMultiModelTest(tu.TestResultCollector):
def setUp(self):

SAGEMAKER_BIND_TO_PORT = os.getenv("SAGEMAKER_BIND_TO_PORT", "8080")
self.url_mme_ = "http://localhost:{}/models".format(SAGEMAKER_BIND_TO_PORT)

# model_1 setup
self.model1_name = "sm_mme_model_1"
self.model1_url = "/opt/ml/models/123456789abcdefghi/model"

self.model1_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
self.model1_expected_output0_data_ = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
self.model1_expected_output1_data_ = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

self.model1_expected_result_ = {
"model_name": "sm_mme_model_1",
"model_version": "1",
"outputs": [
{"name": "OUTPUT0", "datatype": "INT32", "shape": [1, 16], "data": self.model1_expected_output0_data_},
{"name": "OUTPUT1", "datatype": "INT32", "shape": [1, 16], "data": self.model1_expected_output1_data_},
],
}

# model_2 setup
self.model2_name = "sm_mme_model_2"
self.model2_url = "/opt/ml/models/987654321ihgfedcba/model"

# Output is same as input since this is an identity model
self.model2_input_data_ = [0, 1, 2, 3, 4, 5, 6, 7]

def test_sm_0_environment_variables_set(self):
self.assertEqual(
os.getenv("SAGEMAKER_MULTI_MODEL"), "true", "Variable SAGEMAKER_MULTI_MODEL must be set to true"
)

def test_sm_1_model_load(self):
# Load model_1
request_body = {"model_name": self.model1_name, "url": self.model1_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # wait for model to load
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Load the same model again, expect a 409
request_body = {"model_name": self.model1_name, "url": self.model1_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # allow time for the duplicate load attempt to be processed
self.assertEqual(r.status_code, 409, "Expected status code 409, received {}".format(r.status_code))

# Load model_2
request_body = {"model_name": self.model2_name, "url": self.model2_url}
headers = {"Content-Type": "application/json"}
r = requests.post(self.url_mme_, data=json.dumps(request_body), headers=headers)
time.sleep(5) # wait for model to load
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

def test_sm_2_model_list(self):
r = requests.get(self.url_mme_)
time.sleep(3)
expected_response_1 = {
"models": [
{"modelName": self.model1_name, "modelUrl": self.model1_url},
{"modelName": self.model2_name, "modelUrl": self.model2_url},
]
}
expected_response_2 = {
"models": [
{"modelName": self.model2_name, "modelUrl": self.model2_url},
{"modelName": self.model1_name, "modelUrl": self.model1_url},
]
}

# Returned list response's order is not deterministic
self.assertIn(
r.json(),
[expected_response_1, expected_response_2],
"Expected one of {}, received: {}".format([expected_response_1, expected_response_2], r.json()),
)

def test_sm_3_model_get(self):
get_url = "{}/{}".format(self.url_mme_, self.model1_name)
r = requests.get(get_url)
time.sleep(3)
expected_response = {"modelName": self.model1_name, "modelUrl": self.model1_url}
self.assertEqual(
r.json(), expected_response, "Expected response: {}, received: {}".format(expected_response, r.json())
)

def test_sm_4_model_invoke(self):
# Invoke model_1
inputs = []
outputs = []
inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))

# Initialize the data
input_data = np.array(self.model1_input_data_, dtype=np.int32)
input_data = np.expand_dims(input_data, axis=0)
inputs[0].set_data_from_numpy(input_data, binary_data=False)
inputs[1].set_data_from_numpy(input_data, binary_data=False)

outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=False))
outputs.append(httpclient.InferRequestedOutput("OUTPUT1", binary_data=False))
request_body, _ = httpclient.InferenceServerClient.generate_request_body(inputs, outputs=outputs)

headers = {"Content-Type": "application/json"}
invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model1_name)
r = requests.post(invoke_url, data=request_body, headers=headers)
r.raise_for_status()

self.assertEqual(
self.model1_expected_result_,
r.json(),
"Expected response : {}, received: {}".format(self.model1_expected_result_, r.json()),
)

# Invoke model_2
inputs = []
outputs = []
inputs.append(
httpclient.InferInput(
"INPUT0",
[1, 8],
"FP32",
)
)
input_data = np.array(self.model2_input_data_, dtype=np.float32)
input_data = np.expand_dims(input_data, axis=0)
inputs[0].set_data_from_numpy(input_data, binary_data=True)

outputs.append(httpclient.InferRequestedOutput("OUTPUT0", binary_data=True))

request_body, header_length = httpclient.InferenceServerClient.generate_request_body(inputs, outputs=outputs)

invoke_url = "{}/{}/invoke".format(self.url_mme_, self.model2_name)
headers = {
"Content-Type": "application/vnd.sagemaker-triton.binary+json;json-header-size={}".format(header_length)
}
r = requests.post(invoke_url, data=request_body, headers=headers)

# Read the inference header size from the response so the binary output data can be located
header_length_prefix = "application/vnd.sagemaker-triton.binary+json;json-header-size="
header_length_str = r.headers["Content-Type"][len(header_length_prefix) :]
result = httpclient.InferenceServerClient.parse_response_body(r.content, header_length=int(header_length_str))

output_data = result.as_numpy("OUTPUT0")

for i in range(8):
self.assertEqual(output_data[0][i], input_data[0][i], "Tensor Value Mismatch")

def test_sm_5_model_unload(self):
# Unload model_1
unload_url = "{}/{}".format(self.url_mme_, self.model1_name)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Unload model_2
unload_url = "{}/{}".format(self.url_mme_, self.model2_name)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 200, "Expected status code 200, received {}".format(r.status_code))

# Unload a non-loaded model, expect a 404
unload_url = "{}/sm_non_loaded_model".format(self.url_mme_)
r = requests.delete(unload_url)
time.sleep(3)
self.assertEqual(r.status_code, 404, "Expected status code 404, received {}".format(r.status_code))


if __name__ == "__main__":
unittest.main()
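
The test above drives the SageMaker MME control API end to end. For a quick manual check, the same calls can be issued with curl; the base URL, model name, and model path below simply mirror the values used in the test and are illustrative only:

BASE=http://localhost:8080/models

# Load a model (200 expected; repeating the same load returns 409)
curl -X POST "$BASE" -H "Content-Type: application/json" \
  -d '{"model_name": "sm_mme_model_1", "url": "/opt/ml/models/123456789abcdefghi/model"}'

# List all loaded models, then describe one of them
curl "$BASE"
curl "$BASE/sm_mme_model_1"

# Unload (200 expected; unloading a model that was never loaded returns 404)
curl -X DELETE "$BASE/sm_mme_model_1"
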
66 changes: 65 additions & 1 deletion qa/L0_sagemaker/test.sh
mode changed 100644 → 100755
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -55,6 +55,8 @@ rm -f *.log
rm -f *.out

SAGEMAKER_TEST=sagemaker_test.py
SAGEMAKER_MULTI_MODEL_TEST=sagemaker_multi_model_test.py
MULTI_MODEL_UNIT_TEST_COUNT=6
UNIT_TEST_COUNT=9
CLIENT_LOG="./client.log"

@@ -363,6 +365,68 @@ fi
kill $SERVER_PID
wait $SERVER_PID

# MME begin
# Prepare model repository

ln -s `pwd`/models /opt/ml/models
# Model path will be of the form /opt/ml/models/<hash>/model
MODEL1_PATH="models/123456789abcdefghi/model"
MODEL2_PATH="models/987654321ihgfedcba/model"
mkdir -p "${MODEL1_PATH}"
mkdir -p "${MODEL2_PATH}"

cp -r $DATADIR/qa_model_repository/onnx_int32_int32_int32/* ${MODEL1_PATH} && \
rm -r ${MODEL1_PATH}/2 && rm -r ${MODEL1_PATH}/3 && \
sed -i "s/onnx_int32_int32_int32/sm_mme_model_1/" ${MODEL1_PATH}/config.pbtxt

cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32/* ${MODEL2_PATH} && \
sed -i "s/onnx_zero_1_float32/sm_mme_model_2/" ${MODEL2_PATH}/config.pbtxt

# Start server with 'serve' script
export SAGEMAKER_MULTI_MODEL=true
export SAGEMAKER_TRITON_LOG_VERBOSE=true

serve > $SERVER_LOG 2>&1 &
SERVE_PID=$!
# Obtain the Triton PID directly, since $! only gives the serve script's PID
sleep 1
SERVER_PID=`ps | grep tritonserver | awk '{ printf $1 }'`
sagemaker_wait_for_server_ready $SERVER_PID 10
if [ "$WAIT_RET" != "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
kill $SERVER_PID || true
cat $SERVER_LOG
exit 1
fi

# API tests in default setting
set +e
python $SAGEMAKER_MULTI_MODEL_TEST SageMakerMultiModelTest >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed\n***"
cat $CLIENT_LOG
RET=1
else
check_test_results $TEST_RESULT_FILE $MULTI_MODEL_UNIT_TEST_COUNT
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Test Result Verification Failed\n***"
RET=1
fi
fi
set -e

unset SAGEMAKER_MULTI_MODEL

unlink /opt/ml/models
rm -rf /opt/ml/models

kill $SERVER_PID
wait $SERVE_PID

# MME end


unlink /opt/ml/model
rm -rf /opt/ml/model
