In [1]:
import boto3
import sagemaker

In [2]:
# Report the boto3 and SageMaker SDK versions in use, one per line.
print(boto3.__version__, sagemaker.__version__, sep='\n')

1.17.38
2.32.0


In [3]:
# Open a SageMaker session, then look up its default S3 bucket and the
# execution role; later cells use both when registering the models.
sess = sagemaker.Session()
bucket, role = sess.default_bucket(), sagemaker.get_execution_role()
print(bucket, role, sep='\n')

sagemaker-us-east-2-805291263703
arn:aws:iam::805291263703:role/service-role/AmazonSageMaker-ExecutionRole-20200826T145400


In [4]:
# Low-level boto3 SageMaker client; the cells below use it to create,
# describe, update and delete models, endpoint configurations and endpoints.
sm = boto3.client('sagemaker')

In [5]:
# Shared prefix: used as the S3 key prefix for the model artifacts and as the
# leading part of every model / endpoint-config / endpoint name built below.
prefix = 'pytorch-dogscats'

# Create multi-variant endpoint (two production variants)

In [6]:
import time

In [67]:
# Build both model names from ONE timestamp. Calling time.strftime() twice
# could straddle a second boundary and give the two models different
# suffixes, which makes the pair harder to match up afterwards.
model_timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name_1 = prefix + '-1-' + model_timestamp
model_name_2 = prefix + '-2-' + model_timestamp
print(model_name_1)
print(model_name_2)

pytorch-dogscats-1-2021-03-30-06-49-23
pytorch-dogscats-2-2021-03-30-06-49-23


In [68]:
# Register model 1: the dogscats.tar.gz artifact under s3://<bucket>/<prefix>/,
# served from the PyTorch 1.5 CPU inference image.
container_1_env = {
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': 'us-east-2',
    'MMS_DEFAULT_RESPONSE_TIMEOUT': '500',
}
container_1 = {
    'Image': '763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5-cpu-py3',
    'Mode': 'SingleModel',
    'ModelDataUrl': 's3://{}/{}/dogscats.tar.gz'.format(bucket, prefix),
    # Environment variables passed to the serving container.
    'Environment': container_1_env,
}

response = sm.create_model(
    ModelName=model_name_1,
    Containers=[container_1],
    ExecutionRoleArn=role,
)
print(response)

{'ModelArn': 'arn:aws:sagemaker:us-east-2:805291263703:model/pytorch-dogscats-1-2021-03-30-06-49-23', 'ResponseMetadata': {'RequestId': '0386c177-d104-47d4-86fb-fe342cb0e3cf', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '0386c177-d104-47d4-86fb-fe342cb0e3cf', 'content-type': 'application/x-amz-json-1.1', 'content-length': '100', 'date': 'Tue, 30 Mar 2021 06:49:46 GMT'}, 'RetryAttempts': 0}}


In [58]:
# Register model 2: the model.tar.gz artifact under s3://<bucket>/<prefix>/,
# served from the PyTorch 1.8.0 CPU inference image.
container_2_env = {
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': 'us-east-2',
    'MMS_DEFAULT_RESPONSE_TIMEOUT': '500',
}
container_2 = {
    'Image': '763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.8.0-cpu-py3',
    'Mode': 'SingleModel',
    'ModelDataUrl': 's3://{}/{}/model.tar.gz'.format(bucket, prefix),
    # Environment variables passed to the serving container.
    'Environment': container_2_env,
}

response = sm.create_model(
    ModelName=model_name_2,
    Containers=[container_2],
    ExecutionRoleArn=role,
)
print(response)

{'ModelArn': 'arn:aws:sagemaker:us-east-2:805291263703:model/pytorch-dogscats-2-2021-03-30-05-06-44', 'ResponseMetadata': {'RequestId': 'dcfc9e5d-3199-4aa7-8df2-a374eacc1002', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'dcfc9e5d-3199-4aa7-8df2-a374eacc1002', 'content-type': 'application/x-amz-json-1.1', 'content-length': '100', 'date': 'Tue, 30 Mar 2021 05:06:48 GMT'}, 'RetryAttempts': 0}}


In [69]:
# Name the endpoint configuration with a UTC timestamp suffix.
epc_suffix = time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())
epc_name = prefix + '-epc-' + epc_suffix
print('Endpoint configuration name: ', epc_name)

# Two production variants with initial weights 7 and 3. Both serve
# model_name_1; they differ only in instance type.
# NOTE(review): version-2 was previously configured with model_name_2 on
# ml.m5.large (left commented out) - confirm which setup is intended.
variant_1 = {
    'VariantName': 'version-1',
    'ModelName': model_name_1,
    'InitialInstanceCount': 1,
    'InstanceType': 'ml.m5.large',
    'InitialVariantWeight': 7,
}
variant_2 = {
    'VariantName': 'version-2',
    'ModelName': model_name_1,
    'InitialInstanceCount': 1,
    'InstanceType': 'ml.t2.medium',
    'InitialVariantWeight': 3,
}

response = sm.create_endpoint_config(
    EndpointConfigName=epc_name,
    ProductionVariants=[variant_1, variant_2],
)
print(response)

Endpoint configuration name:  pytorch-dogscats-epc-2021-03-30-06-50-09
{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:805291263703:endpoint-config/pytorch-dogscats-epc-2021-03-30-06-50-09', 'ResponseMetadata': {'RequestId': 'a8a9cb96-b79d-4a91-bf93-b4c8d35e86ee', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a8a9cb96-b79d-4a91-bf93-b4c8d35e86ee', 'content-type': 'application/x-amz-json-1.1', 'content-length': '121', 'date': 'Tue, 30 Mar 2021 06:50:08 GMT'}, 'RetryAttempts': 0}}


In [70]:
# Create the endpoint from the configuration named by epc_name; the endpoint
# name carries its own UTC timestamp suffix.
ep_suffix = time.strftime('%Y-%m-%d-%H-%M-%S', time.gmtime())
ep_name = prefix + '-ep-' + ep_suffix
print(ep_name)

endpoint_request = {'EndpointName': ep_name, 'EndpointConfigName': epc_name}
response = sm.create_endpoint(**endpoint_request)
print(response)

pytorch-dogscats-ep-2021-03-30-06-50-52
{'EndpointArn': 'arn:aws:sagemaker:us-east-2:805291263703:endpoint/pytorch-dogscats-ep-2021-03-30-06-50-52', 'ResponseMetadata': {'RequestId': '66ae162f-845a-497c-b5de-58501cb48909', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '66ae162f-845a-497c-b5de-58501cb48909', 'content-type': 'application/x-amz-json-1.1', 'content-length': '107', 'date': 'Tue, 30 Mar 2021 06:50:51 GMT'}, 'RetryAttempts': 0}}


In [71]:
# Inspect the endpoint right after the create call; the response includes its
# current EndpointStatus.
sm.describe_endpoint(EndpointName=ep_name)

{'EndpointName': 'pytorch-dogscats-ep-2021-03-30-06-50-52',
 'EndpointArn': 'arn:aws:sagemaker:us-east-2:805291263703:endpoint/pytorch-dogscats-ep-2021-03-30-06-50-52',
 'EndpointConfigName': 'pytorch-dogscats-epc-2021-03-30-06-50-09',
 'EndpointStatus': 'Creating',
 'CreationTime': datetime.datetime(2021, 3, 30, 6, 50, 52, 302000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2021, 3, 30, 6, 50, 52, 302000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': '1ec3947f-b79f-4de3-9518-81265b5cfcee',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '1ec3947f-b79f-4de3-9518-81265b5cfcee',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '324',
   'date': 'Tue, 30 Mar 2021 06:50:53 GMT'},
  'RetryAttempts': 0}}

In [72]:
# Block here until boto3's 'endpoint_in_service' waiter reports the endpoint
# is ready to take requests.
sm.get_waiter('endpoint_in_service').wait(EndpointName=ep_name)

# Update endpoint variant weights

In [27]:
# Set both variants to the same weight (5 and 5) on the running endpoint.
desired_weights = [
    {'VariantName': variant, 'DesiredWeight': weight}
    for variant, weight in (('version-1', 5), ('version-2', 5))
]

response = sm.update_endpoint_weights_and_capacities(
    EndpointName=ep_name,
    DesiredWeightsAndCapacities=desired_weights,
)
print(response)

{'EndpointArn': 'arn:aws:sagemaker:us-east-2:805291263703:endpoint/pytorch-dogscats-ep-2021-03-30-01-56-36', 'ResponseMetadata': {'RequestId': '82f19bae-2a6b-4ead-97ce-1622e5aa4189', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '82f19bae-2a6b-4ead-97ce-1622e5aa4189', 'content-type': 'application/x-amz-json-1.1', 'content-length': '107', 'date': 'Tue, 30 Mar 2021 02:24:20 GMT'}, 'RetryAttempts': 0}}


# Inference

In [21]:
import os
import cv2
import torch
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO

In [16]:
# Local directory whose files are listed and read as test images below.
TEST_DIR = './test1'

In [17]:
# os.listdir() returns entries in arbitrary order; sort them so that
# test_imgs[0] (the image sent to the endpoint) is the same file on every run.
test_imgs = sorted(os.listdir(TEST_DIR))

In [22]:
# SageMaker runtime client; used below to call invoke_endpoint.
smrt = boto3.client('runtime.sagemaker')

In [73]:
# Number of identical requests to send to the endpoint.
NUM_REQUESTS = 1000

# Every request carries the same image, so build the payload once instead of
# re-reading, resizing and re-serializing it on each iteration.
image_path = os.path.join(TEST_DIR, test_imgs[0])
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports an unreadable / non-image file by returning None
    # instead of raising, which would otherwise fail later inside cvtColor.
    raise ValueError('Could not read image: {}'.format(image_path))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224))
tensor_x = np.expand_dims(im, axis=0)        # add a leading batch axis
tensor_x = tensor_x.transpose((0, 3, 1, 2))  # channels-last -> channels-first
buffer = BytesIO()
np.save(buffer, tensor_x)                    # serialize in .npy format
payload = buffer.getvalue()

for i in range(NUM_REQUESTS):
    response = smrt.invoke_endpoint(
        EndpointName=ep_name,
        Body=payload,
        ContentType='application/x-npy'
    )
    # For debugging: response['InvokedProductionVariant'] names the variant
    # that served the request and response['Body'].read() returns the result.

# Delete endpoint, endpoint configuration and model

In [74]:
# Delete the endpoint named by ep_name.
sm.delete_endpoint(EndpointName=ep_name)

{'ResponseMetadata': {'RequestId': 'd0a02d2c-bf20-40fe-9e86-cd2b965978dd',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd0a02d2c-bf20-40fe-9e86-cd2b965978dd',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Tue, 30 Mar 2021 07:08:05 GMT'},
  'RetryAttempts': 0}}

In [75]:
# Delete the endpoint configuration named by epc_name.
sm.delete_endpoint_config(EndpointConfigName=epc_name)

{'ResponseMetadata': {'RequestId': '390bbdcf-154c-4929-8c13-df45469232ef',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '390bbdcf-154c-4929-8c13-df45469232ef',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Tue, 30 Mar 2021 07:08:07 GMT'},
  'RetryAttempts': 0}}

In [76]:
# Delete the first registered model (model_name_1).
sm.delete_model(ModelName=model_name_1)

{'ResponseMetadata': {'RequestId': '3501f012-f1a9-4943-80c6-779bc9244e92',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '3501f012-f1a9-4943-80c6-779bc9244e92',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Tue, 30 Mar 2021 07:08:14 GMT'},
  'RetryAttempts': 0}}

In [None]:
# Delete the second registered model (model_name_2).
sm.delete_model(ModelName=model_name_2)