In [None]:
!docker build -t sagemaker_byoc_test .
!docker tag sagemaker_byoc_test:latest 236995464743.dkr.ecr.us-west-2.amazonaws.com/sagemaker_byoc_test:latest

In [None]:
import json

import boto3
import sagemaker
from sagemaker import Model
from sagemaker.deserializers import JSONDeserializer
from sagemaker.multidatamodel import MultiDataModel
from sagemaker.serializers import JSONSerializer

In [None]:
# IAM execution role that is handed to the Model below, and the SageMaker
# session used later to look up the default S3 bucket.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()

In [None]:
# Model identifier in "<owner>/<name>" form; the deploy cell derives the
# endpoint name from it.
model_name = "casperhansen/mixtral-instruct-awq"

# S3 key prefix built from the model name (presumably for uploaded code
# artifacts -- confirm against the upload step). The "/" before "code" keeps
# it a separate path segment instead of fusing it onto the model name
# ("...-awqcode").
s3_code_prefix = f"large-model-vllm/{model_name}/code"
# Default S3 bucket associated with the SageMaker session created above.
bucket = sess.default_bucket()

In [None]:
# Describe the SageMaker Model: the custom inference image (the same ECR URI
# the build cell tags), the model artifact in S3, and the execution role.
container = "236995464743.dkr.ecr.us-west-2.amazonaws.com/sagemaker_byoc_test:latest"

model_settings = {
    "name": "your-model-name",  # placeholder name; replace with a real one
    "model_data": "s3://sagemaker-us-west-2-236995464743/mymodel.tar.gz",
    "image_uri": container,
    "role": role,
}
model = Model(**model_settings)

In [None]:
# Deploy the model to an endpoint.
#
# SageMaker endpoint names must start with an alphanumeric character and may
# otherwise contain only alphanumerics and hyphens. The base name is therefore
# the model name with "/" replaced by "-" and no leading hyphen; a base that
# begins with "-" is rejected by endpoint-name validation.
endpoint_base = model_name.replace("/", "-")
endpoint_name = sagemaker.utils.name_from_base(endpoint_base)
print(f"endpoint_name: {endpoint_name}")

# NOTE(review): ml.t2.medium is a small CPU instance type -- confirm it is
# sufficient for the container being deployed.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name=endpoint_name,
)

In [None]:
# Two clients: the control-plane client reports endpoint status, the runtime
# client sends inference requests. "sagemaker-runtime" is the documented
# service name for the runtime API.
sm_client = boto3.client("sagemaker")
runtime = boto3.client("sagemaker-runtime")

# Health check.
# InvokeEndpoint always delivers the request to the container's /invocations
# route; CustomAttributes is only forwarded to the container as an opaque
# header and cannot redirect a call to /ping. SageMaker polls /ping itself and
# the outcome is reflected in the endpoint status, so report that instead of
# labelling an /invocations reply as a ping response.
endpoint_status = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointStatus"]
print("Endpoint status:", endpoint_status)

# Test /invocations with a small JSON payload.
payload = {"data": [1, 2, 3, 4, 5]}
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(payload),
)
# The response body is a streaming object: read it once and decode to text.
print("Invocations response:", response["Body"].read().decode())