In [1]:
from sagemaker import get_execution_role
import boto3
import sagemaker

# One boto3 session is the single source of credentials/region for the notebook;
# every client below (SageMaker, IAM) is derived from it.
dev = boto3.session.Session()
region = dev.region_name

# Bind the SageMaker session to that same boto3 session so the default bucket,
# the execution-role lookup and `region` cannot drift apart (a bare
# sagemaker Session() builds its own, independent boto3 session).
sagemaker_session = sagemaker.session.Session(boto_session=dev)
bucket = sagemaker_session.default_bucket()
prefix = 'gpt-serverless-model'
sm_client = dev.client("sagemaker")

try:
    role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)
except ValueError:
    # Fallback taken when get_execution_role raises ValueError (as in the
    # recorded run, where the caller identity was not a role): look up a
    # fixed IAM role by name instead.
    iam_client = dev.client('iam')
    role = iam_client.get_role(RoleName='SageMakerRole')['Role']['Arn']


print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {bucket}")
print(f"sagemaker session region: {region}")

# Aliases for the same objects under alternative names.
aws_role = role
aws_region = region
sess = sagemaker_session

Couldn't call 'get_role' to get Role ARN from role name michaelcruz@aim.com to get Role path.


sagemaker role arn: arn:aws:iam::193309394638:role/SageMakerRole
sagemaker bucket: sagemaker-us-east-1-193309394638
sagemaker session region: us-east-1


In [2]:
import os
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Fetch the pretrained GPT-2 tokenizer and language-model weights.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Local folder that later cells package into model.tar.gz.
model_path = 'model/'

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of `if not exists: mkdir`.
os.makedirs(model_path, exist_ok=True)

# Write the model weights/config, then the tokenizer vocabulary files;
# the last call's return value (the written file paths) is the cell output.
model.save_pretrained(save_directory=model_path)
tokenizer.save_vocabulary(save_directory=model_path)



('model/vocab.json', 'model/merges.txt')

In [3]:
# !mkdir model/code
!cp code/inference.py model/code/inference.py

In [4]:
# Print the copied inference script with syntax highlighting (pygmentize is the
# Pygments command-line tool) so it can be reviewed in the notebook.
!pygmentize model/code/inference.py

[38;2;0;128;0;01mimport[39;00m [38;2;0;0;255;01mos[39;00m
[38;2;0;128;0;01mimport[39;00m [38;2;0;0;255;01mjson[39;00m
[38;2;0;128;0;01mfrom[39;00m [38;2;0;0;255;01mtransformers[39;00m [38;2;0;128;0;01mimport[39;00m GPT2Tokenizer, TextGenerationPipeline, GPT2LMHeadModel

[38;2;61;123;123;03m# Load the model for inference[39;00m
[38;2;0;128;0;01mdef[39;00m [38;2;0;0;255mmodel_fn[39m(model_dir):

    [38;2;61;123;123;03m# Load GPT2 tokenizer from disk.[39;00m
    vocab_path [38;2;102;102;102m=[39m os[38;2;102;102;102m.[39mpath[38;2;102;102;102m.[39mjoin(model_dir, [38;2;186;33;33m'[39m[38;2;186;33;33mvocab.json[39m[38;2;186;33;33m'[39m)
    merges_path [38;2;102;102;102m=[39m os[38;2;102;102;102m.[39mpath[38;2;102;102;102m.[39mjoin(model_dir, [38;2;186;33;33m'[39m[38;2;186;33;33mmerges.txt[39m[38;2;186;33;33m'[39m)
    
    tokenizer [38;2;102;102;102m=[39m GPT2Tokenizer(vocab_file[38;2;102;102;102m=[39mvocab_path, merges_file[38;2;102;10

In [5]:
!tar -czvf model/model.tar.gz -C model/ .

a .
a ./config.json
a ./code
a ./merges.txt
a ./model.tar.gztar: ./model.tar.gz: Can't add archive to itself

a ./pytorch_model.bin
a ./vocab.json
a ./code/inference.py


In [6]:
from sagemaker.s3 import S3Uploader

# Destination "folder" in the session's default bucket, under the notebook prefix.
s3_destination = 's3://{0}/{1}'.format(bucket, prefix)

# Upload the packaged archive; `model_data` holds the value returned by the
# upload (shown below) and is reused when defining the container.
model_data = S3Uploader.upload('model/model.tar.gz', s3_destination)
model_data

's3://sagemaker-us-east-1-193309394638/gpt-serverless-model/model.tar.gz'

In [7]:
# Names for the three SageMaker resources used by this notebook:
# the model, its endpoint configuration, and the endpoint itself.
model_name = 'gpt-2-serverless-model'
epc_name = 'gpt-2-serverless-model-epc'
endpoint_name = 'gpt-2-serverless-model-ep'

# Container image used to serve the model.
# NOTE(review): the URI hard-codes us-east-1 while SAGEMAKER_REGION below is
# taken from `region`; confirm they agree when running in another region.
image_uri = "763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-cpu-py38-ubuntu20.04"

# Environment variables handed to the container: the entry-point script,
# the region, and the location of the uploaded archive.
container_environment = {
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_REGION': region,
    'SAGEMAKER_SUBMIT_DIRECTORY': model_data,
}

# Primary container definition: image + model artifact + environment.
primary_container = {
    'Image': image_uri,
    'ModelDataUrl': model_data,
    'Environment': container_environment,
}

In [13]:
# NOTE: the provisioning code in this cell is entirely commented out, so running
# the notebook top to bottom does not create any SageMaker resources. The
# invocation cell further down uses `endpoint_name`, so that endpoint must
# already exist. Presumably this was disabled after a first run to avoid
# re-creating existing resources -- confirm, and uncomment for a fresh setup.

# # Create/Register a GPT-2 model in SM
# from sagemaker import get_execution_role

# create_model_response = sm_client.create_model(ModelName = model_name,
#                                             #   ExecutionRoleArn = get_execution_role(),
#                                               ExecutionRoleArn = role,
#                                               PrimaryContainer = primary_container)

# print(create_model_response['ModelArn'])

# # Create a SM Serverless endpoint config
# endpoint_config_response = sm_client.create_endpoint_config(
#     EndpointConfigName = epc_name,
#     ProductionVariants=[
#         {
#         'ServerlessConfig':{
#             'MemorySizeInMB' : 6144,
#             'MaxConcurrency' : 5
#         },
#         'ModelName':model_name,
#         'VariantName':'AllTraffic',
#         'InitialVariantWeight':1
#         }
#     ])

# print('Endpoint configuration arn:  {}'.format(endpoint_config_response['EndpointConfigArn']))

# # Create a SM Serverless endpoint from the endpoint config above
# # NOTE(review): `endpoint_params` is built but not passed to create_endpoint;
# # the same two values are given as keyword arguments on the call instead.
# endpoint_params = {
#     'EndpointName': endpoint_name,
#     'EndpointConfigName': epc_name,
# }
# endpoint_response = sm_client.create_endpoint(EndpointName=endpoint_name, EndpointConfigName=epc_name)
# print('EndpointArn = {}'.format(endpoint_response['EndpointArn']))

In [14]:
import boto3
import json

# Runtime client used to send inference requests to the endpoint.
invoke_client = boto3.client('sagemaker-runtime')

# Text the model is asked to continue.
prompt = "Working with motorcyles is "

# NOTE(review): the prompt is JSON-encoded but sent with ContentType 'text/csv';
# the recorded output shows the JSON double quotes ending up inside the
# generated text. Confirm what inference.py expects before changing either side.
request_body = json.dumps(prompt)

response = invoke_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='text/csv',
    Body=request_body,
)

# Read the response body and decode it to text; shown as the cell output.
response['Body'].read().decode('utf-8')

'[{\'generated_text\': \'"Working with motorcyles is "a great way to get the most out of your bike,"\'}]'

In [None]:
# Cleanup (commented out): uncomment these calls to delete the model, the
# endpoint configuration and the endpoint named earlier in the notebook.
# sm_client.delete_model(ModelName=model_name)
# sm_client.delete_endpoint_config(EndpointConfigName=epc_name)
# sm_client.delete_endpoint(EndpointName=endpoint_name)