In [None]:
# Before running this notebook, make sure you have already cloned the sample data:
#!git clone https://github.com/xieyongliang/all-in-one-ai-sample-data.git ../../../all-in-one-ai-sample-data

In [1]:
import sagemaker
from sagemaker.huggingface import HuggingFace

In [2]:
# Open a SageMaker session, then resolve the execution role and the
# session's default S3 bucket. `bucket` and `role` are read by later cells.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = sagemaker_session.default_bucket()

In [None]:
# Remove any archive left over from a previous run first, so that the `*`
# glob on the next line cannot match it and pack the old archive into the new one.
!rm -rf sourcedir.tar.gz
# Bundle every entry matched by the shell's `*` glob in the current working
# directory into a gzip-compressed tarball (verbose listing enabled).
!tar czvf sourcedir.tar.gz *

In [None]:
source_dir = 's3://{0}/{1}/source/'.format(bucket, 'cpt')
!aws s3 cp sourcedir.tar.gz $source_dir
source_dir = '{0}sourcedir.tar.gz'.format(source_dir)
!rm -f sourcedir.tar.gz

In [None]:
train_dir = 's3://{0}/{1}/data/train'.format(bucket, 'cpt')
!aws s3 cp ../../../all-in-one-ai-sample-data/cpt/train/ $train_dir --recursive

In [40]:
# --- Training job configuration ---------------------------------------------
# `role` and `train_dir` come from earlier cells and are used as-is.

# Passed to estimator.fit(); left as None here.
job_name = None
# Script inside the uploaded source archive that runs the training.
entry_point = 'train.py'
# Defined for completeness; not passed to the HuggingFace estimator in this notebook.
git_config = None

# Hyperparameters handed to the estimator. The *_file paths point into
# /opt/ml/input/data/dataset/, matching the 'dataset' key of `inputs` below.
hyperparameters = {
    'model_name_or_path': 'fnlp/cpt-large',
    'num_train_epochs': 10,
    'per_device_train_batch_size': 4,
    'text_column': 'text',
    'summary_column': 'summary',
    'output_dir': '/opt/ml/model',
    'train_file': '/opt/ml/input/data/dataset/train.json',
    'validation_file': '/opt/ml/input/data/dataset/val.json',
    'test_file': '/opt/ml/input/data/dataset/test.json',
    'val_max_target_length': 80,
    'path': 'json'
}

# Framework versions requested for the training container.
transformers_version = '4.12.3'
py_version = 'py38'
pytorch_version = '1.9.1'
# Defined for completeness; not passed to the HuggingFace estimator in this notebook.
tensorflow_version = None

# Training hardware.
instance_type = 'ml.g4dn.16xlarge'
instance_count = 1

# Input channels for estimator.fit(): channel name -> S3 location.
inputs = {
    'dataset': train_dir
}

In [41]:
# Build the Hugging Face training estimator from the values set in the
# configuration cell; debugger_hook_config is set to False.
estimator = HuggingFace(
    role=role,
    entry_point=entry_point,
    source_dir=source_dir,
    hyperparameters=hyperparameters,
    transformers_version=transformers_version,
    pytorch_version=pytorch_version,
    py_version=py_version,
    instance_type=instance_type,
    instance_count=instance_count,
    debugger_hook_config=False,
)

In [None]:
# Start the training job, feeding it the channels declared in `inputs`.
estimator.fit(inputs=inputs, job_name=job_name)

In [None]:
# Keep the name of the estimator's latest training job for later cells.
training_job_name = estimator.latest_training_job.name

In [30]:
# --- Inference model configuration ------------------------------------------
# NOTE: this cell rebinds `entry_point` and `py_version`, which the training
# estimator cell also reads. Re-run the training configuration cell before
# re-creating the estimator, otherwise it would pick up 'inference.py'.
model_name = None
# Ask the estimator where the trained artifact was written instead of
# rebuilding the S3 key by hand: 's3://<bucket>/<job>/output/model.tar.gz'
# is only correct while the estimator writes to the root of the default bucket.
model_data = estimator.model_data
entry_point = 'inference.py'
# NOTE(review): training requests pytorch_version '1.9.1' while serving uses
# '1.9.0' - confirm the difference is intended.
framework_version = '1.9.0'
py_version = 'py38'
# Extra environment variables for the serving container (none).
model_environment = {}

In [31]:
from sagemaker.pytorch.model import PyTorchModel

# Wrap the trained artifact in a PyTorch model object, using the inference
# settings from the previous cell and the same source archive as training.
model = PyTorchModel(
    model_data=model_data,
    role=role,
    entry_point=entry_point,
    source_dir=source_dir,
    framework_version=framework_version,
    py_version=py_version,
    env=model_environment,
    name=model_name,
)

In [32]:
# Endpoint settings read by the deploy cell. `instance_type` and
# `instance_count` replace the values that were set for training.
instance_count = 1
instance_type = 'ml.m5.xlarge'
endpoint_name = None

In [None]:
# Deploy the model with the endpoint settings above and keep the returned
# predictor for the inference cells.
predictor = model.deploy(
    initial_instance_count=instance_count,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

In [35]:
from os import listdir
from os.path import isfile, join

# Directory with the sample inference payloads from the cloned sample-data repo.
sample_dir ='../../../all-in-one-ai-sample-data/cpt/inference/sample'

# Regular files only. Sorted because os.listdir() returns entries in arbitrary
# order, so an unsorted files[0] could differ from run to run.
files = sorted(f for f in listdir(sample_dir) if isfile(join(sample_dir, f)))
if not files:
    # Fail with an actionable message instead of an IndexError on files[0].
    raise FileNotFoundError(
        'No sample files found in {0}; clone the sample data first.'.format(sample_dir)
    )

# Read the first sample as text. The context manager closes the file handle.
# Decoding is pinned to UTF-8 so it does not depend on the kernel's locale
# (assumes the sample files are UTF-8 encoded - TODO confirm).
with open(join(sample_dir, files[0]), 'r', encoding='utf-8') as sample_file:
    inputs = sample_file.read()

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Use JSON for both the request body and the response.
predictor.deserializer = JSONDeserializer()
predictor.serializer = JSONSerializer()

# Send the sample text under the 'inputs' key; the response is the cell output.
predictor.predict({'inputs': inputs})

In [37]:
# Delete the endpoint behind `predictor` now that the sample request is done.
predictor.delete_endpoint()