In [1]:
# Before running this notebook, make sure you have already cloned the sample data repository:
#!git clone https://github.com/xieyongliang/all-in-one-ai-sample-data.git ../../../all-in-one-ai-sample-data

In [31]:
import sagemaker
from sagemaker.pytorch import PyTorch

In [2]:
# AWS context shared by the rest of the notebook: the execution role, a
# SageMaker session, and that session's default S3 bucket.
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

In [None]:
# Rebuild the source bundle from scratch: remove any archive left over from a
# previous run, then pack the contents of the current directory into
# sourcedir.tar.gz (the shell expands `*`, so dot-files are not included).
!rm -rf sourcedir.tar.gz
!tar czvf sourcedir.tar.gz *

In [None]:
# Upload the source bundle to s3://<bucket>/paddlenlp/source/.
# `source_dir` first holds the destination prefix (with trailing slash) ...
source_dir = 's3://{0}/{1}/source/'.format(bucket, 'paddlenlp')
# IPython substitutes the Python variable for $source_dir in the shell line.
!aws s3 cp sourcedir.tar.gz $source_dir
# ... and is then re-bound to the full URI of the uploaded archive, which is
# the value later cells pass as `source_dir`.
source_dir = '{0}sourcedir.tar.gz'.format(source_dir)
# The local archive is no longer needed once it has been copied to S3.
!rm -f sourcedir.tar.gz

In [None]:
# Copy the sample training data from the cloned sample-data checkout to
# s3://<bucket>/paddlenlp/data/train; `train_dir` is used as the training
# input channel location further down.
train_dir = 's3://{0}/{1}/data/train'.format(bucket, 'paddlenlp')
!aws s3 cp ../../../all-in-one-ai-sample-data/paddlenlp/train/ $train_dir --recursive

In [17]:
# ---------------------------------------------------------------------------
# Training job configuration
# ---------------------------------------------------------------------------

# What to run.
entry_point = 'finetune.py'
git_config = None
job_name = None  # no explicit job name is passed to fit()
role = role      # no-op re-binding of the execution role resolved earlier

# Training container and hardware.
framework_version = '1.9.0'
py_version = 'py38'
instance_type = 'ml.g4dn.16xlarge'
instance_count = 1

# Values handed to the estimator as hyperparameters. The data paths sit under
# /opt/ml/input/data/dataset, matching the 'dataset' channel name used in
# `inputs` below.
hyperparameters = {
    'train_path': '/opt/ml/input/data/dataset/train.txt',
    'dev_path': '/opt/ml/input/data/dataset/dev.txt',
    'save_dir': '/opt/ml/model',
    'batch_size': 16,
    'learning_rate': 1e-5,
    'max_seq_len': 512,
    'num_epochs': 100,
    'seed': 1000,
    'logging_steps': 10,
    'valid_steps': 100,
    'device': 'gpu',
    'model': 'uie-base',
}

# Single input channel, 'dataset', fed from the S3 training prefix.
inputs = {'dataset': train_dir}

In [19]:
# Assemble the PyTorch estimator from the settings defined in the
# configuration cell; the debugger hook is switched off explicitly.
estimator = PyTorch(
    # code to run
    entry_point=entry_point,
    source_dir=source_dir,
    git_config=git_config,
    hyperparameters=hyperparameters,
    # container
    framework_version=framework_version,
    py_version=py_version,
    # infrastructure
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,
    debugger_hook_config=False,
)

In [None]:
# Launch the training job on the 'dataset' channel defined in `inputs`.
estimator.fit(inputs=inputs, job_name=job_name)

In [66]:
# Directory (inside the cloned sample-data checkout) holding the inference samples.
sample_dir = '../../../all-in-one-ai-sample-data/paddlenlp/inference/sample'

In [67]:
from os import listdir
from os.path import isfile, join

# Regular files directly inside `sample_dir` (sub-directories are skipped).
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make `files[0]` the same file on every run.
files = sorted(f for f in listdir(sample_dir) if isfile(join(sample_dir, f)))

In [70]:
# Load the text of the first sample file. The context manager closes the file
# handle, and the encoding is pinned so decoding does not depend on the
# machine's locale (sample text is presumably UTF-8 - confirm).
# NOTE: this re-binds `inputs`, which previously held the training channel dict.
with open(join(sample_dir, files[0]), 'r', encoding='utf-8') as sample_file:
    inputs = sample_file.read()

In [72]:
# Name of the estimator's most recent training job (requires that the
# estimator has already launched a job in this kernel session).
training_job_name = estimator.latest_training_job.name

In [73]:
# ---------------------------------------------------------------------------
# Inference model configuration
# ---------------------------------------------------------------------------
model_name = None

# Ask the estimator where the training job stored its artifacts instead of
# rebuilding 's3://<bucket>/<job-name>/output/model.tar.gz' by hand: the
# hand-built path is only right when the job wrote to the default output
# location, whereas `estimator.model_data` reports the location actually used.
model_data = estimator.model_data

# These re-bind the training-time values of the same names.
entry_point = 'inference.py'
framework_version = '1.9.0'
py_version = 'py38'

# Environment variables for the model container. `schema` is a JSON-encoded
# list of labels (departure, destination, cost, time); presumably read by
# inference.py - confirm against that script.
model_environment = {
    'schema': '["出发地", "目的地", "费用", "时间"]'
}

In [74]:
from sagemaker.pytorch.model import PyTorchModel

# Wrap the trained artifacts in a deployable model that reuses the uploaded
# source bundle and the inference settings defined above.
model = PyTorchModel(
    # artifacts and code
    model_data=model_data,
    entry_point=entry_point,
    source_dir=source_dir,
    # container
    framework_version=framework_version,
    py_version=py_version,
    env=model_environment,
    # identity
    name=model_name,
    role=role,
)

In [45]:
# Endpoint settings. `instance_type` and `instance_count` re-bind the
# training-time values of the same names.
instance_type = 'ml.m5.xlarge'
instance_count = 1
endpoint_name = None  # no explicit endpoint name is passed to deploy()

In [None]:
# Deploy the model to an endpoint and keep the returned predictor for the
# inference cells below.
predictor = model.deploy(
    initial_instance_count=instance_count,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

In [None]:
from os import listdir
from os.path import isfile, join

# Directory (inside the cloned sample-data checkout) holding the inference samples.
sample_dir = '../../../all-in-one-ai-sample-data/paddlenlp/inference/sample'

# Regular files directly inside `sample_dir`. os.listdir returns entries in
# arbitrary order, so the names are sorted to make `files[0]` reproducible.
files = sorted(f for f in listdir(sample_dir) if isfile(join(sample_dir, f)))

# Text of the first sample file, used as the request payload below. The
# context manager closes the file handle, and the encoding is pinned so
# decoding does not depend on the machine's locale (sample text is presumably
# UTF-8 - confirm).
with open(join(sample_dir, files[0]), 'r', encoding='utf-8') as sample_file:
    inputs = sample_file.read()

In [None]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# Exchange JSON with the endpoint in both directions.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

# Send the sample text under the "inputs" key; as the last expression of the
# cell, the response is displayed as the cell output.
predictor.predict({"inputs": inputs})

In [None]:
# Clean-up: delete the endpoint that `model.deploy` created for `predictor`.
predictor.delete_endpoint()