In [None]:
# Before running this notebook, make sure the sample data has already been cloned
# so that ../../../all-in-one-ai-sample-data exists (the cells below read from it).
#!git clone https://github.com/xieyongliang/all-in-one-ai-sample-data.git ../../../all-in-one-ai-sample-data

In [10]:
import sagemaker
from sagemaker.pytorch import PyTorch

In [2]:
# Create the SageMaker session, then look up its default S3 bucket and the
# execution role; `bucket` and `role` are reused by the cells below.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

In [None]:
# Rebuild the source bundle: remove any previous archive first, then pack the
# entries matched by `*` in the current working directory into sourcedir.tar.gz.
!rm -rf sourcedir.tar.gz
!tar czvf sourcedir.tar.gz *

In [None]:
source_dir = 's3://{0}/{1}/source/'.format(bucket, 'gabsa')
!aws s3 cp sourcedir.tar.gz $source_dir
source_dir = '{0}sourcedir.tar.gz'.format(source_dir)
!rm -f sourcedir.tar.gz

In [None]:
train_dir = 's3://{0}/{1}/data/train'.format(bucket, 'gabsa')
!aws s3 cp ../../../all-in-one-ai-sample-data/gabsa/train/ $train_dir --recursive

In [8]:
# Training-job settings read by the estimator and fit cells that follow.
job_name = None
entry_point = 'train.py'
git_config = None
role = role  # self-assignment: the role looked up earlier is used unchanged

# Hyperparameters handed to the training entry point; every value is given as a string.
hyperparameters = dict(
    task='tasd',
    dataset='dataset',
    model_name_or_path='t5-base',
    paradigm='extraction',
    eval_batch_size='16',
    train_batch_size='2',
    learning_rate='3e-4',
    num_train_epochs='4',
)

# Container versions and training hardware.
framework_version = '1.7.1'
py_version = 'py36'
instance_type = 'ml.p3.2xlarge'
instance_count = 1

# Input channels: 'dataset' points at the training data uploaded to S3.
inputs = {'dataset': train_dir}

In [11]:
# Build the PyTorch estimator from the training settings defined in the previous cell.
estimator = PyTorch(
    # Code to run
    entry_point=entry_point,
    source_dir=source_dir,
    git_config=git_config,
    hyperparameters=hyperparameters,
    # Container
    framework_version=framework_version,
    py_version=py_version,
    # Permissions and hardware
    role=role,
    instance_type=instance_type,
    instance_count=instance_count,
    # Debugger hook is explicitly switched off for this job
    debugger_hook_config=False,
)

In [None]:
# Start the training job on the configured input channels.
estimator.fit(inputs=inputs, job_name=job_name)

In [None]:
# Remember the name of the most recent training job; the next cell uses it to
# build the S3 location of the trained model artifact.
training_job_name = estimator.latest_training_job.name

In [13]:
# Settings for wrapping the trained artifact in a deployable model.
model_name = None
model_environment = {}

# Inference uses its own entry point; this overwrites the training value of `entry_point`.
entry_point = 'inference.py'
framework_version = '1.7.1'
py_version = 'py36'

# Model artifact location, derived from the bucket and the training job name.
model_data = 's3://{}/{}/output/model.tar.gz'.format(
    bucket,
    training_job_name,
)

In [14]:
from sagemaker.pytorch.model import PyTorchModel

# Create the model object from the trained artifact and the inference settings.
model = PyTorchModel(
    # What to serve
    model_data=model_data,
    name=model_name,
    # Inference code
    entry_point=entry_point,
    source_dir=source_dir,
    # Container and permissions
    framework_version=framework_version,
    py_version=py_version,
    role=role,
    env=model_environment,
)

In [15]:
# Hosting settings for the endpoint. Note that instance_type / instance_count
# overwrite the values that were set earlier for the training job.
instance_count = 1
instance_type = 'ml.m5.xlarge'
endpoint_name = None

In [None]:
# Deploy the model to an endpoint and keep the returned predictor for inference.
predictor = model.deploy(
    initial_instance_count=instance_count,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

In [17]:
from os import listdir
from os.path import isfile, join

# Load one sample document from the sample-data checkout to use as the request text.
sample_dir ='../../../all-in-one-ai-sample-data/gabsa/inference/sample'

# os.listdir returns entries in arbitrary order; sort so that the same sample
# is picked on every run.
files = sorted(f for f in listdir(sample_dir) if isfile(join(sample_dir, f)))

# Fail with a clear message instead of an IndexError when the folder is empty.
if not files:
    raise FileNotFoundError('No sample files found in {0}'.format(sample_dir))

# Read the first sample; the context manager closes the file handle afterwards.
with open(join(sample_dir, files[0]), 'r') as sample_file:
    inputs = sample_file.read()

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Send requests and parse responses as JSON.
predictor.deserializer = JSONDeserializer()
predictor.serializer = JSONSerializer()

# The request body carries the sample text under the 'inputs' key. The predict
# call is the last expression of the cell, so its result is displayed.
payload = {'inputs': inputs}
predictor.predict(payload)

In [19]:
# Tear down the endpoint created by the deploy cell once testing is finished.
predictor.delete_endpoint()