In [51]:
# Before running this notebook, make sure you have already cloned the sample data, e.g.:
#!git clone https://github.com/xieyongliang/all-in-one-ai-sample-data.git ../../../all-in-one-ai-sample-data

In [61]:
import sagemaker
from sagemaker.tensorflow import TensorFlow

In [53]:
# IAM role that the SageMaker jobs and the endpoint will run under.
role = sagemaker.get_execution_role()

# Session handle and its default S3 bucket; every S3 URI below is built
# from `bucket`.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

In [None]:
# Rebuild the source bundle from scratch: drop any archive left over from a
# previous run, then pack everything matched by `*` in the current directory.
!rm -rf sourcedir.tar.gz
!tar czvf sourcedir.tar.gz *

In [None]:
source_dir = 's3://{0}/{1}/source/'.format(bucket, 'paddleocr')
!aws s3 cp sourcedir.tar.gz $source_dir
source_dir = '{0}sourcedir.tar.gz'.format(source_dir)
!rm -f sourcedir.tar.gz

In [None]:
# S3 prefix for the training data; it is passed to the training job as the
# `dataset` input channel.
dataset_dir = 's3://{0}/{1}/data/dataset'.format(bucket, 'paddleocr')
# Copy the sample training data from the cloned sample-data repository to S3.
!aws s3 cp ../../../all-in-one-ai-sample-data/paddleocr/train/ $dataset_dir --recursive

In [None]:
# S3 prefix for the pretrained weights; it is passed to the training job as
# the `pretrained_models` input channel.
pretrain_models_dir = 's3://{0}/{1}/data/pretrain_models'.format(bucket, 'paddleocr')
# Download the three pretrained weight files into a scratch directory.
!mkdir -p pretrain_models
!wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
!wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams
!wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet50_vd_ssld_pretrained.pdparams
# Upload the scratch directory to S3, then remove the local copy.
!aws s3 cp pretrain_models $pretrain_models_dir --recursive
!rm -rf pretrain_models

In [62]:
job_name = None
entry_point = 'train.py'
git_config = None
role = role
hyperparameters = {
    'c': '/opt/ml/input/data/dataset/configs/det/det_mv3_db.yml'
}
framework_version = '2.2.2'
py_version = 'py37'
instance_type = 'ml.g4dn.16xlarge'
instance_count = 1
inputs = {
    'dataset': dataset_dir,
    'pretrained_models': pretrain_models_dir
}

In [64]:
estimator = TensorFlow(
    entry_point = entry_point,
    source_dir = source_dir,
    git_config = git_config,
    role = role,
    debugger_hook_config = False,
    hyperparameters = hyperparameters,
    framework_version = framework_version, 
    py_version = py_version,
    instance_type = instance_type,
    instance_count = instance_count
)

In [None]:
# Launch the training job with the configured input channels.
estimator.fit(inputs = inputs, job_name = job_name)

In [None]:
# Remember the name of the most recent training job started by `estimator`.
training_job_name = estimator.latest_training_job.name

In [None]:
!touch dummy
!tar czvf model.tar.gz dummy
assets_dir = 's3://{0}/{1}/assets/'.format(bucket, 'paddleocr')
model_data = 's3://{0}/{1}/assets/model.tar.gz'.format(bucket, 'paddleocr')
!aws s3 cp model.tar.gz $assets_dir
!rm -f dummy model.tar.gz

In [13]:
model_name = None
model_data = model_data
entry_point = 'inference.py'
framework_version = '1.9.0'
py_version = 'py38'
model_environment = {}

In [24]:
from sagemaker.pytorch.model import PyTorchModel

model = PyTorchModel(
    name = model_name,
    model_data = model_data,
    entry_point = entry_point,
    source_dir = source_dir,
    role = role,
    framework_version = framework_version, 
    py_version = py_version,
    env = model_environment
)

In [25]:
endpoint_name = None
instance_type = 'ml.m5.xlarge'
instance_count = 1

In [None]:
predictor = model.deploy(
    endpoint_name = endpoint_name,
    instance_type = instance_type, 
    initial_instance_count = instance_count
)

In [48]:
from os import listdir
from os.path import isfile, join

sample_dir ='../../../all-in-one-ai-sample-data/paddleocr/inference'
files = [f for f in listdir(sample_dir) if isfile(join(sample_dir, f)) and (f.endswith('png') or f.endswith('jpg') or f.endswith('.jpeg'))]

bytes  = open(join(sample_dir, files[0]), 'rb').read()

In [None]:
from sagemaker.serializers import DataSerializer 
from sagemaker.deserializers import JSONDeserializer

predictor.serializer = DataSerializer()
predictor.deserializer = JSONDeserializer()

predictor.predict(bytes, initial_args={"ContentType":"image/png"})

In [None]:
# Delete the endpoint behind `predictor` once testing is finished.
predictor.delete_endpoint()