Manually create an IAM role named [SageMakerRole] with the AmazonSageMakerFullAccess and AmazonEC2ContainerRegistryFullAccess policies attached.

Manually attach the AmazonEC2ContainerRegistryFullAccess policy to the IAM role used by the notebook instance.

# Create MXNet Container

In [None]:
# Create the binary
!git clone https://github.com/aws/sagemaker-mxnet-container.git
!cd sagemaker-mxnet-container
!git checkout v3.1.2
!python setup.py sdist

In [None]:
# MXNet 1.4.1, Python 3, CPU
!cp dist/sagemaker_mxnet_container-3.1.2.tar.gz docker/1.4.1/py3/sagemaker_mxnet_container.tar.gz
!cd docker/1.4.1/py3/
!docker build -t preprod-mxnet:1.4.1-cpu-py3 -f Dockerfile.cpu .

In [None]:
# upload container to ECS
# create-repository in ECR mannually
!$(aws ecr get-login --region us-east-1 --no-include-email)
!docker tag preprod-mxnet:1.4.1-cpu-py3 579019700964.dkr.ecr.us-east-1.amazonaws.com/preprod-mxnet:1.4.1-cpu-py3
!aws ecr create-repository --repository-name preprod-mxnet
!docker push 579019700964.dkr.ecr.us-east-1.amazonaws.com/preprod-mxnet:1.4.1-cpu-py3

In [None]:
!cd ../../../
!pip install -e .[test]

In [None]:
# May fail
!tox test/unit

In [None]:
# May fail
!tox -- test/integration/local --docker-base-name preprod-mxnet \
                              --tag 1.4.1-cpu-py3 \
                              --py-version 3 \
                              --framework-version 1.4.1 \
                              --processor cpu

In [None]:
# May fail
!tox -- test/integration/sagemaker --aws-id 579019700964 \
                                  --docker-base-name preprod-mxnet \
                                  --instance-type ml.m4.xlarge \
                                  --tag 1.4.1-cpu-py3

# Create TextClassification Train Docker

In [None]:
# Manual checklist: none of the commands in this cell visibly create the files
# named below, so they are presumably authored by hand beforehand.
# create train  (presumably the entry point selected by the `train` argument below; confirm)

# create hyperparameters.json  (read later with json.load when configuring the Estimator)

# create inputdataconfig.json

# create resourceconfig.json

# create Dockerfile.train  (presumably consumed by build_and_push.sh; confirm)

# Run the example script (its contents are not shown in this notebook).
!./classification-example.sh

# Invoke build_and_push.sh with the image name and the `train` argument.
!./build_and_push.sh text-classification-train train

# Run the resulting image locally with the `train` argument.
!docker run text-classification-train train

# Test TextClassification Train Docker

In [6]:
import boto3

# Account ID of the caller's credentials (via STS) and the region configured
# for the default boto3 session.
account = boto3.client('sts').get_caller_identity()['Account']
region = boto3.session.Session().region_name

# Name of the training image; also used as the last path segment of the URI.
algorithm_name = 'text-classification-train'

# URI of the `latest` tag of that image in this account's ECR registry.
ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, algorithm_name)

print(ecr_image)

579019700964.dkr.ecr.us-east-1.amazonaws.com/text-classification-train:latest


In [7]:
!aws s3 cp data/dbpedia.train s3://sagemaker-us-east-1-579019700964/gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.train
!aws s3 cp data/dbpedia.test s3://sagemaker-us-east-1-579019700964/gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.test

upload: data/dbpedia.train to s3://sagemaker-us-east-1-579019700964/gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.train
upload: data/dbpedia.test to s3://sagemaker-us-east-1-579019700964/gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.test


# Local Mode

In [10]:
# Let's set up our SageMaker notebook instance for local mode.
# Runs the helper script utils/setup.sh with bash.
!/bin/bash ./utils/setup.sh

The user has root access.
SageMaker instance route table setup is ok. We are good to go.
SageMaker instance routing for Docker is ok. We are good to go!


In [11]:
import json
from sagemaker.estimator import Estimator
# `session` (the sagemaker.session module) is imported explicitly because
# s3_input is referenced as `session.s3_input` below; without this import the
# cell raises NameError on a fresh kernel.
from sagemaker import get_execution_role, session, Session

role = get_execution_role()

# Load the hyperparameters with a context manager so the file handle is closed.
with open('hyperparameters.json', 'r') as hyperparameters_file:
    hyperparameters = json.load(hyperparameters_file)

# Input channels: the train/test files uploaded under the default bucket.
bucket = Session().default_bucket()
s3_train_data = 's3://{}/{}'.format(bucket, 'gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.train')
s3_validation_data = 's3://{}/{}'.format(bucket, 'gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.test')
train_data = session.s3_input(s3_train_data, distribution='FullyReplicated', 
                        content_type='text/plain', s3_data_type='S3Prefix')
validation_data = session.s3_input(s3_validation_data, distribution='FullyReplicated', 
                             content_type='text/plain', s3_data_type='S3Prefix')
data_channels = {'train': train_data, 'validation': validation_data}

# 'local' runs the training container on this notebook instance.
instance_type = 'local'

# Local mode uses the locally built image tag rather than the ECR URI.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name='text-classification-train:latest',
                      hyperparameters=hyperparameters)

estimator.fit(data_channels)

Creating tmpzn8nlpk7_algo-1-95dti_1 ... 


KeyboardInterrupt: 

# SageMaker Mode

In [None]:
import json
from sagemaker.estimator import Estimator
# `session` (the sagemaker.session module) is imported explicitly because
# s3_input is referenced as `session.s3_input` below; without this import the
# cell raises NameError on a fresh kernel.
from sagemaker import get_execution_role, session, Session

role = get_execution_role()

# Load the hyperparameters with a context manager so the file handle is closed.
with open('hyperparameters.json', 'r') as hyperparameters_file:
    hyperparameters = json.load(hyperparameters_file)

# Input channels: the train/test files uploaded under the default bucket.
bucket = Session().default_bucket()
s3_train_data = 's3://{}/{}'.format(bucket, 'gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.train')
s3_validation_data = 's3://{}/{}'.format(bucket, 'gcr_sagemaker_workshop/NLP/gluonnlp/data/dbpedia.test')
train_data = session.s3_input(s3_train_data, distribution='FullyReplicated', 
                        content_type='text/plain', s3_data_type='S3Prefix')
validation_data = session.s3_input(s3_validation_data, distribution='FullyReplicated', 
                             content_type='text/plain', s3_data_type='S3Prefix')
data_channels = {'train': train_data, 'validation': validation_data}

# Managed training on a SageMaker ML instance.
instance_type = 'ml.m4.4xlarge'

# `ecr_image` is the ECR URI computed in the "Test TextClassification Train
# Docker" cell; that cell must have been run first.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters)

estimator.fit(data_channels)

2019-09-12 07:20:55 Starting - Starting the training job...
2019-09-12 07:20:56 Starting - Launching requested ML instances.........
2019-09-12 07:22:27 Starting - Preparing the instances for training...
2019-09-12 07:23:23 Downloading - Downloading input data...
2019-09-12 07:23:44 Training - Downloading the training image......
2019-09-12 07:24:30 Training - Training image download completed. Training in progress.......................................................................................................................................................................................................................................................................................

# Create MXNet Serving Container (TODO)

In [None]:
!git clone https://github.com/aws/sagemaker-mxnet-serving-container.git
!cd sagemaker-mxnet-serving-container
!git checkout v1.1.3
!python setup.py sdist

In [None]:
!cp dist/sagemaker_mxnet_serving_container-1.1.3.tar.gz docker/1.4.1/py3/sagemaker_mxnet_serving_container.tar.gz
!cd docker/1.4.1/py3/
!docker build -t preprod-mxnet-serving:1.4.1-cpu-py3 -f Dockerfile.cpu .

In [None]:
# upload container to ECS
# create-repository in ECR mannually
!$(aws ecr get-login --region us-east-1 --no-include-email)
!docker tag preprod-mxnet-serving:1.4.1-cpu-py3 579019700964.dkr.ecr.us-east-1.amazonaws.com/preprod-mxnet-serving:1.4.1-cpu-py3
!aws ecr create-repository --repository-name preprod-mxnet-serving
!docker push 579019700964.dkr.ecr.us-east-1.amazonaws.com/preprod-mxnet-serving:1.4.1-cpu-py3

In [None]:
!cd ../../../
!pip install -e .[test]

In [None]:
!tox test/unit

In [None]:
# May fail
!tox test/integration/local -- --docker-base-name preprod-mxnet-serving \
                              --tag 1.4.1-cpu-py3 \
                              --py-version 3 \
                              --framework-version 1.4.1 \
                              --processor cpu

In [None]:
# May fail
!tox test/integration/sagemaker -- --aws-id 579019700964 \
                                  --docker-base-name preprod-mxnet-serving \
                                  --instance-type ml.m4.xlarge \
                                  --tag 1.4.1-cpu-py3

# Create TextClassification Serving Docker

In [None]:
# create serve

# create Dockerfile.serve

./build_and_push.sh text-classification-serve serve

!docker run text-classification-serve serve