In [1]:
# S3 prefix
bucket = 'sagemaker-getting-start-test'
prefix = 'sagemaker/scikit-tpot'

# Import libraries
from sagemaker import get_execution_role
import boto3, sys, os
import sagemaker

sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()
my_region = boto3.session.Session().region_name # set the region of the instance
print("Execution role is " + role)
print("Success - the MySageMakerInstance is in the " + my_region + ".")

Execution role is arn:aws:iam::251344623468:role/service-role/AmazonSageMaker-ExecutionRole-20191017T203175
Success - the MySageMakerInstance is in the ap-northeast-1.


In [2]:
s3 = boto3.resource('s3')

try:
    if my_region == 'ap-northeast-1':
        s3.create_bucket(Bucket=bucket)
    else:
        s3.create_bucket(Bucket=bucket, CreateBucketConfiguration={'LocationConstraint': my_region})
    print('S3 bucket created successfully')
except Exception as e:
    print('S3 error: ', e)

S3 error:  An error occurred (IllegalLocationConstraintException) when calling the CreateBucket operation: The unspecified location constraint is incompatible for the region specific endpoint this request was sent to.


In [5]:
import os

# Create directory and upload data to S3
os.makedirs('./data', exist_ok=True)
WORK_DIRECTORY = 'data'
train_input = sagemaker_session.upload_data("{}/iris.csv".format(WORK_DIRECTORY), bucket=bucket, key_prefix="{}/{}".format(prefix, WORK_DIRECTORY))

In [6]:
# train data and save a model
account = sagemaker_session.boto_session.client('sts').get_caller_identity()['Account']
region = sagemaker_session.boto_session.region_name
container_name = 'sklearn-tpot-container'
image_full = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(account, region, container_name)

clf = sagemaker.estimator.Estimator(image_full, role, 1, 'ml.c4.2xlarge', 
                                    output_path="s3://{}/{}/output".format(bucket, prefix),
                                    sagemaker_session=sagemaker_session)

params = dict(generations=10, populations=10, cv=5)
clf.set_hyperparameters(**params)

# training with the gradient boosting classifier model
clf.fit(train_input)

2020-01-11 02:11:09 Starting - Starting the training job...
2020-01-11 02:11:11 Starting - Launching requested ML instances......
2020-01-11 02:12:13 Starting - Preparing the instances for training...
2020-01-11 02:13:02 Downloading - Downloading input data
2020-01-11 02:13:02 Training - Downloading the training image...
2020-01-11 02:13:40 Training - Training image download completed. Training in progress..[34mStarting the training.[0m
[34mX shape: (150,4)[0m
[34my shape: (150,1)[0m
[34mGeneration 1 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 2 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 3 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 4 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 5 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 6 - Current best internal CV score: 0.9666666666666668[0m
[34mGeneration 7 - Current best internal CV score: 0.9

Training seconds: 106
Billable seconds: 106


In [7]:
from sagemaker.predictor import csv_serializer
predictor = clf.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge", serializer=csv_serializer)

--------------------------------------------------------------!

In [19]:
# load test payload
import numpy as np
import pandas as pd
test_data = pd.read_csv("{}/payload_iris.csv".format(WORK_DIRECTORY), header=None)
test_X = test_data.iloc[:, :-1]
test_y = test_data.iloc[:, [-1]]
print("test_data: {}".format(test_data.shape))

test_data: (15, 4)


In [20]:
predictions = predictor.predict(test_data.values).decode('utf-8')
predictions_array = np.fromstring(predictions, sep=' ') # and turn the prediction into an array
print("Predicted values:\n{}".format(predictions_array))

Predicted values:
[0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 2. 2. 2. 2. 2.]


In [21]:
clf.delete_endpoint()