# AutoGluon Tabular with SageMaker

<br>

## Prerequisites

If using a SageMaker hosted notebook, select kernel `conda_mxnet_p36`.

In [1]:
# Make sure docker compose is set up properly for local mode.
# setup.sh is a helper script expected to sit next to this notebook; it only
# matters if you later choose instance_type = 'local' for training or deployment.
!./setup.sh

The user has root access.
SageMaker instance route table setup is ok. We are good to go.
SageMaker instance routing for Docker is ok. We are good to go!


In [2]:
# Used to perform df.to_csv('s3://<path>') when uploading the data splits later on.
# Versions are pinned for reproducibility; grep -v hides the noisy
# "already satisfied" lines from pip's output.
!pip install pandas==1.0.3 s3fs==0.4.0 | grep -v 'already satisfied'

Collecting pandas==1.0.3
  Downloading https://files.pythonhosted.org/packages/bb/71/8f53bdbcbc67c912b888b40def255767e475402e9df64050019149b1a943/pandas-1.0.3-cp36-cp36m-manylinux1_x86_64.whl (10.0MB)
Collecting s3fs==0.4.0
  Downloading https://files.pythonhosted.org/packages/72/5c/ec84c7ec49fde2c3b0d885ecae4504fa40fc77fef7684e9f2939c50f9b94/s3fs-0.4.0-py3-none-any.whl
Collecting fsspec>=0.6.0 (from s3fs==0.4.0)
  Downloading https://files.pythonhosted.org/packages/85/0f/ea31396f3fc46c375604ce4fb6f6714cbe0b01e8376ca277341e462e28db/fsspec-0.6.3-py3-none-any.whl (65kB)
Installing collected packages: pandas, fsspec, s3fs
  Found existing installation: pandas 0.24.2
    Uninstalling pandas-0.24.2:
      Successfully uninstalled pandas-0.24.2
  Found existing installation: s3fs 0.1.5
    Uninstalling s3fs-0.1.5:
      Successfully uninstalled s3fs-0.1.5
Successfully installed fsspec-0.6.3 pandas-1.0.3 s3fs-0.4.0
[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
Yo

In [3]:
# Imports
import os
from collections import Counter

import boto3
import pandas as pd
import sagemaker
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell
from sagemaker import get_execution_role, local, Model
from sagemaker.estimator import Estimator
from sagemaker.predictor import (
    RealTimePredictor,
    csv_deserializer,
    csv_serializer,
    json_deserializer,
)
from sklearn.metrics import accuracy_score, classification_report

# Display preferences for this notebook
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 500)
InteractiveShell.ast_node_interactivity = "all"  # echo every bare expression in a cell, not only the last

# SageMaker sessions: one for hosted jobs, one for local mode
session = sagemaker.Session()
local_session = local.LocalSession()

# S3 location and region shared by every later cell
prefix = 'sagemaker/autogluon-tabular'
bucket = session.default_bucket()
region = session.boto_region_name

# Execution role and AWS account id (the account id is needed to build ECR image URIs)
role = get_execution_role()
client = boto3.client('sts')
caller_identity = client.get_caller_identity()
account = caller_identity['Account']

### Build training docker image

First, build the AutoGluon package directory that will be copied into the Docker image.

In [4]:
if not os.path.exists('package'):
    !pip install PrettyTable -t package
    !pip install bokeh -t package
    !pip install --pre autogluon -t package
    !pip install numpy==1.16.1 -t package    
    !pip install --upgrade boto3 -t package
    !pip install bokeh -t package
    !pip install --upgrade matplotlib -t package

Collecting PrettyTable
  Downloading https://files.pythonhosted.org/packages/ef/30/4b0746848746ed5941f052479e7c23d2b56d174b82f4fd34a25e389831f5/prettytable-0.7.2.tar.bz2
Building wheels for collected packages: PrettyTable
  Running setup.py bdist_wheel for PrettyTable ... [?25ldone
[?25h  Stored in directory: /home/ec2-user/.cache/pip/wheels/80/34/1c/3967380d9676d162cb59513bd9dc862d0584e045a162095606
Successfully built PrettyTable
Installing collected packages: PrettyTable
Successfully installed PrettyTable-0.7.2
[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting bokeh
[?25l  Downloading https://files.pythonhosted.org/packages/e0/a7/875aad223b211951a043bf7b0eddcecb8b2afd5131c08945ff07ac968c7f/bokeh-2.0.0.tar.gz (8.5MB)
[K    100% |████████████████████████████████| 8.6MB 6.0MB/s eta 0:00:01
[?25hCollecting PyYAML>=3.10 (from bokeh)
Collecting python-dateutil>=2.1 (from

[?25hCollecting boto3 (from autogluon)
[?25l  Downloading https://files.pythonhosted.org/packages/34/f9/70047ba54fc389ec5e57e60e6b59d9f77d48d5dbd969b0f7ba6e65d33f56/boto3-1.12.27-py2.py3-none-any.whl (128kB)
[K    100% |████████████████████████████████| 133kB 42.3MB/s ta 0:00:01
[?25hCollecting paramiko>=2.5.0 (from autogluon)
  Using cached https://files.pythonhosted.org/packages/06/1e/1e08baaaf6c3d3df1459fd85f0e7d2d6aa916f33958f151ee1ecc9800971/paramiko-2.7.1-py2.py3-none-any.whl
Collecting numpy>=1.16.0 (from autogluon)
  Using cached https://files.pythonhosted.org/packages/07/08/a549ba8b061005bb629b76adc000f3caaaf881028b963c2e18f811c6edc1/numpy-1.18.2-cp36-cp36m-manylinux1_x86_64.whl
Collecting scikit-learn>=0.20.0 (from autogluon)
[?25l  Downloading https://files.pythonhosted.org/packages/5e/d8/312e03adf4c78663e17d802fe2440072376fee46cada1404f1727ed77a32/scikit_learn-0.22.2.post1-cp36-cp36m-manylinux1_x86_64.whl (7.1MB)
[K    100% |████████████████████████████████| 7.1MB 11.

Collecting cffi!=1.11.3,>=1.8 (from cryptography>=2.8->autogluon)
[?25l  Downloading https://files.pythonhosted.org/packages/f1/c7/72abda280893609e1ddfff90f8064568bd8bcb2c1770a9d5bb5edb2d1fea/cffi-1.14.0-cp36-cp36m-manylinux1_x86_64.whl (399kB)
[K    100% |████████████████████████████████| 399kB 38.6MB/s ta 0:00:01
[?25hCollecting cycler>=0.10 (from matplotlib->autogluon)
  Downloading https://files.pythonhosted.org/packages/f7/d2/e07d3ebb2bd7af696440ce7e754c59dd546ffe1bbe732c8ab68b9c834e61/cycler-0.10.0-py2.py3-none-any.whl
Collecting python-dateutil>=2.1 (from matplotlib->autogluon)
  Using cached https://files.pythonhosted.org/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
Collecting kiwisolver>=1.0.1 (from matplotlib->autogluon)
[?25l  Downloading https://files.pythonhosted.org/packages/f8/a1/5742b56282449b1c0968197f63eae486eca2c35dcd334bab75ad524e0de1/kiwisolver-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (90kB

[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/dateutil already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/numpy-1.18.2.dist-info already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/tornado already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/pyparsing-2.4.6.dist-info already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/numpy already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-tabular-sagemaker/package/__pycache__ already exists. Specify --upgrade to force replacement.[0m
[33mTarget directory /home/ec2-user/SageMaker/autogluon-t

[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting matplotlib
  Using cached https://files.pythonhosted.org/packages/93/4b/52da6b1523d5139d04e02d9e26ceda6146b48f2a4e5d2abfdf1c7bac8c40/matplotlib-3.2.1-cp36-cp36m-manylinux1_x86_64.whl
Collecting python-dateutil>=2.1 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl
Collecting pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/5d/bc/1e58593167fade7b544bfe9502a26dc860940a79ab306e651e7f13be68c2/pyparsing-2.4.6-py2.py3-none-any.whl
Collecting cycler>=0.10 (from matplotlib)
  Using cached https://files.pythonhosted.org/packages/f7/d2/e07d3ebb2bd7af696440ce7e754c59dd546ffe1bbe732c8ab68b9c834e61/cycler-0.10.0-py2.py3-none-any.whl
Collecti

Now build the training image and push to ECR

In [5]:
# Name of the training image: passed to the build/push script below and used
# later as the repository part of the ECR image URI given to the Estimator.
training_algorithm_name = 'autogluon-sagemaker-training'

In [36]:
# Build the training image and push it to ECR under the name chosen above.
# {training_algorithm_name} is interpolated from the Python variable by IPython.
!./container-training/build_push_training.sh {training_algorithm_name}

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  1.001GB
Step 1/8 : FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/mxnet-training:1.6.0-cpu-py3
 ---> 129a8893d865
Step 2/8 : RUN pip install --upgrade pip
 ---> Using cache
 ---> d94b2ee91d31
Step 3/8 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> 40261fdfe01e
Step 4/8 : COPY package/ /opt/ml/code/package/
 ---> Using cache
 ---> 838524924e49
Step 5/8 : COPY container-training/train.py /opt/ml/code/train.py
 ---> Using cache
 ---> 6a6df25826b2
Step 6/8 : COPY container-training/inference.py /opt/ml/code/inference.py
 ---> b0ac52f4f7af
Step 7/8 : ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
 ---> Running in a13cf138191d
Removing intermediate container a13cf138191d
 ---> 8f0bc38c456a
Step 8/8 : ENV SAGEMAKER_PROGRAM train.py
 ---> Running in d42fe75

### Get the data

In this example we'll use the direct-marketing dataset to build a binary classification model that predicts whether customers will accept or decline a marketing offer.  
First we'll download the data and split it into train and test sets. AutoGluon does not require a separate validation set (it uses bagged k-fold cross-validation).

In [37]:
# Download and unzip the data
!wget -N https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip --quiet
!unzip -qq -o bank-additional.zip
!rm bank-additional.zip

local_data_path = './bank-additional/bank-additional-full.csv'
data = pd.read_csv(local_data_path)

# Split train/test data
train = data.sample(frac=0.9, random_state=42)
test = data.drop(train.index)

# Split test X/y
target = 'y'
y_test = test[target]
X_test = test.drop(columns=[target])

##### Check the data

In [38]:
# Preview the first rows and the shape of each split.
# Every bare expression below is displayed (not only the last one) because
# InteractiveShell.ast_node_interactivity was set to "all" in the setup cell.
train.head(3)
train.shape

test.head(3)
test.shape

# Same rows as `test`, minus the target column
X_test.head(3)
X_test.shape

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
32884,57,technician,married,high.school,no,no,yes,cellular,may,mon,371,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
3169,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,285,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
32206,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,52,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no


(37069, 21)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
9,25,services,single,high.school,no,yes,no,telephone,may,mon,50,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
55,55,technician,married,professional.course,unknown,yes,no,telephone,may,mon,135,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
60,47,entrepreneur,married,university.degree,unknown,no,no,telephone,may,mon,449,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


(4119, 21)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
9,25,services,single,high.school,no,yes,no,telephone,may,mon,50,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
55,55,technician,married,professional.course,unknown,yes,no,telephone,may,mon,135,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
60,47,entrepreneur,married,university.degree,unknown,no,no,telephone,may,mon,449,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0


(4119, 20)

Upload the data to S3

In [9]:
# All three splits live under the same S3 "data" folder
data_s3_prefix = f's3://{bucket}/{prefix}/data'
train_s3_path = f'{data_s3_prefix}/train.csv'
test_s3_path = f'{data_s3_prefix}/test.csv'
X_test_s3_path = f'{data_s3_prefix}/X_test.csv'

# Write each split straight to S3 (without the DataFrame index) and report where it went
uploads = [
    ('Train', train, train_s3_path),
    ('Labeled Test', test, test_s3_path),
    ('Unlabeled Test', X_test, X_test_s3_path),
]
for label, frame, s3_path in uploads:
    frame.to_csv(s3_path, index=False)
    print(f'{label} data uploaded to {s3_path}')

Train data uploaded to s3://sagemaker-us-east-2-332270294944/sagemaker/autogluon-tabular/data/train.csv
Labeled Test data uploaded to s3://sagemaker-us-east-2-332270294944/sagemaker/autogluon-tabular/data/test.csv
Unlabeled Test data uploaded to s3://sagemaker-us-east-2-332270294944/sagemaker/autogluon-tabular/data/X_test.csv


## Train

For local training, set `train_instance_type` to `local`.  
For non-local training, the recommended instance type is `ml.m5.24xlarge`.

In [39]:
%%time

instance_type = 'ml.m5.24xlarge'
#instance_type = 'local'

ecr_image = f'{account}.dkr.ecr.{region}.amazonaws.com/{training_algorithm_name}:latest'

# Example autogluon hyperparameters
model_hps = {
    'NN': {'num_epochs': '500'},
    'GBM': {'num_boost_round': '10000',
            'num_leaves': 'ag.space.Int(lower=26, upper=66, default=36)'},
    'CAT': {'iterations': '10000'}, 
    'KNN': {},
    # (Adding RF or ET increase model export time)
}

hyperparameters = {
    'target': 'y',
    'time_limit': 2*60,
    'auto_stack': True,
    'num_bagging_sets': 20,
    'search_strategy': 'random',
    'hyperparameters': model_hps
}

estimator = Estimator(image_name=ecr_image,
                      role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      hyperparameters=hyperparameters)

estimator.fit(train_s3_path)

2020-03-24 10:19:07 Starting - Starting the training job...
2020-03-24 10:19:08 Starting - Launching requested ML instances......
2020-03-24 10:20:33 Starting - Preparing the instances for training...
2020-03-24 10:21:08 Downloading - Downloading input data...
2020-03-24 10:21:13 Training - Downloading the training image....[34m2020-03-24 10:22:12,531 sagemaker-containers INFO     Imported framework sagemaker_mxnet_container.training[0m
[34m2020-03-24 10:22:12,532 sagemaker-containers INFO     Failed to parse hyperparameter search_strategy value random to Json.[0m
[34mReturning the value itself[0m
[34m2020-03-24 10:22:12,532 sagemaker-containers INFO     Failed to parse hyperparameter auto_stack value True to Json.[0m
[34mReturning the value itself[0m
[34m2020-03-24 10:22:12,532 sagemaker-containers INFO     Failed to parse hyperparameter hyperparameters value {'NN': {'num_epochs': '500'}, 'GBM': {'num_boost_round': '10000', 'num_leaves': 'ag.space.Int(lower=26, upper=66, de


  Optimizer.opt_registry[name].__name__))[0m
[0m
[34mhosts,  type: <class 'list'>,  value: ['algo-1'][0m
[34mcurrent_host,  type: <class 'str'>,  value: algo-1[0m
[34mnum_gpus,  type: <class 'int'>,  value: 0[0m
[34mmodel_dir,  type: <class 'str'>,  value: /opt/ml/model[0m
[34mtrain,  type: <class 'str'>,  value: /opt/ml/input/data/training[0m
[34mtarget,  type: <class 'str'>,  value: y[0m
[34mhyperparameter_tune,  type: <class 'bool'>,  value: False[0m
[34mnum_trials,  type: <class 'NoneType'>,  value: None[0m
[34mtime_limits,  type: <class 'int'>,  value: 120[0m
[34msearch_strategy,  type: <class 'str'>,  value: random[0m
[34mfeature_prune,  type: <class 'bool'>,  value: False[0m
[34mauto_stack,  type: <class 'bool'>,  value: True[0m
[34mnum_bagging_folds,  type: <class 'int'>,  value: 0[0m
[34mnum_bagging_sets,  type: <class 'int'>,  value: 20[0m
[34mstack_ensemble_levels,  type: <class 'int'>,  value: 0[0m
[34menable_fit_continuation,  type: <class

[34mAttempting to fit model without HPO, but search space is provided. fit() will only consider default hyperparameter values from search space.[0m
[34mAttempting to fit model without HPO, but search space is provided. fit() will only consider default hyperparameter values from search space.[0m
[34mAttempting to fit model without HPO, but search space is provided. fit() will only consider default hyperparameter values from search space.[0m
[34mAttempting to fit model without HPO, but search space is provided. fit() will only consider default hyperparameter values from search space.[0m
[34mAttempting to fit model without HPO, but search space is provided. fit() will only consider default hyperparameter values from search space.[0m
[34m#0110.9207#011 = Validation accuracy score[0m
[34m#0119.63s#011 = Training runtime[0m
[34m#0110.16s#011 = Validation runtime[0m
[34mFitting model: CatboostClassifier_STACKER_l1 ... Training model for up to 58.95s of the 58.95s of remaining

In [12]:
# Optionally, download model_data and display training performance graph
!rm -rf model_data && mkdir model_data && aws s3 cp {estimator.model_data} - | tar -xz -C model_data
d(HTML('model_data/SummaryOfModels.html'))

### Build inference docker image

Build the inference image and push to ECR

In [27]:
# Name of the inference image: passed to the build/push script below and used
# later as the repository part of the ECR image URI given to the model.
inference_algorithm_name = 'autogluon-sagemaker-inference'

In [28]:
# Build the inference image and push it to ECR under the name chosen above.
# {inference_algorithm_name} is interpolated from the Python variable by IPython.
!./container-inference/build_push_inference.sh {inference_algorithm_name}

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  1.001GB
Step 1/4 : FROM 763104351884.dkr.ecr.us-east-2.amazonaws.com/mxnet-inference:1.6.0-cpu-py3
 ---> 1e1193fbea5c
Step 2/4 : RUN pip install --upgrade pip
 ---> Using cache
 ---> 28fe56fd6a2d
Step 3/4 : COPY package/ /opt/ml/code/package/
 ---> Using cache
 ---> 1c40a4299495
Step 4/4 : ENV SAGEMAKER_PROGRAM inference.py
 ---> Using cache
 ---> f7b76b9af9cd
Successfully built f7b76b9af9cd
Successfully tagged autogluon-sagemaker-inference:latest
The push refers to repository [332270294944.dkr.ecr.us-east-2.amazonaws.com/autogluon-sagemaker-inference]

[1B22b200f1: Preparing 
[1Bcb76fc8c: Preparing 
[1Bf316b9d8: Preparing 
[1Be89e5b45: Preparing 
[1B71b99baf: Preparing 
[1B1c6d139e: Preparing 
[1Bde4347ad: Preparing 
[1B4b73c942: Preparing 
[1B4af985

### Create Model

In [40]:
# Create predictor object
class AutoGluonTabularPredictor(RealTimePredictor):
    """Predictor for the AutoGluon tabular endpoint that exchanges CSV payloads.

    Requests are serialized with ``csv_serializer`` and sent with content type
    ``text/csv``; responses are parsed with ``csv_deserializer``. All positional
    and keyword arguments are forwarded unchanged to ``RealTimePredictor``, so
    callers must not pass ``content_type``, ``serializer`` or ``deserializer``
    themselves (they are fixed here).
    """

    def __init__(self, *args, **kwargs):
        # csv_deserializer must be imported from sagemaker.predictor alongside
        # csv_serializer, otherwise constructing the predictor raises NameError.
        # NOTE(review): confirm that the inference container's response really
        # is CSV; the container code is not visible from this notebook.
        super().__init__(*args, content_type='text/csv',
                         serializer=csv_serializer,
                         deserializer=csv_deserializer, **kwargs)

In [41]:
# URI of the inference image pushed to ECR above
ecr_image = f'{account}.dkr.ecr.{region}.amazonaws.com/{inference_algorithm_name}:latest'

if instance_type == 'local':
    # Local mode: let the estimator build the model from its own training output
    model = estimator.create_model(image=ecr_image, role=role)
else:
    # Use the estimator's public model_data property for the artifact location
    # instead of assembling the S3 URI by hand from a private attribute with
    # os.path.join (which is meant for filesystem paths, not URIs).
    model_uri = estimator.model_data
    model = Model(model_uri, ecr_image, role=role, sagemaker_session=session, predictor_cls=AutoGluonTabularPredictor)

### Batch Transform

For local mode, either `s3://<bucket>/<prefix>/output/` or `file:///<absolute_local_path>` can be used as outputs.

Include the label column in the input data to also evaluate prediction performance (in this case, pass `test_s3_path` instead of `X_test_s3_path`).

In [42]:
# Destination for the batch predictions (swap in the file:// variant for local output)
output_path = f's3://{bucket}/{prefix}/output/'
# output_path = f'file://{os.getcwd()}'

# Transformer settings, collected in one place for readability
transformer_settings = dict(
    instance_count=1,
    instance_type=instance_type,
    strategy='SingleRecord',
    assemble_with='Line',
    max_payload=1,
    max_concurrent_transforms=100,
    output_path=output_path,
)
transformer = model.transformer(**transformer_settings)

# Run the job on the labeled test CSV and block until it finishes
transformer.transform(test_s3_path, content_type='text/csv')
transformer.wait()

...................[34m2020-03-24 10:28:19,796 [INFO ] main com.amazonaws.ml.mms.ModelServer - [0m
[34mMMS Home: /usr/local/lib/python3.6/site-packages[0m
[34mCurrent directory: /[0m
[34mTemp directory: /home/model-server/tmp[0m
[34mNumber of GPUs: 0[0m
[34mNumber of CPUs: 96[0m
[34mMax heap size: 27305 M[0m
[34mPython executable: /usr/local/bin/python3.6[0m
[34mConfig file: /etc/sagemaker-mms.properties[0m
[34mInference address: http://0.0.0.0:8080[0m
[34mManagement address: http://0.0.0.0:8080[0m
[34mModel Store: /.sagemaker/mms/models[0m
[34mInitial Models: ALL[0m
[34mLog dir: /logs[0m
[34mMetrics dir: /logs[0m
[34mNetty threads: 0[0m
[34mNetty client threads: 0[0m
[34mDefault workers per model: 96[0m
[34mBlacklist Regex: N/A[0m
[34mMaximum Response Size: 6553500[0m
[34mMaximum Request Size: 6553500[0m
[34m2020-03-24 10:28:19,834 [INFO ] main com.amazonaws.ml.mms.wlm.ModelManager - Model model loaded.[0m
[34m2020-03-24 10:28:19,958 [INFO 

[32m2020-03-24T10:28:27.272:[sagemaker logs]: MaxConcurrentTransforms=100, MaxPayloadInMB=1, BatchStrategy=SINGLE_RECORD[0m
[34m2020-03-24 10:28:27,260 [INFO ] pool-1-thread-97 ACCESS_LOG - /169.254.255.130:38348 "GET /ping HTTP/1.1" 200 11[0m
[34m2020-03-24 10:28:27,267 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:38352 "GET /execution-parameters HTTP/1.1" 404 0[0m
[34m2020-03-24 10:28:27,375 [INFO ] W-9030-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading model from /opt/ml/model with contents ['code', 'learner.pkl', 'models', 'SummaryOfModels.html'][0m
[35m2020-03-24 10:28:27,260 [INFO ] pool-1-thread-97 ACCESS_LOG - /169.254.255.130:38348 "GET /ping HTTP/1.1" 200 11[0m
[35m2020-03-24 10:28:27,267 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:38352 "GET /execution-parameters HTTP/1.1" 404 0[0m
[35m2020-03-24 10:28:27,375 [INFO ] W-9030-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading model from /opt/ml/model 




In [None]:
# Check s3 for <filename>.csv.out file
if instance_type != 'local':
    !aws s3 ls {transformer.output_path} --recursive
elif 's3' in output_path:
    !aws s3 ls {output_path+transformer.latest_transform_job.job_name} --recursive

### Endpoint

##### Deploy remote or local endpoint

In [None]:
# Pick where to host the endpoint; uncomment the second line for local mode.
# Only one assignment should be active: previously both were, so the first was
# dead code and 'local' always won, even when the model above had been created
# for hosted use with the regular SageMaker session.
instance_type = 'ml.m5.24xlarge'
#instance_type = 'local'

predictor = model.deploy(initial_instance_count=1,
                         instance_type=instance_type)

##### Attach to endpoint (or reattach if kernel was restarted)

In [None]:
# Use the local-mode session for a local endpoint, the regular session otherwise
sess = local_session if instance_type == 'local' else session

# Wrap the existing endpoint in the CSV-aware predictor class
endpoint_name = predictor.endpoint
predictor = AutoGluonTabularPredictor(endpoint_name, sagemaker_session=sess)

##### Predict on unlabeled test data

In [None]:
# Send the features as CSV without the DataFrame index, so the payload has the
# same columns as the CSV files written to S3 (those were saved with index=False).
results = predictor.predict(X_test.to_csv(index=False))

# Double check output
print(results)

##### Predict on data that includes label column  
Prediction performance metrics will be printed to endpoint logs.

In [None]:
# Send features plus label as CSV without the DataFrame index, matching the
# layout of the labeled test file written to S3 (saved with index=False).
results = predictor.predict(test.to_csv(index=False))

# Double check output
Counter(results)

##### Check that performance metrics match evaluation printed to endpoint logs as expected

In [None]:
# Overall accuracy of the endpoint predictions against the held-out labels
test_accuracy = accuracy_score(y_true=y_test, y_pred=results)
print(test_accuracy)

# Per-class precision / recall / F1, printed with six decimal places
test_report = classification_report(y_true=y_test, y_pred=results, digits=6)
print(test_report)

##### Clean up endpoint

In [None]:
# Tear down the endpoint created above once you are done with it.
predictor.delete_endpoint()

<br><br><br><br>

# FAQ

- **Q**: I get a disk full error when running local training and deployment, but my disk isn't full.
    - **A**: SageMaker uses `/tmp` folder which has a max size of ~10GB on SM hosted notebooks. Run `sudo rm -rf /tmp/*` to clean.    
- **Q**: I'm having docker issues with local training or deployment, what can I do?
    - **A**: Try deleting running containers. For example: `docker stop $(docker ps -aq) && docker rm $(docker ps -aq)`