In [1]:
import sagemaker
sess = sagemaker.Session()

In [2]:
#%%sh
#wget -N https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
#unzip -o bank-additional.zip

In [3]:
bucket = sess.default_bucket()
prefix = 'sagemaker/DEMO-automl-dm'
s3_input_data = sess.upload_data(path="bank-additional/bank-additional-full.csv", key_prefix=prefix+'input')

In [4]:
from sagemaker.automl.automl import AutoML
auto_ml_job = AutoML(
    role = sagemaker.get_execution_role(),
    sagemaker_session = sess,
    target_attribute_name = 'y',
    output_path ='s3://{}/{}/output'.format(bucket,prefix),
    max_runtime_per_training_job_in_seconds = 600,
    max_candidates = 250,
    total_job_runtime_in_seconds = 3600
)

In [5]:
auto_ml_job.fit(inputs=s3_input_data, logs=False,wait=False)

In [6]:
from time import sleep
job = auto_ml_job.describe_auto_ml_job()
job_status = job['AutoMLJobStatus']
job_sec_status = job['AutoMLJobSecondaryStatus']
if job_status not in ('Stopped', 'Failed'):
    while job_status in ('InProgress') and job_sec_status in ('AnalyzingData'):
        sleep(30)
        job = auto_ml_job.describe_auto_ml_job()
        job_status = job['AutoMLJobStatus']
        job_sec_status = job['AutoMLJobSecondaryStatus']
        print (job_status, job_sec_status)

InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress AnalyzingData
InProgress FeatureEngineering


In [7]:
job = auto_ml_job.describe_auto_ml_job()
job_candidate_notebook = job['AutoMLJobArtifacts']
['CandidateDefinitionNotebookLocation']
job_data_notebook = job['AutoMLJobArtifacts']
['DataExplorationNotebookLocation']
print(job_candidate_notebook)
print(job_data_notebook)

{'CandidateDefinitionNotebookLocation': 's3://sagemaker-us-east-2-664224523979/sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/sagemaker-automl-candidates/pr-1-6bd5d35fb19e4be8928bd9a6825b6db1f8772be33fe34153a6a9a44305/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb', 'DataExplorationNotebookLocation': 's3://sagemaker-us-east-2-664224523979/sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/sagemaker-automl-candidates/pr-1-6bd5d35fb19e4be8928bd9a6825b6db1f8772be33fe34153a6a9a44305/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb'}
{'CandidateDefinitionNotebookLocation': 's3://sagemaker-us-east-2-664224523979/sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/sagemaker-automl-candidates/pr-1-6bd5d35fb19e4be8928bd9a6825b6db1f8772be33fe34153a6a9a44305/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb', 'DataExplorationNotebookLocation': 's3://sagemaker-us-east-2-664224523979/sagemaker/DEMO-automl-dm/output/automl-20

In [8]:
import pandas as pd
from sagemaker.analytics import ExperimentAnalytics
exp = ExperimentAnalytics(
    sagemaker_session=sess,
    experiment_name=job['AutoMLJobName'] +
    '-aws-auto-ml-job')

df = exp.dataframe()
print("Number of jobs: ", len(df))

df = pd.concat([df['ObjectiveMetric - Max'],
                df.drop(['ObjectiveMetric - Max'], axis=1)], axis=1)
df.sort_values('ObjectiveMetric - Max', ascending=0)[:5]

Number of jobs:  38


Unnamed: 0,ObjectiveMetric - Max,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,_tuning_objective_metric,alpha,...,code - MediaType,code - Value,input_channel_mode,job_name,label_col,max_dataset_size,SageMaker.ImageUri - MediaType,SageMaker.ImageUri - Value,ds - MediaType,ds - Value
20,0.7708,tuning-job-1-3d01ecb1917b4fc895-001-68233452-a...,tuning-job-1-3d01ecb1917b4fc895-001-68233452-a...,arn:aws:sagemaker:us-east-2:664224523979:train...,257758044811.dkr.ecr.us-east-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.061822,...,,,,,,,,,,
5,0.75857,tuning-job-1-3d01ecb1917b4fc895-013-82246804-a...,tuning-job-1-3d01ecb1917b4fc895-013-82246804-a...,arn:aws:sagemaker:us-east-2:664224523979:train...,257758044811.dkr.ecr.us-east-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.000361,...,,,,,,,,,,
17,0.75589,tuning-job-1-3d01ecb1917b4fc895-005-c231a6b1-a...,tuning-job-1-3d01ecb1917b4fc895-005-c231a6b1-a...,arn:aws:sagemaker:us-east-2:664224523979:train...,257758044811.dkr.ecr.us-east-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,3e-06,...,,,,,,,,,,
3,0.75454,tuning-job-1-3d01ecb1917b4fc895-019-847b5d70-a...,tuning-job-1-3d01ecb1917b4fc895-019-847b5d70-a...,arn:aws:sagemaker:us-east-2:664224523979:train...,257758044811.dkr.ecr.us-east-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,0.017532,...,,,,,,,,,,
11,0.75343,tuning-job-1-3d01ecb1917b4fc895-011-afc49414-a...,tuning-job-1-3d01ecb1917b4fc895-011-afc49414-a...,arn:aws:sagemaker:us-east-2:664224523979:train...,257758044811.dkr.ecr.us-east-2.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,2e-06,...,,,,,,,,,,


In [9]:
job_best_candidate = auto_ml_job.best_candidate()
print(job_best_candidate['CandidateName'])
print(job_best_candidate['FinalAutoMLJobObjectiveMetric'])

tuning-job-1-3d01ecb1917b4fc895-001-68233452
{'MetricName': 'validation:f1', 'Value': 0.7720800042152405}


In [10]:
import boto3
job_outputs_prefix = '{}/output/{}'.format(prefix,job['AutoMLJobName'])
s3_bucket = boto3.resource('s3').Bucket(bucket)
s3_bucket.objects.filter(Prefix=job_outputs_prefix).delete()

[{'ResponseMetadata': {'RequestId': 'YBF6MVJM8YK9W405',
   'HostId': 'mEiKGQNL1+85FAXD0IN//t7k+Qi+LQSftVLx3TuC+2pH5OVnicRUk95xGAqT90LdP0LlzC2mfKA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'mEiKGQNL1+85FAXD0IN//t7k+Qi+LQSftVLx3TuC+2pH5OVnicRUk95xGAqT90LdP0LlzC2mfKA=',
    'x-amz-request-id': 'YBF6MVJM8YK9W405',
    'date': 'Sun, 04 Apr 2021 12:52:58 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/data-processor-models/automl-202-dpp3-1-6b4b177484114b04afd195a6343476ce6fa4e2e580b24/output/model.tar.gz'},
   {'Key': 'sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/transformed-data/dpp1/csv/train/chunk_47.csv.out'},
   {'Key': 'sagemaker/DEMO-automl-dm/output/automl-2021-04-04-11-51-27-570/preprocessed-data/header/headers.csv'},
   {'Key': 'sagemaker/DEMO-automl

In [None]:
import boto3
job_outputs_prefix = '{}/output/{}'.format(prefix,
job['AutoMLJobName'])
s3_bucket = boto3.resource('s3').Bucket(bucket)
s3_bucket.objects.filter(Prefix=job_outputs_prefix).delete()