In [1]:
import boto3
import sagemaker
import os

# Name of the hyperparameter tuning job analysed in this notebook.
tuning_job_name = 'tensorflow-training-191120-1317'

# One boto3 session supplies both the configured region and the SageMaker client.
boto_session = boto3.Session()
region = boto_session.region_name
sage_client = boto_session.client('sagemaker')

In [2]:
# Fetch the description of the tuning job (configuration, status, counters) via boto3.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

In [3]:
# Display the raw response returned by describe_hyper_parameter_tuning_job.
tuning_job_result

{'HyperParameterTuningJobName': 'tensorflow-training-191120-1317',
 'HyperParameterTuningJobArn': 'arn:aws:sagemaker:us-east-1:869530972998:hyper-parameter-tuning-job/tensorflow-training-191120-1317',
 'HyperParameterTuningJobConfig': {'Strategy': 'Bayesian',
  'HyperParameterTuningJobObjective': {'Type': 'Maximize',
   'MetricName': 'f1_score'},
  'ResourceLimits': {'MaxNumberOfTrainingJobs': 16,
   'MaxParallelTrainingJobs': 2},
  'ParameterRanges': {'IntegerParameterRanges': [{'Name': 'epochs',
     'MinValue': '1',
     'MaxValue': '10',
     'ScalingType': 'Auto'},
    {'Name': 'batch-size',
     'MinValue': '16',
     'MaxValue': '1024',
     'ScalingType': 'Logarithmic'},
    {'Name': 'layers',
     'MinValue': '1',
     'MaxValue': '4',
     'ScalingType': 'Auto'},
    {'Name': 'dense-layer',
     'MinValue': '4',
     'MaxValue': '64',
     'ScalingType': 'Auto'}],
   'ContinuousParameterRanges': [{'Name': 'learning-rate',
     'MinValue': '0.001',
     'MaxValue': '0.1',
    

In [4]:
# Warn when the tuning job is in any state other than 'Completed'.
status = tuning_job_result['HyperParameterTuningJobStatus']
job_is_done = (status == 'Completed')
if not job_is_done:
    print('Reminder: the tuning job has not been completed.')

In [6]:
# Report how many training jobs of this tuning job have completed.
status_counters = tuning_job_result['TrainingJobStatusCounters']
job_count = status_counters['Completed']
print("%d training jobs have completed" % job_count)

# The objective's direction and metric name live in the same config entry; look it up once.
objective_config = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
# Any objective type other than 'Maximize' is treated as minimization.
is_minimize = (objective_config['Type'] != 'Maximize')
objective_name = objective_config['MetricName']

16 training jobs have completed


In [7]:
from pprint import pprint

# 'BestTrainingJob' is absent (or empty) until at least one job has reported a result.
best_job = tuning_job_result.get('BestTrainingJob')
if not best_job:
    print("No training jobs have reported results yet.")
else:
    print("Best model found so far:")
    pprint(best_job)

Best model found so far:
{'CreationTime': datetime.datetime(2019, 11, 20, 13, 32, 44, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'f1_score',
                                                 'Value': 0.4595000147819519},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2019, 11, 20, 13, 37, 15, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:869530972998:training-job/tensorflow-training-191120-1317-009-61fc9499',
 'TrainingJobName': 'tensorflow-training-191120-1317-009-61fc9499',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2019, 11, 20, 13, 35, 21, tzinfo=tzlocal()),
 'TunedHyperParameters': {'batch-size': '16',
                          'dense-layer': '42',
                          'epochs': '9',
                          'layers': '1',
                          'learning-rate': '0.09608245871965426'}}


In [10]:
import pandas as pd

# Create an analytics object for the tuning job with the SageMaker Python SDK.
# Note that this is not the boto3 SageMaker client created earlier: boto3 and
# sagemaker are two separate libraries.
# A tuner could also be instantiated through HyperparameterTuner, but this notebook only
# analyses results and assumes the tuning job has already completed, so the analytics
# object is built directly from the tuning job name.
tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

In [11]:
# Results of the tuning job as a DataFrame (already in tabular form, one row per training job).
full_df = tuner.dataframe() # Already formatted in tabular form

In [12]:
# Display the results table.
full_df

Unnamed: 0,FinalObjectiveValue,TrainingElapsedTimeSeconds,TrainingEndTime,TrainingJobName,TrainingJobStatus,TrainingStartTime,batch-size,dense-layer,epochs,layers,learning-rate
0,0.0,63.0,2019-11-20 13:48:19+00:00,tensorflow-training-191120-1317-016-e5c10a9f,Completed,2019-11-20 13:47:16+00:00,693.0,6.0,4.0,2.0,0.096684
1,0.3404,74.0,2019-11-20 13:48:35+00:00,tensorflow-training-191120-1317-015-a10233d4,Completed,2019-11-20 13:47:21+00:00,276.0,35.0,3.0,4.0,0.096902
2,0.2681,54.0,2019-11-20 13:44:53+00:00,tensorflow-training-191120-1317-014-3a4cd553,Completed,2019-11-20 13:43:59+00:00,904.0,28.0,10.0,1.0,0.081131
3,0.0,119.0,2019-11-20 13:44:48+00:00,tensorflow-training-191120-1317-013-5ba68d2b,Completed,2019-11-20 13:42:49+00:00,16.0,40.0,9.0,1.0,0.094194
4,0.0,140.0,2019-11-20 13:41:46+00:00,tensorflow-training-191120-1317-012-ca5d8a4c,Completed,2019-11-20 13:39:26+00:00,16.0,59.0,9.0,3.0,0.085522
5,0.3665,95.0,2019-11-20 13:40:28+00:00,tensorflow-training-191120-1317-011-e19ed8de,Completed,2019-11-20 13:38:53+00:00,67.0,6.0,3.0,4.0,0.094358
6,0.2577,74.0,2019-11-20 13:36:19+00:00,tensorflow-training-191120-1317-010-884eb0ea,Completed,2019-11-20 13:35:05+00:00,264.0,15.0,3.0,1.0,0.091282
7,0.4595,114.0,2019-11-20 13:37:15+00:00,tensorflow-training-191120-1317-009-61fc9499,Completed,2019-11-20 13:35:21+00:00,16.0,42.0,9.0,1.0,0.096082
8,0.0,72.0,2019-11-20 13:32:45+00:00,tensorflow-training-191120-1317-008-43feabe5,Completed,2019-11-20 13:31:33+00:00,132.0,43.0,1.0,3.0,0.002095
9,0.0,78.0,2019-11-20 13:32:15+00:00,tensorflow-training-191120-1317-007-ed1abca3,Completed,2019-11-20 13:30:57+00:00,82.0,28.0,1.0,4.0,0.019686


In [13]:
# Keep only jobs with a usable objective value: rows whose value is NaN or -inf
# fail the comparison below and are dropped.
df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
# Best job first: ascending order when minimizing, descending when maximizing.
df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
print("Number of training jobs with valid objective: %d" % len(df))
# Series.min()/max() return NaN for an empty frame, whereas the builtins raise ValueError.
print({"lowest": df['FinalObjectiveValue'].min(), "highest": df['FinalObjectiveValue'].max()})
# None means "no limit"; the former -1 sentinel was deprecated in pandas 1.0
# and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)  # Don't truncate TrainingJobName

Number of training jobs with valid objective: 16
{'lowest': 0.0, 'highest': 0.4595000147819519}


In [16]:
#Import bokeh library for visualization

import bokeh
import bokeh.io
import bokeh.layouts
bokeh.io.output_notebook()

from bokeh.plotting import figure, show
from bokeh.models import HoverTool

In [17]:
# Helper class that assembles the Bokeh tools (hover tooltips plus standard tools) for the plots.
class HoverHelper():
    """Builds the Bokeh tool list used when plotting tuning-job results.

    The hover tooltip shows the final objective value, the training job name
    and one row per key of the analytics object's ``tuning_ranges``.
    """

    def __init__(self, tuning_analytics):
        """Keep the analytics object; ``hovertool`` reads its ``tuning_ranges``."""
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a new ``HoverTool`` whose tooltips cover the fixed fields and every tuned hyperparameter."""
        fixed_rows = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # Hyperparameter names are wrapped in braces, e.g. "@{batch-size}".
        hyperparameter_rows = [
            (name, "@{%s}" % name) for name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_rows + hyperparameter_rows)

    def tools(self, standard_tools='pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset'):
        """Return ``[hover_tool, standard_tools]``, suitable for a figure's ``tools`` argument.

        ``standard_tools`` is passed through unchanged as the second element.
        """
        hover_tool = self.hovertool()
        return [hover_tool, standard_tools]


In [18]:
# Tool factory bound to this tuning job's analytics object; reused by the plots below.
hover = HoverHelper(tuner)

In [32]:
# Figure with a datetime x-axis for plotting the objective over time.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which Bokeh 3.0 removed.
p = figure(width=900, height=400, tools=hover.tools(), x_axis_type='datetime')

In [33]:
# One marker per training job: start time on x, final objective value on y.
p.circle(source=df, x='TrainingStartTime', y='FinalObjectiveValue')

In [34]:
# Render the figure.
show(p)

In [35]:
# Now let's analyse the ranges configured for each hyperparameter.
ranges = tuner.tuning_ranges
ranges

{'epochs': {'Name': 'epochs',
  'MinValue': '1',
  'MaxValue': '10',
  'ScalingType': 'Auto'},
 'batch-size': {'Name': 'batch-size',
  'MinValue': '16',
  'MaxValue': '1024',
  'ScalingType': 'Logarithmic'},
 'layers': {'Name': 'layers',
  'MinValue': '1',
  'MaxValue': '4',
  'ScalingType': 'Auto'},
 'dense-layer': {'Name': 'dense-layer',
  'MinValue': '4',
  'MaxValue': '64',
  'ScalingType': 'Auto'},
 'learning-rate': {'Name': 'learning-rate',
  'MinValue': '0.001',
  'MaxValue': '0.1',
  'ScalingType': 'ReverseLogarithmic'}}

In [45]:
def is_num(x):
    """Return True if ``x`` can be converted to a float, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return False
    return True


# Plot the objective against each tuned hyperparameter, one figure per hyperparameter.
ranges = tuner.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    # Only ranges that carry a non-empty 'Values' entry are treated as categorical.
    vals = hp_range.get('Values')
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Set up extra options for plotting categoricals.  A bit tricky when they're actually numbers.
            categorical_args['x_range'] = vals

    # Now plot it. `width`/`height` are used instead of `plot_width`/`plot_height`,
    # which Bokeh 3.0 removed.
    p = figure(width=500, height=500,
               title="Objective vs %s" % hp_name,
               tools=hover.tools(),
               x_axis_label=hp_name, y_axis_label=objective_name,
               **categorical_args)
    p.circle(source=df, x=hp_name, y='FinalObjectiveValue')
    figures.append(p)
# Stack all figures vertically in a single layout.
show(bokeh.layouts.column(figures))