In [13]:
import boto3
import sagemaker
import os

# Build a single boto3 session and derive both the region and the SageMaker
# client from it, so the two are resolved from the same configuration instead
# of from two independently constructed sessions.
boto_session = boto3.Session()
region = boto_session.region_name
sage_client = boto_session.client('sagemaker')

## You must have already run a hyperparameter tuning job to analyze it here.
## The hyperparameter tuning jobs you have run are listed in the Training section on your SageMaker dashboard.
## Copy the name of a completed job you want to analyze from that list.
## For example: tuning_job_name = 'mxnet-training-201007-0054'.
tuning_job_name = 'rigl-tuning-201031-0739'

In [14]:
# Re-run this cell at any time to refresh the tuning job's description and status.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

status = tuning_job_result['HyperParameterTuningJobStatus']
if status != 'Completed':
    print('Reminder: the tuning job has not been completed.')

# Count of training jobs reported under the 'Completed' status counter.
job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
print("%d training jobs have completed" % job_count)

# The objective definition is nested inside the job config; look it up once.
objective_config = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
objective_name = objective_config['MetricName']
# Any objective type other than 'Maximize' is treated as a minimization.
is_minimize = (objective_config['Type'] != 'Maximize')

45 training jobs have completed


In [15]:
from pprint import pprint
# 'BestTrainingJob' is absent (or empty) until some training job has reported
# an objective value, so look it up defensively.
best_training_job = tuning_job_result.get('BestTrainingJob')
if best_training_job:
    print("Best model found so far:")
    pprint(best_training_job)
else:
    print("No training jobs have reported results yet.")


Best model found so far:
{'CreationTime': datetime.datetime(2020, 10, 31, 7, 39, 29, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'top1-accuracy',
                                                 'Value': 30.266000747680664},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2020, 10, 31, 8, 49, 14, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:277413220383:training-job/rigl-tuning-201031-0739-004-00fd2b7a',
 'TrainingJobName': 'rigl-tuning-201031-0739-004-00fd2b7a',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2020, 10, 31, 7, 42, 56, tzinfo=tzlocal()),
 'TunedHyperParameters': {'alpha': '"0.3"',
                          'delta': '316',
                          'grad-accumulation-n': '47'}}


In [16]:
import pandas as pd

# Load the per-training-job results of the tuning job into a DataFrame.
tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

full_df = tuner.dataframe()

if len(full_df) > 0:
    # Keep only rows whose objective compares greater than -inf; rows with a
    # NaN objective fail this comparison and are dropped as well.
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
else:
    # Always bind `df` so the display below (and later cells) never hit a
    # NameError or silently reuse a stale `df` from an earlier run.
    df = full_df

if len(df) > 0:
    # Best job first: ascending when minimizing, descending when maximizing.
    df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
    print("Number of training jobs with valid objective: %d" % len(df))
    print({"lowest":min(df['FinalObjectiveValue']),"highest": max(df['FinalObjectiveValue'])})
    # Don't truncate TrainingJobName. `None` is the supported "unlimited" value;
    # the old `-1` spelling is deprecated since pandas 1.0 and rejected by
    # newer releases. Fall back to -1 only for pandas versions predating `None`.
    try:
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        pd.set_option('display.max_colwidth', -1)
else:
    print("No training jobs have reported valid results yet.")

df

Number of training jobs with valid objective: 99
{'lowest': 0.3479999899864197, 'highest': 30.266000747680664}




Unnamed: 0,alpha,delta,grad-accumulation-n,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
97,"""0.3""",316.0,47.0,rigl-tuning-201031-0739-004-00fd2b7a,Completed,30.266001,2020-10-31 07:42:56+00:00,2020-10-31 08:49:14+00:00,3978.0
12,"""0.3""",797.0,95.0,rigl-tuning-201031-0739-089-9513865d,Completed,29.760000,2020-10-31 18:23:35+00:00,2020-10-31 19:29:45+00:00,3970.0
8,"""0.3""",975.0,91.0,rigl-tuning-201031-0739-093-b9ece616,Completed,29.733999,2020-10-31 18:33:13+00:00,2020-10-31 19:39:27+00:00,3974.0
71,"""0.3""",631.0,87.0,rigl-tuning-201031-0739-030-9c3df50c,Completed,29.629999,2020-10-31 10:34:12+00:00,2020-10-31 11:40:55+00:00,4003.0
17,"""0.3""",998.0,13.0,rigl-tuning-201031-0739-084-db4fbeec,Completed,29.486000,2020-10-31 17:22:27+00:00,2020-10-31 18:28:53+00:00,3986.0
...,...,...,...,...,...,...,...,...,...
40,"""0.3""",512.0,2.0,rigl-tuning-201031-0739-061-a5ce2045,Stopped,1.342000,2020-10-31 14:34:22+00:00,2020-10-31 15:19:54+00:00,2732.0
51,"""0.3""",741.0,95.0,rigl-tuning-201031-0739-050-6fc93764,Stopped,1.326000,2020-10-31 13:23:49+00:00,2020-10-31 14:10:50+00:00,2821.0
4,"""0.3""",998.0,3.0,rigl-tuning-201031-0739-097-840fd742,Stopped,1.208000,2020-10-31 19:12:45+00:00,2020-10-31 20:02:15+00:00,2970.0
30,"""0.3""",866.0,1.0,rigl-tuning-201031-0739-071-a517c522,Stopped,1.110000,2020-10-31 15:53:11+00:00,2020-10-31 16:39:42+00:00,2791.0


In [17]:
import bokeh
import bokeh.io
bokeh.io.output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import HoverTool

class HoverHelper:
    """Assemble the Bokeh tool list (hover tooltips plus standard tools) for the tuning plots."""

    def __init__(self, tuning_analytics):
        """Keep the analytics object; its ``tuning_ranges`` keys are added to the hover tooltips."""
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a HoverTool listing the objective value, the job name and every tuning-range key."""
        fixed_fields = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # One "@{name}" column reference per key in the tuner's tuning ranges.
        range_fields = [
            (hp_name, "@{%s}" % hp_name)
            for hp_name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_fields + range_fields)

    def tools(self, standard_tools='pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset'):
        """Return a two-item list: a fresh hover tool followed by the ``standard_tools`` string."""
        tool_list = [self.hovertool()]
        tool_list.append(standard_tools)
        return tool_list

# Shared tool helper; also reused by the per-hyperparameter plots below.
hover = HoverHelper(tuner)

# Objective value of every training job plotted against its start time.
# `width`/`height` are used instead of `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=900, height=400, tools=hover.tools(), x_axis_type='datetime')
# `scatter` draws fixed-size circle markers by default; `circle` used this way
# is deprecated in recent Bokeh releases.
p.scatter(source=df, x='TrainingStartTime', y='FinalObjectiveValue')
show(p)

In [18]:
# Import the submodule explicitly rather than relying on another bokeh import
# having loaded `bokeh.layouts` as a side effect.
import bokeh.layouts


def is_num(x):
    """Return True if ``x`` can be converted with ``float()``, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


# One "objective vs hyperparameter" figure per tuned hyperparameter.
ranges = tuner.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    vals = hp_range.get('Values')
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Genuinely categorical: give the figure an explicit categorical x range.
            categorical_args['x_range'] = vals

    # Now plot it.  `width`/`height` replace `plot_width`/`plot_height`, which
    # were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
    p = figure(width=500, height=500,
               title="Objective vs %s" % hp_name,
               tools=hover.tools(),
               x_axis_label=hp_name, y_axis_label=objective_name,
               **categorical_args)
    # `scatter` draws fixed-size circle markers by default; `circle` used this
    # way is deprecated in recent Bokeh releases.
    p.scatter(source=df, x=hp_name, y='FinalObjectiveValue')
    figures.append(p)
show(bokeh.layouts.Column(*figures))