In [37]:
import boto3
import sagemaker
import os

# Build a single boto3 session and derive both the region and the SageMaker
# client from it, so the two always share the same credentials/configuration
# (the session is also only resolved once instead of twice).
boto_session = boto3.Session()
region = boto_session.region_name
sage_client = boto_session.client('sagemaker')

## You must have already run a hyperparameter tuning job to analyze it here.
## The hyperparameter tuning jobs you have run are listed in the Training section of your SageMaker dashboard.
## Copy the name of a completed job you want to analyze from that list.
## For example: tuning_job_name = 'mxnet-training-201007-0054'.
tuning_job_name = 'rigl-tuning-201101-0535'

In [38]:
# Run this cell to fetch the current description of the hyperparameter tuning job.
# Every value below is read from this single response.
tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name
)

# Remind the reader when the job is in any state other than 'Completed'.
status = tuning_job_result['HyperParameterTuningJobStatus']
if not status == 'Completed':
    print('Reminder: the tuning job has not been completed.')

# Number of training jobs the service counts as completed.
job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
print("%d training jobs have completed" % job_count)

# Objective settings: anything that is not explicitly 'Maximize' is treated as minimize.
objective_config = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
is_minimize = objective_config['Type'] != 'Maximize'
objective_name = objective_config['MetricName']

Reminder: the tuning job has not been completed.
40 training jobs have completed


In [39]:
from pprint import pprint
# Show the best training job reported so far; a missing or empty entry means
# nothing has reported a result yet.
best_training_job = tuning_job_result.get('BestTrainingJob')
if not best_training_job:
    print("No training jobs have reported results yet.")
else:
    print("Best model found so far:")
    pprint(best_training_job)


Best model found so far:
{'CreationTime': datetime.datetime(2020, 11, 2, 11, 22, 30, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'top1-accuracy',
                                                 'Value': 66.3280029296875},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2020, 11, 2, 18, 47, 41, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:277413220383:training-job/rigl-tuning-201101-0535-033-e6a31705',
 'TrainingJobName': 'rigl-tuning-201101-0535-033-e6a31705',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2020, 11, 2, 11, 25, 50, tzinfo=tzlocal()),
 'TunedHyperParameters': {'alpha': '"0.3"',
                          'delta': '619',
                          'grad-accumulation-n': '94'}}


In [40]:
import pandas as pd

# Load one row per training job of the tuning job into a DataFrame.
tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
full_df = tuner.dataframe()

# Start from the unfiltered frame so `df` is always defined, even when no
# training job exists yet (previously the final `df` raised NameError then).
df = full_df

if len(full_df) > 0:
    # Keep only jobs with a usable objective value: -inf is excluded, and NaN
    # compares False so rows without an objective are dropped as well.
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
    if len(df) > 0:
        # Best job first: ascending when minimizing, descending when maximizing.
        df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({"lowest": float(df['FinalObjectiveValue'].min()),
               "highest": float(df['FinalObjectiveValue'].max())})
        # Don't truncate TrainingJobName. `None` is the supported "no limit"
        # value; the old `-1` is deprecated and rejected by recent pandas.
        pd.set_option('display.max_colwidth', None)
    else:
        print("No training jobs have reported valid results yet.")

df

Number of training jobs with valid objective: 40
{'lowest': 64.45999908447266, 'highest': 66.3280029296875}




Unnamed: 0,alpha,delta,grad-accumulation-n,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
15,"""0.3""",619.0,94.0,rigl-tuning-201101-0535-033-e6a31705,Completed,66.328003,2020-11-02 11:25:50+00:00,2020-11-02 18:47:41+00:00,26511.0
14,"""0.3""",670.0,95.0,rigl-tuning-201101-0535-034-2c052f7d,Completed,66.300003,2020-11-02 11:25:25+00:00,2020-11-02 18:48:52+00:00,26607.0
43,"""0.3""",621.0,73.0,rigl-tuning-201101-0535-005-4153805b,Completed,66.248001,2020-11-01 05:39:09+00:00,2020-11-01 13:00:31+00:00,26482.0
12,"""0.3""",598.0,79.0,rigl-tuning-201101-0535-036-7c9d9e5a,Completed,66.129997,2020-11-02 11:26:03+00:00,2020-11-02 18:47:41+00:00,26498.0
30,"""0.3""",554.0,95.0,rigl-tuning-201101-0535-018-49bc5dc5,Completed,66.042,2020-11-01 20:32:24+00:00,2020-11-02 03:59:12+00:00,26808.0
45,"""0.3""",355.0,3.0,rigl-tuning-201101-0535-003-0d89531b,Completed,66.036003,2020-11-01 05:39:06+00:00,2020-11-01 13:01:57+00:00,26571.0
21,"""0.3""",543.0,91.0,rigl-tuning-201101-0535-027-9000d1ea,Completed,66.001999,2020-11-02 03:58:54+00:00,2020-11-02 11:26:03+00:00,26829.0
28,"""0.3""",634.0,94.0,rigl-tuning-201101-0535-020-e98263b9,Completed,66.0,2020-11-01 20:32:37+00:00,2020-11-02 03:58:04+00:00,26727.0
27,"""0.3""",634.0,4.0,rigl-tuning-201101-0535-021-7dfbab04,Completed,65.954002,2020-11-01 20:32:20+00:00,2020-11-02 03:52:33+00:00,26413.0
46,"""0.5""",891.0,74.0,rigl-tuning-201101-0535-002-0f34099a,Completed,65.935997,2020-11-01 05:38:37+00:00,2020-11-01 13:01:52+00:00,26595.0


In [41]:
import bokeh
import bokeh.io
bokeh.io.output_notebook()
from bokeh.plotting import figure, show
from bokeh.models import HoverTool

class HoverHelper():
    """Builds the Bokeh tool list (hover tooltips plus standard tools) used by the plots.

    The tooltip always shows the objective value and the training job name,
    followed by one entry per key of the analytics object's ``tuning_ranges``.
    """

    def __init__(self, tuning_analytics):
        """Keep a reference to the analytics object whose ``tuning_ranges`` is read later."""
        self.tuner = tuning_analytics

    def hovertool(self):
        """Return a ``HoverTool`` whose tooltips cover the fixed fields and every tuned hyperparameter."""
        fixed_fields = [
            ("FinalObjectiveValue", "@FinalObjectiveValue"),
            ("TrainingJobName", "@TrainingJobName"),
        ]
        # "@{name}" is the braces form of a column reference, so hyperparameter
        # names containing characters such as '-' still resolve.
        hyperparameter_fields = [
            (name, "@{%s}" % name) for name in self.tuner.tuning_ranges.keys()
        ]
        return HoverTool(tooltips=fixed_fields + hyperparameter_fields)

    def tools(self, standard_tools='pan,crosshair,wheel_zoom,zoom_in,zoom_out,undo,reset'):
        """Return a two-item list: a fresh hover tool followed by the ``standard_tools`` string."""
        return [self.hovertool(), standard_tools]

# Shared helper that supplies the hover tooltips and standard toolbar for the plots.
hover = HoverHelper(tuner)

# Scatter of each training job's final objective value against its start time.
p = figure(
    plot_width=900,
    plot_height=400,
    tools=hover.tools(),
    x_axis_type='datetime',
)
p.circle(x='TrainingStartTime', y='FinalObjectiveValue', source=df)
show(p)

In [36]:
# Import the submodule explicitly: `bokeh.layouts.Column` is used at the end of
# this cell, and relying on another bokeh import to load it is fragile.
import bokeh.layouts


def is_num(x):
    """Return True when ``x`` can be parsed by ``float()``, False otherwise."""
    try:
        float(x)
    except (TypeError, ValueError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) should propagate instead of being swallowed.
        return False
    return True


# One "objective vs hyperparameter" scatter plot per tuned hyperparameter.
ranges = tuner.tuning_ranges
figures = []
for hp_name, hp_range in ranges.items():
    categorical_args = {}
    vals = hp_range.get('Values')
    if vals:
        # This is marked as categorical.  Check if all options are actually numbers.
        if all(is_num(x) for x in vals):
            # Bokeh has issues plotting a "categorical" range that's actually numeric, so plot as numeric
            print("Hyperparameter %s is tuned as categorical, but all values are numeric" % hp_name)
        else:
            # Genuinely categorical: give Bokeh the category list as the x range.
            categorical_args['x_range'] = vals

    # Now plot it
    p = figure(plot_width=500, plot_height=500,
               title="Objective vs %s" % hp_name,
               tools=hover.tools(),
               x_axis_label=hp_name, y_axis_label=objective_name,
               **categorical_args)
    p.circle(source=df, x=hp_name, y='FinalObjectiveValue')
    figures.append(p)

# Stack all figures vertically in a single layout.
show(bokeh.layouts.Column(*figures))