# Hyperparameter Tuning using HyperDrive

Import Dependencies.

In [14]:
import os
import json
import azureml.core
from azureml.core import Workspace, Experiment, Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice

# Record the installed Azure ML SDK version so the run environment is traceable.
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.36.0


## Initialize workspace

Initialize a workspace object from persisted configuration. Make sure the config file is present at .\config.json

In [15]:
# Build the workspace handle from the persisted configuration file.
ws = Workspace.from_config()

# Echo the identifying details of the workspace, one value per line.
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

quick-starts-ws-170936
aml-quickstarts-170936
southcentralus
b968fb36-f06a-4c76-a15f-afab68ae7667


## Create an Azure ML experiment

In [16]:
# Name under which the runs submitted from this notebook are grouped.
experiment_name = 'hdr-heart-experiment'

experiment = Experiment(workspace=ws, name=experiment_name)

# NOTE(review): this starts an interactive run, but `run` is not used or
# completed in any cell visible here — confirm it is needed, or call
# run.complete() once the notebook is done.
run = experiment.start_logging()

## Create a Compute Cluster

In [18]:
amlcompute_cluster_name = "heart-compute"

# Reuse the cluster if it already exists in the workspace; otherwise provision it.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # No cluster with this name yet: create one that can scale up to 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# Block until provisioning finishes (or 10 minutes elapse).
# No explicit min_node_count is requested: the SDK documentation says it should
# not exceed the minimum node count the cluster was provisioned with, and the
# configuration above leaves min_nodes at its default, so asking for one node
# could keep a freshly created cluster waiting until the timeout.
compute_target.wait_for_completion(show_output=True, timeout_in_minutes=10)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

### Overview
For this project, I used the [Heart Failure dataset](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data) from Kaggle. 

Heart failure is a common event caused by cardiovascular diseases (CVDs). This dataset contains 12 clinical features that can be used to predict mortality from heart failure; the target column is `DEATH_EVENT`.

In [20]:
# Name under which the dataset is looked up in the workspace.
key = "heart-failure"

# Raises KeyError if no dataset is registered under this name.
dataset = ws.datasets[key]

# Load the dataset into pandas and display summary statistics for each column.
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


### Explore

In [22]:
# Preview the first five records of the dataset as a pandas DataFrame.
first_rows = dataset.take(5)
first_rows.to_pandas_dataframe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [24]:
# Early termination with a Bandit policy (evaluated every 2 intervals, 10% slack).
# An early termination policy is not required when Bayesian sampling is used.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Discrete search space, sampled at random. The '--' prefixed keys presumably
# match the command-line arguments parsed by train.py — verify against the script.
search_space = {
    '--C': choice(0.01, 0.1, 1.0, 10.0, 100.0),
    '--max_iter': choice(20, 50, 100, 120, 150),
}
param_sampling = RandomParameterSampling(search_space)

# Estimator that runs train.py from the current directory on the compute cluster.
# NOTE(review): the SKLearn estimator is deprecated in recent SDK v1 releases in
# favour of ScriptRunConfig — consider migrating.
estimator = SKLearn(
    source_directory='./',
    compute_target=compute_target,
    entry_script='train.py',
)

# HyperDrive sweep: maximize 'Accuracy' over at most 20 runs, 4 at a time.
hdr_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_concurrent_runs=4,
    max_total_runs=20,
)



In [26]:
# Launch the HyperDrive sweep under the experiment and keep a handle to its run.
hdr = experiment.submit(hdr_config)



## Run Details

In the cell below, the `RunDetails` widget shows the progress and results of the individual HyperDrive child runs.

In [36]:
# Render the interactive monitoring widget for the HyperDrive run.
run_details_widget = RunDetails(hdr)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [30]:
# Block until the HyperDrive run finishes, streaming its log output.
# Left as the cell's last expression so the returned run details are displayed.
hdr.wait_for_completion(show_output=True)

RunId: HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9
Web View: https://ml.azure.com/runs/HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9?wsid=/subscriptions/b968fb36-f06a-4c76-a15f-afab68ae7667/resourcegroups/aml-quickstarts-170936/workspaces/quick-starts-ws-170936&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-12-31T11:51:29.303335][API][INFO]Experiment created<END>\n""<START>[2021-12-31T11:51:30.089909][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-12-31T11:51:30.707670][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9
Web View: https://ml.azure.com/runs/HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9?wsid=/subscriptions/b968fb36-f06a-4c76-a15f-afab68ae7667/resourcegroups/aml-quickstarts-170936/workspaces/quick-starts-ws-170936&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9',
 'target': 'heart-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-12-31T11:51:28.988792Z',
 'endTimeUtc': '2021-12-31T11:59:11.087048Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '2dbe0efa-b5df-432b-b7ff-6a49c071110f',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1063-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.36.0',
  'space_size': '25',
  'score': '0.7888888888888889',
  'best_child_run_id': 'HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9_5',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg170936.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_5cc42a4c-6f00-4350-9371-ddb3d96e36c9/azureml

## Best Model

Get the best run from the HyperDrive experiment, display its metrics, and register its model.

In [33]:
# Select the child run with the best value of the primary metric.
best_run = hdr.get_best_run_by_primary_metric()

# The lookup returns None when no child run reported the primary metric;
# fail with a clear message instead of an AttributeError on the next line.
if best_run is None:
    raise RuntimeError(
        "No HyperDrive child run logged the primary metric 'Accuracy'; "
        "cannot select a best run."
    )

# All metrics logged by the best run, keyed by metric name.
best_run_metrics = best_run.get_metrics()

In [34]:
# Summarize the best child run: its id, its accuracy, and every logged metric.
best_run_summary = (
    f"Best Run Id : {best_run.id}",
    f"Accuracy : {best_run_metrics['Accuracy']}",
    f"Best metrics : {best_run_metrics}",
)
print(*best_run_summary, sep='\n')

Best Run Id : HD_7eaab4a5-cf4c-42b5-9011-aefeb90f0984_6
Accuracy : 0.7888888888888889
Best metrics : {'Regularization Strength:': 100.0, 'Max iterations:': 20, 'Accuracy': 0.7888888888888889}


In [35]:
# Register the best run's model in the workspace under a fixed name.
# NOTE(review): model_path='./' points at the run's root rather than a specific
# model file — confirm against where train.py saves the trained model.
model = best_run.register_model(model_name='heart-failure-best-model-hdr', model_path='./')

## Model Deployment

This model was not deployed because its best Accuracy (about 0.789) was lower than that of the model it was compared against.