# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.dataset import Dataset
from azureml.train.hyperdrive import BayesianParameterSampling, uniform, choice
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

## Dataset

In [2]:
# Get workspace
ws = Workspace.from_config()

# Create experiment
experiment = Experiment(ws, 'hyperdrive-energyforecast')
experiment

# Get data
data_valid = ws.datasets['energy-forecast-data-validation']
data_train = ws.datasets['energy-forecast-data-training']

## Compute cluster

In [3]:
# Create compute cluster
cluster_name = "hdrive-compute"

try:
    target = ComputeTarget(workspace=ws, name=cluster_name)
except:
    config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2', max_nodes=6)
    target = ComputeTarget.create(ws, cluster_name, config)
    
target.wait_for_completion(show_output=True)
print('done')

Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
done


## Hyperdrive configuration

In this project, I selected RandomForestRegressor as the predictor. This is mainly because it's simple to configure and is known to achieve decent performance.

The selected hyperparameters to be tuned are `n_estimators`, which is the number of trees in the forest, and `max_depth`, which is the maximum depth of each tree. I decided to limit to only these 2 hyperparameters because they are the most important ones, and due to time restriction.

I selected `BayesianParameterSampling` as the hyperparameter sampling algorithm because it is proven to be efficient in searching a large parameter space when the number of searches is limited (50 in this project).

Regarding the `HyperDriveConfig` settings, the selected primary metric is R2, which is the same as the one selected to optimise the AML pipeline. This is because it is easy to analyse, the close it is to 1, the better the prediction. I decided to limit the `maximum_total_runs` to 50 due to time constraint and there are only 2 hyperparameters to tune. The `policy` argument, which is for specifying a stopping policy, was set to None because there is currently no stopping policy that supports `BayesianParameterSampling` parameter sampler.

In [4]:
# Hyperparameter sampling
param_sampler = BayesianParameterSampling({
    "--n_estimators": choice(range(50, 200)),
    "--max_depth": choice(range(3, 21))
    })

# Create estimator
est = SKLearn(
    source_directory='./src/',
    entry_script='train.py',
    compute_target=cluster_name
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=param_sampler,
    primary_metric_name='R2_valid',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=50,
    max_concurrent_runs=4,
    policy=None
)


'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
# Submit hyperdrive run
hyperdrive_run = experiment.submit(hyperdrive_config)



## Run Details

In [6]:
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_7e4c4d96-a9da-48da-accc-4c3a847fa555
Web View: https://ml.azure.com/experiments/hyperdrive-energyforecast/runs/HD_7e4c4d96-a9da-48da-accc-4c3a847fa555?wsid=/subscriptions/cdbe0b43-92a0-4715-838a-f2648cc7ad21/resourcegroups/aml-quickstarts-143075/workspaces/quick-starts-ws-143075

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-18T18:35:22.436766][API][INFO]Experiment created<END>\n""<START>[2021-04-18T18:35:23.277827][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-04-18T18:35:23.857037][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-04-18T18:35:24.0040538Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_7e4c4d96-a9da-48da-accc-4c3a847fa555
Web View: https://ml.azure.com/experiments/hyperdrive-energyforecast/runs/HD_7e4c4d96-a9da-48da-accc-4c3a847fa555?wsid=

{'runId': 'HD_7e4c4d96-a9da-48da-accc-4c3a847fa555',
 'target': 'hdrive-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-04-18T18:35:22.131406Z',
 'endTimeUtc': '2021-04-18T19:04:38.047583Z',
 'properties': {'primary_metric_config': '{"name": "R2_valid", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'b156823e-0bfd-4ab7-8ddd-0fe203b2b95f',
  'score': '0.9014831149104803',
  'best_child_run_id': 'HD_7e4c4d96-a9da-48da-accc-4c3a847fa555_42',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg143075.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_7e4c4d96-a9da-48da-accc-4c3a847fa555/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=LCoHEhzaA2buqAOLP5E39PnZI3Rhd19nwxntn5jEWdE%3D&st=2021-04-18T18%3A54%3A40Z&se=2021-04-19T03%3A04%3A40Z&sp=r'}}

## Best Model

In [15]:
# Get best run details
hyperdrive_best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(hyperdrive_best_run)
print("Best params: ", hyperdrive_best_run.get_metrics())

Run(Experiment: hyperdrive-energyforecast,
Id: HD_7e4c4d96-a9da-48da-accc-4c3a847fa555_42,
Type: azureml.scriptrun,
Status: Completed)
Best params:  {'Number of estimators:': 183.0, 'Max depth:': 8, 'R2_valid': 0.9014831149104803, 'R2_train': 0.9090438936514077}


In [8]:
# Save the best model
model = hyperdrive_best_run.register_model(
    model_name='best_hyperdrive', 
    model_path='./outputs/hyperdrive_model.joblib'
)

model.download(target_dir="outputs", exist_ok=True)

'outputs\\hyperdrive_model.joblib'

## Model Deployment

In [9]:
from azureml.core import Workspace, Environment, Webservice
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice

# Create environment
env = Environment.from_conda_specification('env', 'conda_env.yaml')
inference_config = InferenceConfig(entry_script='./src/score.py', environment=env)

# Deploy to an ACI instance
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1
)

service = model.deploy(
    workspace=ws,
    name="energy-forecaster",
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True
)

service.wait_for_deployment(show_output=True)

# Enable application insights
service.update(enable_app_insights=True)

# Save URI
with open('./endpoint_uri.txt', 'w') as f:
    f.write(service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..........................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


## Consume endpoint

In [10]:
import json
import requests

input_json = json.dumps(json.load(open('./test_payload.json')))
response = requests.post(service.scoring_uri, input_json, headers={'Content-Type':'application/json'})

prediction = json.loads(response.content)

print(prediction)

[247482.2690306603, 246589.30226048033]


In [11]:
# Get logs
print(service.get_logs())

2021-04-18T19:15:53,236413900+00:00 - rsyslog/run 
2021-04-18T19:15:53,238275800+00:00 - gunicorn/run 
2021-04-18T19:15:53,256378300+00:00 - iot-server/run 
2021-04-18T19:15:53,266762800+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_e85f239ab1a309d9f631108ab74528f7/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_e85f239ab1a309d9f631108ab74528f7/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_e85f239ab1a309d9f631108ab74528f7/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_e85f239ab1a309d9f631108ab74528f7/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_e85f239ab1a309d9f631108ab74528f7/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
EdgeHubC

In [12]:
# Delete endpoint 
service.delete()

WebserviceException: WebserviceException:
	Message: There is a deployment operation in flight for the Service: energy-forecaster
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "There is a deployment operation in flight for the Service: energy-forecaster"
    }
}