# Hypertuning Using Hyperdrive

In [24]:
# Import Dependencies
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


## Dataset

In [25]:
#
ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')
experiment_name = 'housing-reg-3'

experiment=Experiment(ws, experiment_name)
experiment

Workspace name: quick-starts-ws-137184
Azure region: southcentralus
Subscription id: 1b944a9b-fdae-4f97-aeb1-b7eea0beac53
Resource group: aml-quickstarts-137184


Name,Workspace,Report Page,Docs Page
housing-reg-3,quick-starts-ws-137184,Link to Azure Machine Learning studio,Link to Documentation


In [3]:
## upload the local file to a datastore on the cloud
# get the datastore to upload prepared data
datastore = ws.get_default_datastore()

# upload the local file from src_dir to the target_path in datastore
datastore.upload(src_dir='data', target_path='data')

# create a dataset referencing the cloud location
dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, ('data/housing_train.csv'))])

Uploading an estimated of 2 files
Uploading data/housing_train.csv
Uploaded data/housing_train.csv, 1 files out of an estimated total of 2
Uploading data/.ipynb_checkpoints/housing-prediction-wrangling-checkpoint.ipynb
Uploaded data/.ipynb_checkpoints/housing-prediction-wrangling-checkpoint.ipynb, 2 files out of an estimated total of 2
Uploaded 2 files


In [4]:
#register the dataset
dataset = dataset.register(workspace=ws,
                                 name='Housing Dataset',
                                 description='House Price training data')

## Aml-Compute

In [26]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Create compute cluster
# max_nodes should be no greater than 4.

# choose a name for your cluster
cluster_name = "housing-compute"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=30)
    
 # use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-02-04T08:25:39.193000+00:00', 'errors': None, 'creationTime': '2021-02-04T07:17:20.023748+00:00', 'modifiedTime': '2021-02-04T07:17:35.912030+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS3_V2'}


## HyperDrive Configuration

In [27]:
from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
import os
from azureml.core import Environment

# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

#TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling({
        '--alpha': choice(0.1,0.2,0.3,0.4),
        '--max_iter': choice(10,100,1000)
       
    })

# set up the hyperdrive environment
env = Environment.from_conda_specification(
        name='l_env',
        file_path='./l_env.yml'    
        )

#TODO: Create your estimator and hyperdrive config
#Estimators are deprecated with the 1.19.0 release of the Python SDK.
#https://docs.microsoft.com/en-us/azure/machine-learning/how-to-migrate-from-estimators-to-scriptrunconfig

src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target = compute_target,
                      environment=env)

hyperdrive_run_config = HyperDriveConfig(run_config=src,
                             hyperparameter_sampling=param_sampling,
                             policy=early_termination_policy,
                             primary_metric_name="root_mean_squared_error",
                             primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                             max_total_runs=20,
                             max_concurrent_runs=4)

In [28]:
#submit your experiment
hyperdrive_run=experiment.submit(hyperdrive_run_config, show_output=True)

In [29]:
from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [30]:
hyperdrive_run.wait_for_completion(show_output=False)  # specify True for a verbose log

{'runId': 'HD_695a5f9d-d697-4c42-a18f-45a9e41a85cf',
 'target': 'housing-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-02-04T08:39:38.010155Z',
 'endTimeUtc': '2021-02-04T08:49:10.739018Z',
 'properties': {'primary_metric_config': '{"name": "root_mean_squared_error", "goal": "minimize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '4c92d24e-5a96-47b6-9041-e802ec5e42a9',
  'score': '52569.7742155477',
  'best_child_run_id': 'HD_695a5f9d-d697-4c42-a18f-45a9e41a85cf_10',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg137184.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_695a5f9d-d697-4c42-a18f-45a9e41a85cf/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=dgWLb%2F2eELdmNdRlbuODi0zwcic4Q73e%2BiCa1RQxwsc%3D&st=2021-02-04T08%3A39%3A37Z&se=2021-02-04T16%3A49%3A37Z&sp=r'},
 'submittedBy': '

In [32]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']
print('Best Run Id:',best_run.id)
print('\n Root Mean Squared Error', best_run_metrics['root_mean_squared_error'])
print (parameter_values)

Best Run Id: HD_695a5f9d-d697-4c42-a18f-45a9e41a85cf_10

 Root Mean Squared Error 52569.7742155477
['--alpha', '0.4', '--max_iter', '1000']


In [33]:
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

model = best_run.register_model(model_name='housing-reg', 
                           model_path='outputs/model.joblib',
                           model_framework=Model.Framework.SCIKITLEARN,
                           model_framework_version='0.19.1',
                           resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5))