# Hyperparameter Tuning using HyperDrive


In [1]:
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig, Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.dataset import Dataset
from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive.sampling import BayesianParameterSampling
from azureml.train.hyperdrive import choice
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.estimator import Estimator
from sklearn.preprocessing import LabelEncoder
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
import json
import requests
import pandas

In [2]:
# Creation of compute cluster to carry our the automated ML
ws = Workspace.from_config()
compute_name = "udacity-cluster"
try:
    compute = ComputeTarget(workspace=ws, name=compute_name)
    print('Compute cluster {} already exists!'.format(compute_name))
except ComputeTargetException:
    config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute = ComputeTarget.create(ws, compute_name, config)
    
compute.wait_for_completion()

## Dataset

In [3]:
# TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.
dataset_name = 'color_shades_file3'
if dataset_name in ws.datasets.keys():
        dataset = ws.datasets[dataset_name] 
else:
        url = "https://raw.githubusercontent.com/zgoey/azure_ml_capstone/master/color_shades.csv"
        dataset = Dataset.File.from_files(url)        
        dataset.register(workspace = ws, name = dataset_name,
                                 description = 'RGB values labeled with color shade names',
                                 create_new_version = True)

In [4]:
ws = Workspace.from_config()
experiment_name = 'udacity-capstone'

experiment=Experiment(ws, experiment_name)

## Hyperdrive Configuration

For this hyperdrive experiment we will use a K-nearest-neighbor classifier (kNN), which is simple and at the same time flexible enough to adapt itself to irregular decision boundaries. For a K-nearest neighvor classifier it is always a bit of challenge to find the optimal number of neighbors, so that is one of the parameters, over which we will sample. We will also investigate the effect of weighting samples by their distances as opposed to giving all neighbors the same weight by including the 'weights' parameter in our search . Finally, the distance measure is something that is interesting to vary. Here we will take an educated guess. Instead of varyting the metric, we will compare a direct kNN application to one preceded by an embedding into the L*a*b* space that was designed to be perceprually uniform (see https://en.wikipedia.org/wiki/CIELAB_color_space) and a metric learning algporithm that Neighborhood Components Analysis (see https://scikit-learn.org/stable/modules/neighbors.html, section 1.6.7), which has been devised as an improvement kNN with the standard Euclidean distance metric.

We use Bayesian parameter sampling, because our hyperparamter sample space is relatively small and we have enough budget to explore. We do no set an early termination policy, because this is not supported when using Bayesian sampling. The maximal number of runs is set to 100, which is according to the recommendation to the recoomendation to set the maximum number of runs greater than or equal to 20 times the number of hyperparameters being tuned (see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-tune-hyperparameters#define-search-space). The maximum number of concurrent runs is set to 1, to let each run beenfit fullt from previously completed runs, which will enhance the sampling convergence.

We set the primary metric to accuracy, which is the most common measure used for classification tasks. Further details of the traning procesure can be found in the file train.py, which defined the estimator that we use in our experiment.


In [5]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.

#TODO: Create the different params that you will be using during training
param_sampling =   BayesianParameterSampling(
    {
        '--n_neighbors': choice(range(1,101)),
        '--weights': choice(range(2)),
        '--embedding': choice(range(3))
    }
)

#TODO: Create your estimator and hyperdrive config
# estimator = Estimator(source_directory=".",
#                 entry_script="train.py",
#                 compute_target=compute,
#                 conda_packages=['scikit-learn==0.21.3', 'pandas==0.23.4']
#                )

# hyperdrive_run_config = HyperDriveConfig(
#     estimator=estimator,
#     hyperparameter_sampling=param_sampling,
#     primary_metric_name='Accuracy',
#     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
#     max_total_runs = 150,
#     max_concurrent_runs=1
#     )

#  We got a warning saying:
#  WARNING:azureml.train.estimator._estimator:'Estimator' is deprecated. Please use 'ScriptRunConfig' from 
# 'azureml.core.script_run_config' with your own defined environment or an Azure ML curated environment. 
#  Because of this, we are now using ScriptRunConfig instead.

hyperdrive_env = Environment("hyperdrive_environment")
hyperdrive_env.docker.enabled = True
hyperdrive_env.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'pandas', 
    'scikit-learn',
    'scikit-image',
    'labtransformer'
])
hyperdrive_env.python.conda_dependencies.save_to_file(".", "hyperdrive_env.yml")

args = ['--data_folder', dataset.as_mount()]
print(args)
src = ScriptRunConfig(source_directory=".",
                      script='train.py',
                      arguments=args,
                      compute_target=compute,
                      environment=hyperdrive_env)
hyperdrive_run_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     #max_total_runs=100,
                                     max_total_runs=1,
                                     max_concurrent_runs=1)

['--data_folder', <azureml.data.dataset_consumption_config.DatasetConsumptionConfig object at 0x7f2b1cf676a0>]


For best results with Bayesian Sampling we recommend using a maximum number of runs greater than or equal to 20 times the number of hyperparameters being tuned. Recommendend value:60.


In [6]:
#TODO: Submit your experiment
hyperdrive_run = experiment.submit(hyperdrive_run_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?


In [7]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [8]:
hyperdrive_run.wait_for_completion()

{'runId': 'HD_e0c550c5-8a94-4c81-b895-51a4c84e74ec',
 'target': 'udacity-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-16T05:15:10.668328Z',
 'endTimeUtc': '2021-02-16T05:23:19.752889Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '2c9f37ef-c3e2-4d9c-94a9-beea74fa6b56',
  'score': '0.7672235481304693',
  'best_child_run_id': 'HD_e0c550c5-8a94-4c81-b895-51a4c84e74ec_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://experimental2494278274.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_e0c550c5-8a94-4c81-b895-51a4c84e74ec/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=Tk8xHt53z8juqTzKsgF1w9oLqoNiuMN0ajgb1N8sdG4%3D&st=2021-02-16T05%3A13%3A34Z&se=2021-02-16T13%3A23%3A34Z&sp=r'},
 'submittedBy': 'Zoé Goey

## Best Model

In [9]:
best_hyperdrive_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_hyperdrive_run.get_metrics())
print(best_hyperdrive_run.get_file_names())

{'Number of neighbors:': 53, 'Sample weights:': 'uniform', 'Feature embedding:': 'none', 'Accuracy': 0.7672235481304693}
['azureml-logs/55_azureml-execution-tvmps_e9268bfec6d61b848dab88e0db95d750eb296311bb3f84474294a4b647062b66_d.txt', 'azureml-logs/65_job_prep-tvmps_e9268bfec6d61b848dab88e0db95d750eb296311bb3f84474294a4b647062b66_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_e9268bfec6d61b848dab88e0db95d750eb296311bb3f84474294a4b647062b66_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/79_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_5b235d7e-5410-44b0-9a21-f7a5abc46893.jsonl', 'logs/azureml/dataprep/engine_spans_e06dd214-f369-4418-af4b-32331b651480.jsonl', 'logs/azureml/dataprep/python_span_5b235d7e-5410-44b0-9a21-f7a5abc46893.jsonl', 'logs/azureml/dataprep/python_span_e06dd214-f369-4418-af4b-32331b651480.j

In [10]:
#TODO: Save the best model
os.makedirs('./models', exist_ok=True)
best_hyperdrive_run.download_file(name='outputs/model.pkl', output_file_path='./models/hyperdrive_color_shades.pkl')


## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [11]:
description = 'Best kNN model for color shade classification found by hyperdrive optimization'
model = best_hyperdrive_run.register_model(model_name='hyperdrive_color_shades', model_path='outputs/model.pkl')

inference_config = InferenceConfig(entry_script='hyperdrive_score.py',environment=hyperdrive_env)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               description = 'Hyperdrive optimized kNN for color shade classification')

aci_service_name = 'hyperdrive-color-shade'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig, overwrite=True)
aci_service.wait_for_deployment(True)
print(aci_service.state)

hyperdrive-color-shade
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running............................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


TODO: In the cell below, send a request to the web service you deployed to test it.

In [12]:
data = {"data": [[255, 10, 2]]}

# Convert to JSON string
input_data = json.dumps(data)

# Set the content type
headers = {"Content-Type": "application/json"}

# Make the request and display the response
resp = requests.post(aci_service.scoring_uri, input_data, headers=headers)

le = LabelEncoder()
df = pandas.read_csv(dataset.download(overwrite=True)[0])
le.fit_transform(df['Shade'])
print(le.inverse_transform(resp.json()))

['Red']


TODO: In the cell below, print the logs of the web service and delete the service

In [13]:
print(aci_service.get_logs())
aci_service.delete()

2021-02-16T05:26:17,476634927+00:00 - iot-server/run 
2021-02-16T05:26:17,480937144+00:00 - gunicorn/run 
2021-02-16T05:26:17,484658660+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_d841061c7115848f6b242feba3e659bf/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_d841061c7115848f6b242feba3e659bf/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_d841061c7115848f6b242feba3e659bf/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_d841061c7115848f6b242feba3e659bf/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_d841061c7115848f6b242feba3e659bf/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2021-02-16T05:26:17,489562680+00:00 - rsyslog/run 
EdgeHubC

## Cleanup

In [14]:
# Clean up compute cluster
try:
    compute = ComputeTarget(workspace=ws, name=compute_name)
    try:
        compute.delete()
    except ComputeTargetException as e:
        print(e.message)
        print("Failed to clean up compute cluster {}!".format(compute_name))
    compute.wait_for_completion(show_output=True, is_delete_operation=True)
except ComputeTargetException:
    print('Compute cluster {} no longer exists!'.format(compute_name))

Deleting..Current provisioning state of AmlCompute is "Deleting"

.....Current provisioning state of AmlCompute is "Deleting"

......Current provisioning state of AmlCompute is "Deleting"

.....Current provisioning state of AmlCompute is "Deleting"

......Current provisioning state of AmlCompute is "Deleting"

......Current provisioning state of AmlCompute is "Deleting"

.....Current provisioning state of AmlCompute is "Deleting"

....
SucceededProvisioning operation finished, operation "Succeeded"
