# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import numpy as np
import pandas as pd
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace

### Initialising experiment

In [52]:
ws = Workspace.from_config()
experiment_name = 'titanic_dataset_hyperdrive'
project_folder = '.'
os.makedirs(project_folder, exist_ok = True)
experiment=Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
titanic_dataset_hyperdrive,quick-starts-ws-136108,Link to Azure Machine Learning studio,Link to Documentation


### Creating or checking for existing compute cluster

In [55]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_name = "computecluster2"
try:
    compute_target = ComputeTarget(workspace = ws, name = amlcompute_name)
    print("Found existing. Use it.")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2', max_nodes = 4)
    compute_target = ComputeTarget.create(ws, amlcompute_name, compute_config)
compute_target.wait_for_completion(show_output = True)

Found existing. Use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Preparing data

In [47]:
import pandas as pd

#Loading the titanic dataset
data = pd.read_csv('titanic_dataset.csv')

In [48]:
data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

In [49]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.sampling import BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice
import os

#Specify a parameter sampler (Bayesian sampling)
ps = BayesianParameterSampling({'--C': uniform(0.01,1),'--max_iter': choice(100, 150, 200, 250, 300)})

#Create a directory 'training'
if "training" not in os.listdir():
    os.mkdir("./training")

#Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./',
                compute_target=compute_target,
                entry_script='train.py')

#Create a HyperDriveConfig using the estimator and hyperparameter sampler.
hyperdrive_config = HyperDriveConfig(
                                   hyperparameter_sampling = ps,
                                   primary_metric_name = 'accuracy',
                                   primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,
                                   max_total_runs = 20,
                                   max_concurrent_runs = 4,
                                   policy = None,
                                   estimator = est)


For best results with Bayesian Sampling we recommend using a maximum number of runs greater than or equal to 20 times the number of hyperparameters being tuned. Recommendend value:40.


In [50]:

from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.sampling import BayesianParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice
import os

#Specify a parameter sampler (Bayesian sampling)
ps = BayesianParameterSampling({'--C': uniform(0.01,1),'--max_iter': choice(100, 150, 200, 250, 300)})

#Create a directory 'training'
if "training" not in os.listdir():
    os.mkdir("./training")

#Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./',
                compute_target=compute_target,
                entry_script='train.py')

#Create a HyperDriveConfig using the estimator and hyperparameter sampler.
hyperdrive_config = HyperDriveConfig(
                                   hyperparameter_sampling = ps,
                                   primary_metric_name = 'accuracy',
                                   primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,
                                   max_total_runs = 20,
                                   max_concurrent_runs = 4,
                                   policy = None,
                                   estimator = est)



## Run Details

In [51]:
#TODO: Submit your experiment
hyperdrive_run = experiment.submit(hyperdrive_config, show_output=True)
RunDetails(hyperdrive_run).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [12]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb
Web View: https://ml.azure.com/experiments/titanic_dataset_hyperdrive/runs/HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb?wsid=/subscriptions/3d1a56d2-7c81-4118-9790-f85d1acf0c77/resourcegroups/aml-quickstarts-136108/workspaces/quick-starts-ws-136108

Execution Summary
RunId: HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb
Web View: https://ml.azure.com/experiments/titanic_dataset_hyperdrive/runs/HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb?wsid=/subscriptions/3d1a56d2-7c81-4118-9790-f85d1acf0c77/resourcegroups/aml-quickstarts-136108/workspaces/quick-starts-ws-136108



{'runId': 'HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb',
 'target': 'computecluster2',
 'status': 'Completed',
 'startTimeUtc': '2021-01-26T10:50:14.546018Z',
 'endTimeUtc': '2021-01-26T11:03:53.801396Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'ccb75f22-1054-49d6-bd41-49508188e3e1',
  'score': '0.8603351955307262',
  'best_child_run_id': 'HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb_18',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136108.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=cwlYzmG6ktDnf9sy4RF2L0OPtXPSIdgE0G7b9Ft8iWU%3D&st=2021-01-26T10%3A54%3A23Z&se=2021-01-26T19%3A04%3A23Z&sp=r'},
 'submittedBy': 'ODL_User 136108'}

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [13]:
import joblib
from azureml.core.model import Model

#Get the best run.
best_run_hd = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics_hd = best_run_hd.get_metrics()
print("Best Run Id: ", best_run_hd.id)
print("Accuracy: ", best_run_metrics_hd['accuracy'])

Best Run Id:  HD_61a03ee9-de98-4e9e-b876-9bac1c356cbb_18
Accuracy:  0.8603351955307262


In [60]:
#TODO: Save the best model
model_hd = best_run_hd.register_model(model_name='hyperdrive_best_model', 
                                model_path='outputs/model.pkl',
                                model_framework=Model.Framework.SCIKITLEARN, 
                                model_framework_version='0.19.1')
print("Model successfully saved.")

Model successfully saved.


## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [62]:
best_run_hd.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_ba93f81660b2739bd71c543559d21cd852806448dda645e3fcc7bcf7a0add16e_d.txt',
 'azureml-logs/65_job_prep-tvmps_ba93f81660b2739bd71c543559d21cd852806448dda645e3fcc7bcf7a0add16e_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_ba93f81660b2739bd71c543559d21cd852806448dda645e3fcc7bcf7a0add16e_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/100_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.pkl']

TODO: In the cell below, send a request to the web service you deployed to test it.

In [63]:
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

#create the environment
myenv = Environment(name="myenv")
conda_dep = CondaDependencies()

#Define the packages needed by the model and scripts
conda_dep.add_conda_package("tensorflow")
conda_dep.add_conda_package("numpy")
conda_dep.add_conda_package("scikit-learn")
conda_dep.add_conda_package("py-xgboost")
#You must list azureml-defaults as a pip dependency
conda_dep.add_pip_package("azureml-defaults")
conda_dep.add_pip_package("keras")
conda_dep.add_pip_package("gensim")
conda_dep.add_pip_package("azureml-automl-core")
conda_dep.add_pip_package("azureml-automl-runtime")
conda_dep.add_pip_package("packaging")
#conda_dep.save_to_file(base_directory='./', conda_file_path='myenv.yml')
#myenv = Environment.from_conda_specification(name='myenv', file_path='myenv.yml')
myenv.python.conda_dependencies=conda_dep
#conda_dep.save_to_file(base_directory='./outputs', conda_file_path='myenv.yml')
#best_run.download_file(constants.CONDA_ENV_FILE_PATH, 'myenv.yml')

inference_config = InferenceConfig(entry_script="score1.py", environment=myenv)

In [65]:
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice
#from azureml.core.webservice import LocalWebservice
from azureml.core.webservice import Webservice
from azureml.train.automl import *

deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, auth_enabled=True, enable_app_insights=True)
#deployment_config = LocalWebservice.deploy_configuration()

#model = Model(ws, name='hyperdrive_pred')
print(model)
service = Model.deploy(ws, 'titanicpred3', [model], inference_config, deployment_config)

service.wait_for_deployment(True)
print(service.get_logs)
print(service.state)
print("scoring URI: " + service.scoring_uri)


Model(workspace=Workspace.create(name='quick-starts-ws-136108', subscription_id='3d1a56d2-7c81-4118-9790-f85d1acf0c77', resource_group='aml-quickstarts-136108'), name=hyperdrive_best_model, id=hyperdrive_best_model:1, version=1, tags={}, properties={})
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
<bound method Webservice.get_logs of AciWebservice(workspace=Workspace.create(name='quick-starts-ws-136108', subscription_id='3d1a56d2-7c81-4118-9790-f85d1acf0c77', resource_group='aml-quickstarts-136108'), name=titanicpred3, image_id=None, compute_type=None, state=ACI, scoring_uri=Healthy, tags=http://cfc6ee9c-0706-466f-9d51-c9f99c5af68b.southcentralus.azurecontainer.io/score, properties={}, created_by={'hasInferenceSchema': 'False', 'hasHtt

TODO: In the cell below, print the logs of the web service and delete the service

In [66]:
print(service.get_logs())

2021-01-26T13:56:05,818382123+00:00 - iot-server/run 
2021-01-26T13:56:05,818408024+00:00 - gunicorn/run 
2021-01-26T13:56:05,819728636+00:00 - rsyslog/run 
2021-01-26T13:56:05,822949366+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_46331db455ceb96309136c030bbd61f1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_46331db455ceb96309136c030bbd61f1/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_46331db455ceb96309136c030bbd61f1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_46331db455ceb96309136c030bbd61f1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_46331db455ceb96309136c030bbd61f1/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
EdgeHubC

In [68]:
%run endpoint.py

{"result": [0, 1]}
