# Hyperparameter Tuning Using HyperDrive

In [None]:
# Import Dependencies
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset

# Report which azureml-core SDK release this kernel has loaded.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

## Dataset

In [None]:
# Load the workspace via Workspace.from_config() and echo its identifying fields.
ws = Workspace.from_config()
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Experiment under which the runs in this notebook are submitted.
experiment_name = 'housing-reg'
experiment = Experiment(ws, experiment_name)

# Keep the experiment as the cell's final expression so the notebook renders it.
experiment

In [None]:
## Upload the local training file to a datastore in the cloud
# Get the workspace's default datastore to hold the prepared data.
datastore = ws.get_default_datastore()

# Upload the training CSV into the datastore's 'data' folder.
# upload_files takes explicit file paths (upload()'s src_dir expects a directory),
# and the file extension must be '.csv' to match the dataset path used below.
datastore.upload_files(files=['data/housing_train.csv'], target_path='data')

# Create a tabular dataset referencing the uploaded file's cloud location.
dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'data/housing_train.csv')])

In [None]:
# Register the tabular dataset with the workspace under a fixed name.
# `dataset` and `ws` are the objects created in the earlier cells; the registered
# dataset returned by register() is kept in `housing_ds`.
housing_ds = dataset.register(workspace=ws,
                              name='Housing Dataset',
                              description='House Price training data')

## Aml-Compute

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Reuse the named AmlCompute cluster if it exists, otherwise provision it.
# max_nodes should be no greater than 4.

# Name under which the cluster is looked up or created.
cluster_name = "housing-compute"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed, so provision a new cluster under this name.
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS3_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target')

# Wait for the cluster operation to finish. A minimum node count and a timeout
# can be polled for; with min_node_count=None the cluster's scale settings apply.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=30,
)

# get_status() gives a detailed status for the current cluster.
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

## HyperDrive Configuration

In [None]:
from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice
import os
from azureml.core import Environment

# Early termination policy (not required when using Bayesian sampling).
# Bandit policy with a 10% slack factor, evaluated at every interval, with the
# first evaluation delayed by 5 intervals.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=5)

# Hyperparameter search space: each child run gets a random combination of
# these values, keyed by the command-line argument names.
param_sampling = RandomParameterSampling({
    '--alpha': choice(0.1, 0.2, 0.3, 0.4),
    '--max_iter': choice(10, 100, 1000),
})

# Environment for the training runs, built from the local conda specification.
env = Environment.from_conda_specification(
    name='rf_env',
    file_path='./rf_env.yml',
)

# Estimators are deprecated with the 1.19.0 release of the Python SDK, so the
# training script is wrapped in a ScriptRunConfig instead.
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-migrate-from-estimators-to-scriptrunconfig
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=env)

# RMSE is an error measure, so lower is better: the goal must be MINIMIZE.
# With MAXIMIZE, HyperDrive would pick the worst model as "best" and the bandit
# policy would terminate the most promising runs.
hyperdrive_run_config = HyperDriveConfig(run_config=src,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name="RMSE",
                                         primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                                         max_total_runs=20,
                                         max_concurrent_runs=4)

In [None]:
# Launch the HyperDrive configuration under the experiment and keep the run handle.
hyperdrive_run = experiment.submit(config=hyperdrive_run_config, show_output=True)

## Run Details

In [None]:
from azureml.widgets import RunDetails

# Build the run-details widget for the HyperDrive run, then display it.
details_widget = RunDetails(hyperdrive_run)
details_widget.show()

In [None]:
# Block until the HyperDrive run finishes. The run handle returned by
# experiment.submit() is `hyperdrive_run`; no variable named `run` is defined.
hyperdrive_run.wait_for_completion()

## Best Model

In [None]:
# Fetch the child run that scored best on the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Collect its logged metrics and the script arguments it was launched with.
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']

# Report the run id, its RMSE, and the argument values.
print('Best Run Id: ', best_run.id)
print('\n RMSE:', best_run_metrics['RMSE'])
print(parameter_values)

In [None]:
# Register the best run's saved model file under the name 'best_model_reg'.
best_run.register_model(model_name='best_model_reg',
                        model_path='outputs/model.joblib')