# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core import Datastore, Experiment, Environment, ScriptRunConfig
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal, choice, uniform
from azureml.train.hyperdrive import HyperDriveRun

from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput, PipelineRun
from azureml.pipeline.steps import PythonScriptStep, HyperDriveStep, HyperDriveStepRun
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import AutoMLStep

from azureml.widgets import RunDetails

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

import gc

## Identify Azure ML Workspace by configuration

In [2]:
# Load the default workspace from the local Azure ML configuration
ws = Workspace.from_config()

# Default datastore of the workspace; passed as `datastore=` to every PipelineData created below
def_blob_store = ws.get_default_datastore()

## Select Compute Target

In [3]:
# Name of the compute cluster that the next cell looks up (and creates if the lookup fails)
cpu_cluster_name = "ml-dev-clus"

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Reuse the cluster named `cpu_cluster_name` if the lookup succeeds.
# If the lookup raises ComputeTargetException, provision a new cluster
# (STANDARD_D12_V2, scaling between 0 and 6 nodes) and wait until it is ready.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found an existing cluster, using it instead.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D12_V2',
                                                           min_nodes=0,
                                                           max_nodes=6)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    cpu_cluster.wait_for_completion(show_output=True)

Found an existing cluster, using it instead.


## Create Running Environment - Docker

In [5]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE, DockerConfiguration

# Run configuration shared by the three data-prep PythonScriptSteps below
run_config = RunConfiguration()

# Enable Docker for the run
docker_config = DockerConfiguration(use_docker=True)
run_config.docker = docker_config

# Set the Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Dependencies are not user-managed: the conda environment is built from the
# CondaDependencies object assigned below (no conda YAML file is read in this cell)
run_config.environment.python.user_managed_dependencies = False

# Conda packages and pip packages for the data-prep environment.
# NOTE(review): no versions are pinned here — confirm this is intended.
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['pandas','scikit-learn','numpy'],
                                                                            pip_packages=['azureml-sdk[automl]', 'lightgbm'])

## Initiate Input Data Channel
- train_transaction
- train_identity

In [6]:
# Look up the two registered datasets by name; both are passed as named inputs to CleanStep
train_transaction = Dataset.get_by_name(ws, name='CIS Fraud Detection_train_transaction')
train_identity = Dataset.get_by_name(ws, name = 'CIS Fraud Detection_train_identity')

## Pipeline
### Pipeline Step 1 : Clean Data

In [7]:
# Folder containing the data-prep scripts (clean_data.py is run from here)
source_directory="./data_prep"

# Intermediate pipeline output of this step, stored on the default datastore and exposed as a dataset
cleaned_data = PipelineData("cleaned_data", datastore=def_blob_store).as_dataset()

# Step 1: run clean_data.py on the cluster.
# - the two registered datasets are passed as the named inputs 'input_transaction' and 'input_identity'
# - the output location is handed to the script through --output_combine
# - allow_reuse=True lets the pipeline reuse a previous run's output for this step
CleanStep = PythonScriptStep(
    script_name="clean_data.py", 
    arguments=["--output_combine", cleaned_data],
    inputs=[train_transaction.as_named_input('input_transaction'), train_identity.as_named_input('input_identity')],
    outputs= [cleaned_data],
    compute_target=cpu_cluster, 
    source_directory=source_directory,
    runconfig=run_config,
    allow_reuse = True
)

### Pipeline Step 2 : Select Columns

In [8]:
# Folder containing the data-prep scripts (select_col.py is run from here)
source_directory="./data_prep"

# Intermediate pipeline output of this step, stored on the default datastore and exposed as a dataset
selected_data = PipelineData("selected_data", datastore=def_blob_store).as_dataset()
# Step 2: run select_col.py on the cluster.
# - consumes the output of CleanStep, read via parse_parquet_files()
# - the output location is handed to the script through --output_selected
SelectedStep = PythonScriptStep(
    script_name="select_col.py", 
    arguments=["--output_selected", selected_data],
    inputs=[cleaned_data.parse_parquet_files()],
    outputs= [selected_data],
    compute_target=cpu_cluster, 
    source_directory=source_directory,
    runconfig=run_config,
    allow_reuse = True
)

### Pipeline Step 3 : Feature Engineering

In [9]:
# Folder containing the data-prep scripts (feature_engineering.py is run from here)
source_directory="./data_prep"

# Output of the last data-prep step; this is the dataset later fed to the HyperDrive training step
train_data_hd = PipelineData("train_data_hd", datastore=def_blob_store).as_dataset() ## Last input for machine learning model

# Step 3: run feature_engineering.py on the cluster.
# - consumes the output of SelectedStep, read via parse_parquet_files()
# - the output location is handed to the script through --output_train_data
FeatureEngineeringStep = PythonScriptStep(
    script_name="feature_engineering.py", 
    arguments=["--output_train_data", train_data_hd],
    inputs=[selected_data.parse_parquet_files()],
    outputs= [train_data_hd],
    compute_target=cpu_cluster, 
    source_directory=source_directory,
    runconfig=run_config,
    allow_reuse = True
)

## Hyperdrive Configuration

### Create HyperDrive Environment File

In [10]:
%%writefile hyperdrive_env.yml
# Conda specification for the HyperDrive training runs; loaded in the next cell
# via Environment.from_conda_specification(...).
# NOTE(review): only the Python version is pinned; library versions are left open — confirm this is intended.
name: batch_environment
dependencies:
- python=3.6.9
- scikit-learn
- pandas
- numpy
- lightgbm
- pip
- pip:
  - azureml-defaults

Overwriting hyperdrive_env.yml


In [11]:
# Create the Python environment for the HyperDrive training runs from the conda
# specification written by the previous cell (./hyperdrive_env.yml)
hyper_env = Environment.from_conda_specification("experiment_env", "./hyperdrive_env.yml")

In [12]:
# Create an early termination policy. This is not required if you are using Bayesian sampling.
# Bandit policy with evaluation_interval=2 and slack_factor=0.1.
early_termination_policy = BanditPolicy(evaluation_interval = 2, slack_factor = 0.1)

# Search space for random sampling. Each key is the command-line flag that is
# appended to the training script's arguments (see the '-Arguments' output of the best run).
param_sampling = RandomParameterSampling(
    {
        # discrete candidates
        '--num_leaves' : choice(350, 370, 400, 420, 450, 470, 490, 510, 530),
        # continuous ranges, given as uniform(low, high)
        '--min_child_weight' : uniform(0.02, 0.2),
        '--feature_fraction' : uniform(0.2, 0.5),
        '--bagging_fraction' : uniform(0.2, 0.5),
        # every integer from 80 to 150 inclusive
        '--min_data_in_leaf' : choice(range(80, 151)),
        '--max_depth' : choice(-1, 1, 2, 3, 4, 5, 6, 7, 8 , 9, 10, 15, 20 ,25, 30 ,35, 40 ,45),
        '--learning_rate' : uniform(0.001, 0.02)
    }
)

# Create the script run configuration and the HyperDrive config
training_folder="./train_model" 

# Output of FeatureEngineeringStep, read as parquet files; passed to the training script as --input-data
train_dataset_hd = train_data_hd.parse_parquet_files()

# NOTE: despite its name, `estimator` is a ScriptRunConfig (script, arguments, environment, compute target)
estimator = ScriptRunConfig(source_directory = training_folder,
                            script = 'train_model.py',
                            arguments = ['--input-data', train_dataset_hd],
                            environment = hyper_env,
                            compute_target = cpu_cluster)


# Maximize the 'AUC' metric over at most 100 runs, with at most 4 runs at a time.
# presumably train_model.py logs a metric named 'AUC' — the script is not shown here; verify.
hyperdrive_run_config = HyperDriveConfig(run_config = estimator,
                                        hyperparameter_sampling = param_sampling,
                                        policy = early_termination_policy,
                                        primary_metric_name = 'AUC',
                                        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                        max_total_runs = 100,
                                        max_concurrent_runs = 4)

In [13]:
# Names under which the two HyperDrive outputs are exposed as pipeline outputs;
# hyperdrive_best_iter_outputname is used later with get_pipeline_output(...)
hyperdrive_metrics_output_name = 'metrics_output'
hyperdrive_best_iter_outputname = 'best_iter_output'

# Pipeline output of type "Metrics" produced by the HyperDrive step
metrics_data = PipelineData(name = "metrics_data",
                        datastore = def_blob_store,
                        pipeline_output_name = hyperdrive_metrics_output_name,
                        training_output = TrainingOutput(type = "Metrics"))

# Pipeline output of type "Model" produced by the HyperDrive step.
# NOTE(review): model_file here is 'outputs/model/hyperdrive_lgbm_fraud.pkl', while the
# registration cell uses 'outputs/hyperdrive_lgbm_fraud.pkl' — confirm which path
# train_model.py actually writes.
model_data = PipelineData(name = "model_data",
                          datastore = def_blob_store,
                          pipeline_output_name = hyperdrive_best_iter_outputname,
                          training_output = TrainingOutput(type= "Model",
                                                         model_file="outputs/model/hyperdrive_lgbm_fraud.pkl"))


# HyperDrive step: runs the sweep defined by hyperdrive_run_config on the
# feature-engineered dataset and produces the two outputs declared above
hd_step_name = 'hd_step'
hd_step = HyperDriveStep(
            name = hd_step_name,
            hyperdrive_config = hyperdrive_run_config,
            inputs = [train_dataset_hd],
            outputs = [metrics_data, model_data]
)

In [14]:
# Experiment under which the pipeline run is submitted.
# NOTE: despite its name, this variable is an Experiment, not a Pipeline.
hyperdrive_fraud_pipeline = Experiment(ws, 'fraud_detection_hyperdrive_pipeline')

In [16]:
# Submit Experiment
# Only hd_step is listed explicitly; the three data-prep steps are added through
# its data dependencies (see the "Created step ..." lines in the cell output).
pipeline_steps = [hd_step]
pipeline_hd = Pipeline(workspace = ws, steps = pipeline_steps)

# regenerate_outputs=False: steps may reuse outputs from a previous run
hyperdrive_pipeline_run = hyperdrive_fraud_pipeline.submit(pipeline_hd, regenerate_outputs = False)

Created step hd_step [b398ee08][cfef7ff6-83d4-460f-b0e7-ba64cfc683e2], (This step will run and generate new outputs)
Created step feature_engineering.py [beca33e5][e154492b-38fa-423c-8416-e6e45bb5e797], (This step is eligible to reuse a previous run's output)
Created step select_col.py [0f26992a][b70563e7-e99e-466a-bb29-cdd2bf6890ab], (This step is eligible to reuse a previous run's output)
Created step clean_data.py [e046f368][95e3d8a7-699d-4def-95d6-d07158e3c179], (This step is eligible to reuse a previous run's output)
Submitted PipelineRun e09b1dc6-f679-4f99-9b93-bbd73c06d006
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/e09b1dc6-f679-4f99-9b93-bbd73c06d006?wsid=/subscriptions/2186b060-2874-42c6-b0f7-0335ccdedb37/resourcegroups/azure-ml-eng-dev/workspaces/azureml-eng-dev-generic&tid=271d5e7b-1350-4b96-ab84-52dbda4cf40c


## Run Details

In [17]:
# Show the run-monitoring widget for the submitted pipeline run
RunDetails(hyperdrive_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

## Retrieve Metrics
The 'AUC' metric of all 100 runs can be retrieved via the Azure ML SDK.

In [15]:
# Resolve the ids needed to reach the HyperDrive run:
#   pipeline run -> 'hd_step' step run -> HyperDrive child run.

# Experiment.get_runs() yields runs in reverse chronological order, so the FIRST
# item is the most recently submitted pipeline run. (Looping over the whole
# generator and keeping the last id would select the oldest run instead.)
pipeline_run = next(iter(hyperdrive_fraud_pipeline.get_runs(include_children=False)), None)
if pipeline_run is None:
    raise RuntimeError(
        f"Experiment '{hyperdrive_fraud_pipeline.name}' has no runs; submit the pipeline first."
    )
pipeline_run_id = pipeline_run.id

hd_pipeline_run = PipelineRun(hyperdrive_fraud_pipeline, pipeline_run_id)

# The step run, its HyperDrive child run and the pipeline outputs only exist once
# the pipeline has executed, so block here until it finishes. This returns
# immediately when the run is already complete.
hd_pipeline_run.wait_for_completion(show_output=False)

# Step run(s) of the HyperDrive step; as before, the last one found is used.
hd_step_runs = hd_pipeline_run.find_step_run(hd_step_name)
if not hd_step_runs:
    raise RuntimeError(f"No step run named '{hd_step_name}' in pipeline run {pipeline_run_id}.")
hd_pipeline_id = hd_step_runs[-1].id

# Wrap the step run id in a PipelineRun to enumerate its children; the last
# child's id is used as the HyperDrive run id.
hd_steps_run = PipelineRun(hyperdrive_fraud_pipeline, hd_pipeline_id)
hd_child_runs = list(hd_steps_run.get_steps())
if not hd_child_runs:
    raise RuntimeError(f"Step run {hd_pipeline_id} has no child runs.")
hd_run_id = hd_child_runs[-1].id

In [18]:
# Re-attach to the HyperDrive run by id, using the same HyperDriveConfig it was built from
hd_run = HyperDriveRun(hyperdrive_fraud_pipeline, hd_run_id, hyperdrive_run_config)

In [19]:
# Display the run summary (experiment, id, type, status)
hd_run

Experiment,Id,Type,Status,Details Page,Docs Page
fraud_detection_hyperdrive_pipeline,HD_7adad9a4-25cf-441b-9d89-95b657c4ef6f,hyperdrive,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [20]:
# Show the run-monitoring widget for the HyperDrive run
RunDetails(hd_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

---

## Retrieve Best Model

In [21]:
# Child run with the best primary metric ('AUC', maximized per hyperdrive_run_config)
best_run = hd_run.get_best_run_by_primary_metric()
# Metrics logged by that run; 'AUC' is read from this dict in later cells
best_run_metrics = best_run.get_metrics()
# Command-line arguments the best run was started with (includes the sampled hyperparameters)
script_arguments = best_run.get_details()['runDefinition']['arguments']

In [22]:
# Retrieve best model from Pipeline Run:
# look up the pipeline output published as hyperdrive_best_iter_outputname
# ('best_iter_output') and download it into the current directory
best_model_output = hd_pipeline_run.get_pipeline_output(hyperdrive_best_iter_outputname)
num_file_downloaded = best_model_output.download('.', show_progress=True)

In [23]:
# Display the pipeline output reference (name, datastore, path, producing pipeline run)
best_model_output

Name,Datastore,Path on Datastore,Produced By PipelineRun
best_iter_output,workspaceblobstore,azureml/7962af28-0081-474e-bd2c-319feff0487e/model_data,e09b1dc6-f679-4f99-9b93-bbd73c06d006


In [24]:
# Summarize the best run: its id, its AUC, and the arguments it was started with
print('Best Run Id: ', best_run.id)
print(' -AUC:', best_run_metrics['AUC'])
print(' -Arguments:',script_arguments)

Best Run Id:  HD_7adad9a4-25cf-441b-9d89-95b657c4ef6f_2
 -AUC: 0.9320548099056014
 -Arguments: ['--input-data', 'DatasetConsumptionConfig:train_data_hd', '--bagging_fraction', '0.2197201934013026', '--feature_fraction', '0.4123170175756379', '--learning_rate', '0.019492716106797613', '--max_depth', '30', '--min_child_weight', '0.06696843942969444', '--min_data_in_leaf', '90', '--num_leaves', '370']


In [30]:
from azureml.core import Model

# Register model
# Registers the best run's model file under the name 'LGBM_HyperDriveFraudDetection',
# tagged with the training context and with the run's AUC stored as a property.
# NOTE(review): model_path is 'outputs/hyperdrive_lgbm_fraud.pkl' here, whereas the
# HyperDrive TrainingOutput uses 'outputs/model/hyperdrive_lgbm_fraud.pkl' — confirm
# which path train_model.py writes.
best_run.register_model(model_path='outputs/hyperdrive_lgbm_fraud.pkl', model_name='LGBM_HyperDriveFraudDetection',
                        tags={'Training context':'Hyperdrive'},
                        properties={'AUC': best_run_metrics['AUC']})


Model(workspace=Workspace.create(name='azureml-eng-dev-generic', subscription_id='2186b060-2874-42c6-b0f7-0335ccdedb37', resource_group='azure-ml-eng-dev'), name=LGBM_HyperDriveFraudDetection, id=LGBM_HyperDriveFraudDetection:2, version=2, tags={'Training context': 'Hyperdrive'}, properties={'AUC': '0.9320548099056014'})

## Model Deployment

In [31]:
# Print every model registered in the workspace: name and version first,
# then one indented line per tag and one per property.
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

LGBM_HyperDriveFraudDetection version: 2
	 Training context : Hyperdrive
	 AUC : 0.9320548099056014


AutomlFraudDetection version: 1




In [32]:
# Fetch the registered model by name from the workspace; this object is deployed below
model = ws.models['LGBM_HyperDriveFraudDetection']
print(model.name, 'version', model.version)

LGBM_HyperDriveFraudDetection version 2


In [33]:
import os

# Create a folder for the deployment files (no error if it already exists)
deployment_folder = './fraud_detect_lgbm'
os.makedirs(deployment_folder, exist_ok=True)

# Set path for scoring script; the next cell writes the script to this path,
# and script_file is later used as the InferenceConfig entry script
script_file = 'score_fraud.py'
script_path = os.path.join(deployment_folder,script_file)

In [34]:
%%writefile $script_path
import json
import joblib
import numpy as np
import os

# Called when the service is loaded
def init():
    """Load the registered model into the module-level ``model`` variable.

    The model file ``hyperdrive_lgbm_fraud.pkl`` is read with joblib from the
    directory named by the ``AZUREML_MODEL_DIR`` environment variable.
    """
    global model
    # Directory of the deployed model, as provided through the environment
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'hyperdrive_lgbm_fraud.pkl'))

# Called when a request is received
def run(raw_data):
    """Score one request and return the predicted class names as a JSON string.

    Args:
        raw_data: JSON text holding the feature rows under the key ``'data'``.

    Returns:
        A JSON-encoded list with one class name per prediction returned by
        ``model.predict``.
    """
    # Position in this list is the predicted label: 0 -> 'non-fraud', 1 -> 'fraud'
    classnames = ['non-fraud', 'fraud']
    # Decode the request body and turn the rows into a numpy array
    payload = json.loads(raw_data)
    features = np.array(payload['data'])
    # Each prediction is used directly as an index into classnames
    # (assumes model.predict yields integer labels 0/1 — TODO confirm for the deployed model)
    predicted_classes = [classnames[label] for label in model.predict(features)]
    # Return the predictions as JSON
    return json.dumps(predicted_classes)


Writing ./fraud_detect_lgbm/score_fraud.py


In [35]:

from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

# Configure the scoring environment
service_env = Environment(name='service-env')
# pip packages installed into the scoring image.
# (The previous list ended with an empty string, which added a blank pip requirement.)
# NOTE(review): score_fraud.py imports joblib, which is not listed explicitly —
# presumably it arrives as a dependency of scikit-learn; confirm.
python_packages = ['scikit-learn', 'azureml-defaults', 'azure-ml-api-sdk', 'lightgbm',
                    'pandas', 'numpy']

for package in python_packages:
    service_env.python.conda_dependencies.add_pip_package(package)

# Entry script and environment used to serve the model
inference_config = InferenceConfig(source_directory=deployment_folder,
                                   entry_script=script_file,
                                   environment=service_env)

# Configure the web service container (ACI, 2 CPU cores, 2 GB memory)
deployment_config = AciWebservice.deploy_configuration(cpu_cores=2, memory_gb=2)

# Deploy the model as a service; overwrite=True replaces an existing service of the same name
print('Deploying model...')
service_name = "lgbmfrauddetection"
service = Model.deploy(ws, service_name, [model], inference_config, deployment_config, overwrite=True)
# Block until the deployment finishes, streaming progress output
service.wait_for_deployment(True)
print(service.state)

Deploying model...
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-10-27 09:09:27+00:00 Creating Container Registry if not exists.
2021-10-27 09:09:27+00:00 Registering the environment..
2021-10-27 09:09:47+00:00 Building image.

In [36]:
# Print the container logs of the deployed service (useful to check that the scoring server started)
print(service.get_logs())

2021-10-27T09:18:29,120836800+00:00 - gunicorn/run 
2021-10-27T09:18:29,120924500+00:00 - rsyslog/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-10-27T09:18:29,124075500+00:00 - nginx/run 
2021-10-27T09:18:29,124516200+00:00 - iot-server/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-10-27T09:18:29,327287100+00:00 - iot-server/finish 1 0
2021-10-27T09:18:29,329749100+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (76)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 104
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-10-27 09:18:30,166 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2021-10-27 09:18:30,167 | root | INFO | Starting up request id generator
2021-10-27 09:18:30,167 | root | INFO | Sta

In [37]:
# List the names of all web services in the workspace
for webservice_name in ws.webservices:
    print(webservice_name)

lgbmfrauddetection
automlfrauddetection
