# Azure ML Remote Debugging 101

## Get workspace 

**Always run this cell!**

Before running:

1. For authentication, run `az login` in terminal (requires Azure CLI).
2. Download `config.json` file from your workspace to the root of this repo (next to this notebook).

To download the `config.json` file, open ML Studio (ml.azure.com), click on your subscription name (upper-right corner), and choose "**Download config file**".

In [None]:
from azureml.core import Workspace

# Connect to the Azure ML workspace described by the downloaded config.json.
# NOTE(review): presumably relies on the `az login` session for credentials - confirm.
workspace = Workspace.from_config()
# Print the workspace so a wrong or missing config is noticed immediately.
print(workspace)

## Get Dataset

Create an Azure ML file dataset in your workspace before running this cell!

In [None]:

from azureml.core import Datastore
from azureml.core import Dataset

# Name of a dataset that is already registered in the workspace.
dataset_name = 'mnist-dataset'

# Fetch the most recently registered version of that dataset.
dataset = Dataset.get_by_name(
    workspace=workspace,
    name=dataset_name,
    version='latest',
)
print(dataset)

## Load workspace parameters

In [None]:
import os

from dotenv import load_dotenv

# Load settings from a local .env file (if present) into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        ValueError: if the variable is unset or empty, so a misconfigured
            .env file is reported here rather than as an obscure failure
            in a later cell.
    """
    value = os.environ.get(name)
    if not value:
        raise ValueError(
            f"Environment variable {name} is not set (check your .env file)")
    return value


# Must be an existing Azure ML compute - create an Azure ML compute instance or an Azure ML Compute Cluster.
compute_name = _require_env("COMPUTE_NAME")

pipeline_name = _require_env("PIPELINE_NAME")
debug_connection_string = _require_env("DEBUG_GLOBAL_AZRELAY_CONNECTION_STRING")
debug_connection_string_secret_name = _require_env("DEBUG_GLOBAL_CONNECTION_SECRET_NAME")

# Store the relay connection string in the workspace's default Key Vault under
# the configured secret name; only the secret *name* is passed to the pipeline step.
workspace.get_default_keyvault().set_secret(
    debug_connection_string_secret_name, debug_connection_string)

print("Pipeline name: " + pipeline_name)


## Retrieve compute (existing)

In [None]:
# Look up the already-provisioned compute target by its configured name.
available_targets = workspace.compute_targets
compute_target = available_targets[compute_name]
print(compute_target)

## Initialize Environment with debugging dependencies

In [None]:
from azureml.core.runconfig import Environment, CondaDependencies

# Pinned pip packages for the step, including the remote-debugging
# dependencies (debugpy and azure-debug-relay).
debug_pip_packages = [
    'argparse==1.4.0',
    'azureml-core==1.22.0',
    'debugpy==1.2.1',
    'azure-debug-relay==0.5.1',
]

batch_conda_deps = CondaDependencies.create(
    conda_packages=[],
    pip_packages=debug_pip_packages,
)

# Docker-enabled environment that carries the dependencies above.
batch_env = Environment(name="train-env")
batch_env.python.conda_dependencies = batch_conda_deps
batch_env.docker.enabled = True

print("Environment has been created")

## Define debugging parameters

In [None]:
from azureml.pipeline.core import PipelineParameter

# Parameters of the pipeline; the defaults below apply when a run
# does not supply its own values.
is_debug = PipelineParameter(name="is_debug", default_value=False)
relay_connection_name = PipelineParameter(
    name="debug_relay_connection_name",
    default_value="none",
)

## Create a PythonScriptStep

In [None]:
from azureml.core.runconfig import Environment, CondaDependencies
from azureml.pipeline.steps import PythonScriptStep
from azureml.core import RunConfiguration

# Run configuration that makes the step use the debugging-enabled environment.
single_step_config = RunConfiguration()
single_step_config.environment = batch_env

# Command-line arguments handed to steps/basic_step.py.
step_arguments = [
    "--dataset", dataset.as_mount(),
    "--is-debug", is_debug,  # pipeline parameter
    "--debug-relay-connection-name", relay_connection_name,  # pipeline parameter
    "--debug-port", 5678,
    "--debug-relay-connection-string-secret", debug_connection_string_secret_name,
]

# The single pipeline step; allow_reuse=False is passed so a previous
# run's result is not reused.
single_step = PythonScriptStep(
    name="basic-step",
    script_name="steps/basic_step.py",
    source_directory=".",
    arguments=step_arguments,
    runconfig=single_step_config,
    compute_target=compute_target,
    allow_reuse=False,
)

## Create and publish a pipeline

In [None]:
from azureml.pipeline.core import Pipeline

# The pipeline consists of the single debuggable step.
steps = [single_step]

# Build and validate the pipeline before publishing it.
train_pipeline = Pipeline(workspace=workspace, steps=steps)
train_pipeline.validate()

published_pipeline = train_pipeline.publish(
    name=pipeline_name,
    description="Model training/retraining pipeline",
)

# Keep the id around: the run cell uses it to look the pipeline up again.
published_pipeline_id = published_pipeline.id
print("Published pipeline: " + published_pipeline_id)


## Run published pipeline

In [None]:
from azureml.pipeline.core import PublishedPipeline
from azureml.core.experiment import Experiment

if published_pipeline_id is None:
    raise ValueError("Initialize published_pipeline_id")

# Per-run settings. The debug flag deliberately does NOT reuse the name
# `is_debug`: that name holds the PipelineParameter object consumed by the
# PythonScriptStep cell, and overwriting it with a bool would break that cell
# if it were re-run after this one.
run_in_debug_mode = True
debug_relay_connection_name = "test"

# Values for the published pipeline's parameters, keyed by parameter name.
pipeline_parameters = {"is_debug": run_in_debug_mode}
if run_in_debug_mode:
    # A debug run needs a hybrid connection name; reject both "" and None.
    if not debug_relay_connection_name:
        raise ValueError("Hybrid connection name cannot be empty!")
    pipeline_parameters["debug_relay_connection_name"] = debug_relay_connection_name

experiment = Experiment(workspace, "Pipeline_debug_experiment")
# Re-fetch the published pipeline by id so this cell depends only on the id.
published_pipeline = PublishedPipeline.get(workspace=workspace, id=published_pipeline_id)
experiment.submit(published_pipeline, pipeline_parameters=pipeline_parameters)
