
Workflow

1. Initialize
    - Workspace
    - Compute cluster (to run the pipeline)
    - Environment
    - Experiment
2. Get Input Data
3. Create a .py script to train and register the model
4. Configure ScriptRun

##### Step 1: Initializing Setup

In [7]:
# Initialize Workspace: load the workspace handle from a saved configuration
# (Workspace.from_config). `ws` is passed to the compute, environment,
# experiment and dataset lookups in the cells below.
from azureml.core import Workspace, Datastore, Dataset

ws = Workspace.from_config()

In [8]:
# Initialize Compute Target
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster; later cells refer to the compute target by this name.
compute = 'ML-Pipeline-Cluster'

# Reuse the cluster when the lookup by name succeeds; otherwise provision it.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=compute)
except ComputeTargetException:
    # Lookup failed: build a provisioning configuration and create the cluster.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        idle_seconds_before_scaledown=300,
    )
    cpu_cluster = ComputeTarget.create(ws, compute, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster operation has finished, echoing its status output.
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [9]:
# creating an environment
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Name under which the environment is looked up in the workspace.
env_name = 'ENV-SDKv1'

# One-time setup alternatives, kept commented out for reference.
#
# ---- Option A: create an environment by listing its packages
# custom_env = Environment('aml-scikit-learn')
# conda_dep = CondaDependencies.create(
#     conda_packages=['scikit-learn', 'pandas', 'numpy', 'pip', 'pyodbc', 'sqlalchemy'],
#     pip_packages=['azureml-defaults'])
# custom_env.python.conda_dependencies = conda_dep
# custom_env.register(workspace=ws)   # register the environment
#
# ---- Option B: create an environment from a .yaml file
# python_packages = Environment.from_conda_specification(env_name, '../dependencies/conda.yaml')
# python_packages.register(workspace=ws)   # register the environment

# Fetch the already-registered environment by name.
reg_env = Environment.get(workspace=ws, name=env_name)

# Build a run configuration and attach the environment and the compute
# target (referenced by its name) to it.
pipeline_runconfig = RunConfiguration()
pipeline_runconfig.environment = reg_env
pipeline_runconfig.target = compute

print('RunConfiguration created')

RunConfiguration created


In [10]:
# create an experiment
from azureml.core import Experiment
# Experiment handle in the workspace; the run in the last cell is submitted
# through this object.
experiment_name = 'Pima_Experiments_Training_SDK_v1'
experiment = Experiment(workspace=ws, name=experiment_name)

##### Step 2: Get Data


In [11]:
# ---- Get the input data: look the dataset up by name in the workspace.
dataset_name = 'pima-sdk-v1'
# NOTE: despite the `df_` prefix, this is whatever Dataset.get_by_name returns
# (it is later passed on via `.as_named_input(...)`), not a loaded DataFrame.
df_tb = Dataset.get_by_name(workspace=ws, name=dataset_name)

##### Step 3: Configure ScriptRun

In [12]:
# configure & run
from azureml.core import ScriptRunConfig

# Arguments handed to the training script: the dataset as a named input
# ('raw_data') and the split ratio (its exact meaning is defined by the script).
args = [
    '--input_data', df_tb.as_named_input('raw_data'),
    '--train_test_ratio', 0.3,
]

# Tie together the script location, its arguments, the compute target (by name)
# and the environment.
src = ScriptRunConfig(
    source_directory='../src/',
    script='train_SDKv1.py',
    arguments=args,
    compute_target=compute,
    environment=reg_env,
)

# Submit the configured run to the experiment; the trailing expression
# displays the returned run object in the notebook.
run = experiment.submit(config=src)
run