#### Workflow
1. Initialize 
    - Workspace
    - Default Datastore (to store pipeline step output)
    - compute Cluster (to run the pipeline)
    - Environment
    - Experiment
2. Get Input Data
3. create Pipeline Output folders to store pipeline step output
4. create pipeline steps
5. Submit Pipeline
6. Create an Endpoint and Publish Pipeline 
7. Invoke Pipeline 


##### Step 1: Initialize Workspace

In [12]:
from azureml.core import Workspace

#Initialize Workspace
ws = Workspace.from_config()
datastore =  ws.get_default_datastore() # to store pipeline data output, mandatory to define a default datastore 

##### Get Or Create Compute Cluster

In [13]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Initialize Compute Target
# Choose a name for your CPU cluster
compute = 'ML-Pipeline-Cluster'

# Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=compute)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4,
                                                           idle_seconds_before_scaledown=300)
    cpu_cluster = ComputeTarget.create(ws, compute, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


##### Create or Get Environment

In [14]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# creating an environment
env_name = 'ENV-SDKv1'

# ---- Create environment using defining packages
# custom_env = Environment('aml-scikit-learn')
# conda_dep = CondaDependencies.create(conda_packages=['scikit-learn','pandas','numpy','pip', 'pyodbc','sqlalchemy'],
#                                      pip_packages=['azureml-defaults'])

# custom_env.python.conda_dependencies = conda_dep
# register the environment
# custom_env.register(workspace=ws)

# ---- create enviroment using .yaml file
##python_packages = Environment.from_conda_specification(env_name, '../dependencies/conda.yaml')
# register the environment
##python_packages.register(workspace=ws)

# # calling registered environent
reg_env = Environment.get(ws, env_name)

# create a run config object for the pipeline
pipeline_runconfig = RunConfiguration()

# # use the compute target
pipeline_runconfig.target = compute

# # assigning the run configuration to the envrionment
pipeline_runconfig.environment = reg_env
print('RunConfiguration created')

RunConfiguration created


In [15]:
from azureml.core import Experiment

# create an experiment
experiment_name = 'Pima_Training_pipeline_SDKv1'
experiment = Experiment(workspace = ws, name = experiment_name)

#### Step 2 : Get Input Data

In [16]:
from azureml.core import Dataset

# Get Input Data
dataset_name = 'pima-sdk-v1' 
df_tb   = Dataset.get_by_name(workspace=ws, name= dataset_name) # loading data from Dataset

##### Step 3: Create pipeline Output folder

In [17]:
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter

# creating a output folder
pre_process_output_folder = PipelineData(name='pre_process' , datastore=datastore)
post_process_output_folder= PipelineData(name='post_process',  datastore=datastore)
final_output_folder  = PipelineData(name='final_output',  datastore=datastore)

#### Step 4: Pipeline Steps

In [18]:
from azureml.pipeline.steps import PythonScriptStep

# creating pipeline steps
pre_process_step = PythonScriptStep(name = 'step 1: Data Preparation', 
                                    script_name='pima_dataProcessing_SDKv1.py', 
                                    arguments= [
                                                '--input_data'  , df_tb.as_named_input('raw_data'),
                                                '--train_test_ratio', 0.3, 
                                                '--output', pre_process_output_folder],   
                                    outputs  = [pre_process_output_folder],
                                    compute_target=compute, 
                                    runconfig=pipeline_runconfig, 
                                    allow_reuse=False, 
                                    source_directory='../src')

model_training_step = PythonScriptStep(name = 'step 2: Model Training', 
                                    script_name='pima_modelTraining_SDKv1.py',
                                    arguments= ['--input_data',pre_process_output_folder, 
                                                
                                                '--output', post_process_output_folder], 
                                    inputs   = [pre_process_output_folder], 
                                    outputs  = [post_process_output_folder], 
                                    compute_target=compute, 
                                    runconfig=pipeline_runconfig, 
                                    allow_reuse=False, 
                                    source_directory='../src') 

model_register_step = PythonScriptStep(name = 'step 3: Model Evaluation and registration', 
                                script_name='pima_modelRegister_SDKv1.py',
                                arguments= ['--actual_prediction_data',post_process_output_folder,
                                            '--output', final_output_folder
                                           ], 
                                inputs  = [post_process_output_folder], 
                                outputs = [final_output_folder], 
                                compute_target=compute, 
                                runconfig=pipeline_runconfig, 
                                allow_reuse=False,
                                source_directory='../src')

pipeline = Pipeline(workspace=ws, steps=[pre_process_step,model_training_step, model_register_step]) 
pipeline.validate()

[]

##### Step 5: Submit Pipeline

In [19]:
# Run the pipeline as an experiment
pipeline_run = Experiment(ws, 'Pima_Training_pipeline_SDKv1').submit(pipeline, continue_on_step_failure=True,)
pipeline_run.wait_for_completion(show_output=True)

Created step step 1: Data Preparation [8be655a7][d2ddb2e6-1da4-40a4-a33c-b5c9a93472a6], (This step will run and generate new outputs)
Created step step 2: Model Training [c737a3c2][9cfbc17c-4db0-4227-ae90-0c56bdb28252], (This step will run and generate new outputs)
Created step step 3: Model Evaluation and registration [7af1b054][47a2fe4b-7506-424e-a4d1-09319d1a5ea1], (This step will run and generate new outputs)
Submitted PipelineRun 864659d7-122f-4e42-bb08-0b810dec56e3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/864659d7-122f-4e42-bb08-0b810dec56e3?wsid=/subscriptions/ba5d6a04-af22-45ea-bc5a-946ef1c32949/resourcegroups/us_azure_practice/workspaces/us_azure&tid=5ac231ff-07da-46e9-9b1d-c924625f23bd
PipelineRunId: 864659d7-122f-4e42-bb08-0b810dec56e3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/864659d7-122f-4e42-bb08-0b810dec56e3?wsid=/subscriptions/ba5d6a04-af22-45ea-bc5a-946ef1c32949/resourcegroups/us_azure_practice/workspaces/us_azure&tid=5ac

'Finished'

##### Step 6: Creating an endpoint and publishing the pipeline

In [22]:
from azureml.pipeline.core import PipelineEndpoint

# The pipeline argument can be either a Pipeline or a PublishedPipeline
pipeline_endpoint = PipelineEndpoint.publish(workspace=ws,
                                             name="pima_training_PipelineEndpoint_SDKv1_02",
                                             pipeline=pipeline,
                                             description="pipeline to train model")

                                               

In [None]:
## publish it to same endpoint when the pipeline is enhanced or modified

# published_pipeline = pipeline_run.publish_pipeline(name = 'EnhancedTrainingPipelline',
#                                                    description='pipeline to train model',
#                                                    version='2')

# pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name="training_PipelineEndpoint")
# pipeline_endpoint.add_default(published_pipeline)
# print(pipeline_endpoint.endpoint)
# pipeline_endpoint

##### Step 7: Invoking Endpoint

To run the pipeline from the REST endpoint, you first need an OAuth2 Bearer-type authentication header. This example uses interactive authentication for illustration purposes, but for most production scenarios requiring automated or headless authentication, use service principle authentication as described in this notebook.

Service principle authentication involves creating an App Registration in Azure Active Directory, generating a client secret, and then granting your service principal role access to your machine learning workspace. You then use the ServicePrincipalAuthentication class to manage your auth flow.

Both InteractiveLoginAuthentication and ServicePrincipalAuthentication inherit from AbstractAuthentication, and in both cases you use the get_authentication_header() function in the same way to fetch the header

In [23]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Authentication
interactive_authentication = InteractiveLoginAuthentication()
auth_header = interactive_authentication.get_authentication_header()
print('authentication header ready')

response = requests.post(pipeline_endpoint.endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "Pima_Training_pipeline_SDKv1"})
run_id = response.json()["Id"]
# print('pipeline invoked:',)
print(auth_header)

authentication header ready
{'Authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IjlHbW55RlBraGMzaE91UjIybXZTdmduTG83WSIsImtpZCI6IjlHbW55RlBraGMzaE91UjIybXZTdmduTG83WSJ9.eyJhdWQiOiJodHRwczovL21hbmFnZW1lbnQuY29yZS53aW5kb3dzLm5ldCIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzVhYzIzMWZmLTA3ZGEtNDZlOS05YjFkLWM5MjQ2MjVmMjNiZC8iLCJpYXQiOjE2OTk5NzA1ODMsIm5iZiI6MTY5OTk3MDU4MywiZXhwIjoxNjk5OTc0OTExLCJhY3IiOiIxIiwiYWlvIjoiQVlRQWUvOFZBQUFBWDJ4T2hlcE81cHBUY3libDdpWC93ZEord1pGZDJMaWs3NngyTUZkUHN4RytEWlBzTEpFNkl3N3dOcXZDY08xVmwrS1JQQUZKQVVucGdhWjFERWNZcDZnUENpVEZ2NW5ZWUtPa3lBSDFnMHlWeVRCRlduZ3N2R1FXanI2OGVRazB5N0FYOU5xTzhIY3NzZVkvemtQK2hIdjExM0JtUUMxR2pSRjFGM3B3R0M4PSIsImFsdHNlY2lkIjoiMTpsaXZlLmNvbTowMDAzNDAwMTUwNThDMDBGIiwiYW1yIjpbInB3ZCIsIm1mYSJdLCJhcHBpZCI6IjE4YTY2ZjVmLWRiZGYtNGMxNy05ZGQ3LTE2MzQ3MTJhOWNiZSIsImFwcGlkYWNyIjoiMiIsImVtYWlsIjoidWpfYXpAb3V0bG9vay5jb20iLCJmYW1pbHlfbmFtZSI6IlNJTkhBIiwiZ2l2ZW5fbmFtZSI6IlUiLCJncm91cHMiOlsiZjU0MTQyZWQtYWEyZS00ZDM5LWE0MmEtM2M5Njc5Zjk3Y2E5Il0sI

In [24]:
from azureml.pipeline.core.run import PipelineRun

published_pipeline_run = PipelineRun(ws.experiments["Pima_Training_pipeline_SDKv1"], run_id)
published_pipeline_run

Experiment,Id,Type,Status,Details Page,Docs Page
Pima_Training_pipeline_SDKv1,f6243499-2688-4f1c-9e64-63d25aeae98a,azureml.PipelineRun,Running,Link to Azure Machine Learning studio,Link to Documentation
