#### Workflow
1. Initialize 
    - Workspace
    - Default Datastore (to store pipeline step output)
    - Compute Cluster (to run the pipeline)
    - Environment
    - Experiment
2. Get Input Data
3. Create Pipeline Output folders to store pipeline step output
4. Create Pipeline Steps
5. Submit Pipeline
6. Create an Endpoint and Publish Pipeline 
7. Invoke Pipeline 


##### Step 1: Initialize Workspace

In [1]:
# Initialize Workspace
from azureml.core import Workspace

# Load the workspace from the local config file.
ws = Workspace.from_config()

# The default datastore is mandatory: pipeline step outputs are stored in it.
datastore = ws.get_default_datastore()

##### Get Or Create Compute Cluster

In [2]:
# Initialize Compute Target
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster that runs the pipeline
compute = 'ML-Pipeline-Cluster'

try:
    # Reuse the cluster if the workspace already has one under this name
    cpu_cluster = ComputeTarget(workspace=ws, name=compute)
except ComputeTargetException:
    # No such cluster yet: provision one
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        idle_seconds_before_scaledown=300,
    )
    cpu_cluster = ComputeTarget.create(ws, compute, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster (existing or newly created) reports completion
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


##### Create or Get Environment

In [3]:
# creating an environment
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

env_name = 'ENV-SDKv1'

# Option A (disabled, kept for reference): create the environment by listing packages inline
# custom_env = Environment('aml-scikit-learn')
# conda_dep = CondaDependencies.create(conda_packages=['scikit-learn','pandas','numpy','pip', 'pyodbc','sqlalchemy'],
#                                      pip_packages=['azureml-defaults'])
# custom_env.python.conda_dependencies = conda_dep
# custom_env.register(workspace=ws)

# Option B (active): create the environment from a conda .yaml file and register it
python_packages = Environment.from_conda_specification(name=env_name,
                                                       file_path='../dependencies/conda.yaml')
python_packages.register(workspace=ws)

# Fetch the registered environment back from the workspace by name
reg_env = Environment.get(workspace=ws, name=env_name)

# Run configuration for the pipeline: set the compute target name and the environment
pipeline_runconfig = RunConfiguration()
pipeline_runconfig.target = compute
pipeline_runconfig.environment = reg_env
print('RunConfiguration created')

RunConfiguration created


In [4]:
from azureml.core import Experiment

# Experiment that groups the runs of this inference pipeline
experiment_name = 'Pima_Inference_pipeline_SDKv1'
experiment = Experiment(ws, experiment_name)

#### Step 2: Get Input Data

In [5]:
from azureml.core import Dataset

# Look up the registered input dataset by name in the workspace
dataset_name = 'pima_test_typeTabular_SDKv1'
df_tb = Dataset.get_by_name(ws, name=dataset_name)

##### Step 3: Create pipeline Output folder

In [21]:
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter

# One PipelineData output per step, both stored in the default datastore
pre_process_output_folder = PipelineData('pre_process', datastore=datastore)
final_output_folder = PipelineData('final_output', datastore=datastore)

#### Step 4: Pipeline Steps

In [22]:
from azureml.pipeline.steps import PythonScriptStep

# Settings that are identical for both steps
shared_step_settings = {
    'compute_target': compute,
    'runconfig': pipeline_runconfig,
    'allow_reuse': False,
    'source_directory': '../src',
}

# Step 1: takes the raw dataset as a named input and writes to the pre-process output folder
pre_process_step = PythonScriptStep(
    name='step 1: Data Preparation',
    script_name='pima_inference_dataProcessing_SDKv1.py',
    arguments=['--input_data', df_tb.as_named_input('raw_data'),
               '--output', pre_process_output_folder],
    outputs=[pre_process_output_folder],
    **shared_step_settings,
)

# Step 2: takes step 1's output folder as input and writes to the final output folder
model_prediction_step = PythonScriptStep(
    name='step 2: Model Prediction',
    script_name='pima_modelPrediction_SDKv1.py',
    arguments=['--processed_data', pre_process_output_folder,
               '--output', final_output_folder],
    inputs=[pre_process_output_folder],
    outputs=[final_output_folder],
    **shared_step_settings,
)

# Assemble the two steps into a pipeline and validate it
pipeline = Pipeline(workspace=ws, steps=[pre_process_step, model_prediction_step])
pipeline.validate()

Step step 1: Data Preparation is ready to be created [e71767ee]
Step step 2: Model Prediction is ready to be created [e823f52c]


[]

##### Step 5: Submit Pipeline

In [23]:
# Run the pipeline as an experiment.
# Submit through the `experiment` object created earlier in this notebook rather than
# rebuilding an Experiment from a second hard-coded copy of its name, so the two
# cannot drift apart if the experiment is renamed.
# continue_on_step_failure=True is kept from the original call; per the SDK docs it
# lets steps that do not depend on a failed step keep running.
pipeline_run = experiment.submit(pipeline, continue_on_step_failure=True)

# Stream the run output and block until the pipeline run finishes
pipeline_run.wait_for_completion(show_output=True)

Created step step 1: Data Preparation [e71767ee][c2095bb7-8156-4ab8-a51e-8fda01853b51], (This step will run and generate new outputs)
Created step step 2: Model Prediction [e823f52c][de299d99-7a5f-4e36-944b-5998bfc6e4e7], (This step will run and generate new outputs)
Submitted PipelineRun 8f61236c-be3b-499c-90f9-45a5ddce1499
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8f61236c-be3b-499c-90f9-45a5ddce1499?wsid=/subscriptions/ba5d6a04-af22-45ea-bc5a-946ef1c32949/resourcegroups/us_azure_practice/workspaces/us_azure&tid=5ac231ff-07da-46e9-9b1d-c924625f23bd
PipelineRunId: 8f61236c-be3b-499c-90f9-45a5ddce1499
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8f61236c-be3b-499c-90f9-45a5ddce1499?wsid=/subscriptions/ba5d6a04-af22-45ea-bc5a-946ef1c32949/resourcegroups/us_azure_practice/workspaces/us_azure&tid=5ac231ff-07da-46e9-9b1d-c924625f23bd
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: a97fbd61-886a-4866-b6bb-25264fde46e2
Link t

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "Execution failed. User process '/azureml-envs/azureml_dde9de8808e90dc357d096bbcd2dcc0e/bin/python' exited with status code 1. Please check log file 'user_logs/std_log.txt' for error details. Error: Traceback (most recent call last):\n  File \"pima_inference_dataProcessing_SDKv1.py\", line 177, in <module>\n    processed_df = train_preprocessing(df)\n  File \"pima_inference_dataProcessing_SDKv1.py\", line 86, in train_preprocessing\n    if artifacts:\nNameError: name 'artifacts' is not defined\n\n",
        "messageParameters": {},
        "details": []
    },
    "time": "0001-01-01T00:00:00.000Z",
    "componentName": "CommonRuntime"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Execution failed. User process '/azureml-envs/azureml_dde9de8808e90dc357d096bbcd2dcc0e/bin/python' exited with status code 1. Please check log file 'user_logs/std_log.txt' for error details. Error: Traceback (most recent call last):\\n  File \\\"pima_inference_dataProcessing_SDKv1.py\\\", line 177, in <module>\\n    processed_df = train_preprocessing(df)\\n  File \\\"pima_inference_dataProcessing_SDKv1.py\\\", line 86, in train_preprocessing\\n    if artifacts:\\nNameError: name 'artifacts' is not defined\\n\\n\",\n        \"messageParameters\": {},\n        \"details\": []\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\",\n    \"componentName\": \"CommonRuntime\"\n}"
    }
}

##### Step 6: Creating an endpoint and publishing the pipeline

In [1]:
from azureml.pipeline.core import PipelineEndpoint

# Publish the pipeline behind a named endpoint.
# The `pipeline` argument can be either a Pipeline or a PublishedPipeline.
endpoint_name = "pima_sdkV1_prediction_PipelineEdnpoint"
endpoint_description = "pipeline to predict data"
pipeline_endpoint = PipelineEndpoint.publish(
    workspace=ws,
    name=endpoint_name,
    pipeline=pipeline,
    description=endpoint_description,
)

                                               

In [None]:
## Publish a new version to the same endpoint when the pipeline is enhanced or modified

# published_pipeline = pipeline_run.publish_pipeline(name ='predictionPipline_v2',
#                                                    description='pipeline to predict data',
#                                                    version='2')

# pipeline_endpoint = PipelineEndpoint.get(workspace=ws, name="pima_sdkV1_prediction_PipelineEdnpoint")
# pipeline_endpoint.add_default(published_pipeline)
# print(pipeline_endpoint.endpoint)
# pipeline_endpoint

##### Step 7: Invoking Endpoint

To run the pipeline from the REST endpoint, you first need an OAuth2 Bearer-type authentication header. This example uses interactive authentication for illustration purposes, but for most production scenarios requiring automated or headless authentication, use service principal authentication as described in this notebook.

Service principal authentication involves creating an App Registration in Azure Active Directory, generating a client secret, and then granting your service principal role access to your machine learning workspace. You then use the ServicePrincipalAuthentication class to manage your auth flow.

Both InteractiveLoginAuthentication and ServicePrincipalAuthentication inherit from AbstractAuthentication, and in both cases you use the get_authentication_header() function in the same way to fetch the header.

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Authentication: obtain the authentication header through an interactive login
interactive_authentication = InteractiveLoginAuthentication()
auth_header = interactive_authentication.get_authentication_header()
print('authentication header ready')

# Invoke the published pipeline by POSTing to the endpoint's REST URL.
# "ExperimentName" is the experiment name sent with the request.
response = requests.post(pipeline_endpoint.endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "Experiments_Training"})

# Surface HTTP errors here instead of as an opaque KeyError on "Id" below
response.raise_for_status()
run_id = response.json()["Id"]

# Print the run id, not auth_header: the header holds a live bearer token that
# would otherwise be saved in the notebook output
print('pipeline invoked:', run_id)

In [None]:
from azureml.pipeline.core.run import PipelineRun

# Look up the experiment named in the REST call, then attach to the invoked run by its id
invoked_experiment = ws.experiments["Experiments_Training"]
published_pipeline_run = PipelineRun(experiment=invoked_experiment, run_id=run_id)
published_pipeline_run