# Main

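#### Main notebook: connects to the Azure ML workspace, sets up the compute cluster and environment, then builds and runs a one-step pipeline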

#### 1) Import libraries

In [1]:
# Import libraries
import numpy as np
import os
import argparse
import pandas as pd
from azureml.core import Run, Model

#### 2.1) Connect to workspace

In [2]:
import azureml.core
from azureml.core import Workspace

# Connect to the workspace described by the saved config file
ws = Workspace.from_config()

# Report which SDK version is in use and which workspace we are attached to
sdk_version = azureml.core.VERSION
print('Ready to use Azure ML {} to work with {}'.format(sdk_version, ws.name))

Ready to use Azure ML 1.47.0 to work with ml_mlops


#### 2.3) Set up compute cluster

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "Scotts-compute-cluster"

try:
    # Reuse the compute target if one with this name already exists in the workspace
    pipeline_cluster = ComputeTarget(ws, cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # No compute target with this name yet: provision a new cluster (at most 2 nodes)
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_F8S_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Show the provisioning error, then re-raise it. Swallowing it here would
        # leave `pipeline_cluster` undefined and make the notebook fail later with
        # an unrelated NameError instead of the real cause.
        print(ex)
        raise

Found existing cluster, use it.


#### 3.1) Assign the conda .yml environment to the pipeline run configuration

In [4]:
from azureml.core import Environment
from azureml.core.runconfig import RunConfiguration

script_dir = 'Script_files/'

# Build the experiment's Python environment from the conda .yml spec in the script folder
conda_spec_path = script_dir + "ScottPython_env.yml"
experiment_env = Environment.from_conda_specification("ScottPython_env", conda_spec_path)

# Register the environment in the workspace, then fetch it back by name
experiment_env.register(ws)
registered_env = Environment.get(ws, 'ScottPython_env')

# Run configuration for the pipeline: uses the registered environment
# and targets the compute cluster set up earlier in the notebook
pipeline_run_config = RunConfiguration()
pipeline_run_config.environment = registered_env
pipeline_run_config.target = pipeline_cluster

print("Run configuration created.")

Run configuration created.


In [8]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

default_ds = ws.get_default_datastore()

# Output dataset config pointing at "local_folder_temp/2" on the default datastore;
# it is handed to the step's script through the --prepped-data argument
prepped_data = OutputFileDatasetConfig(
    name="prepped_data",
    destination=(default_ds, "local_folder_temp/2"),
)

script_dir = 'Script_files/'

# The single pipeline step: run pipe1.py from the script folder on the compute cluster
step_arguments = ['--prepped-data', prepped_data]
prep_step = PythonScriptStep(
    name="Basic step",
    source_directory=script_dir,
    script_name="pipe1.py",
    arguments=step_arguments,
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)

print("Pipeline steps defined")

Pipeline steps defined


In [9]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Assemble the pipeline from its list of steps
pipeline_steps = [prep_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Submit the pipeline under the 'most-basic-pipeline' experiment.
# regenerate_outputs=True is passed on submit, so the step is run again
# rather than reusing results from a previous run.
experiment = Experiment(workspace=ws, name='most-basic-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

# Show the run-details widget, then block until the run finishes;
# the final expression's value (the run status) is displayed as the cell output
run_widget = RunDetails(pipeline_run)
run_widget.show()
pipeline_run.wait_for_completion(show_output=True)

Pipeline is built.
Created step Basic step [6142372c][d9817686-7e67-4e86-9bef-bc6a15dc0829], (This step will run and generate new outputs)
Submitted PipelineRun 4f5b2713-1137-46e7-87cc-7b37220039c3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/4f5b2713-1137-46e7-87cc-7b37220039c3?wsid=/subscriptions/93d79bfa-2175-4e42-b35c-6d4388d55441/resourcegroups/ds_sprint_2022/workspaces/ml_mlops&tid=f5a583e9-4cec-4a6b-8f76-0adc53e9f151
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 4f5b2713-1137-46e7-87cc-7b37220039c3
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/4f5b2713-1137-46e7-87cc-7b37220039c3?wsid=/subscriptions/93d79bfa-2175-4e42-b35c-6d4388d55441/resourcegroups/ds_sprint_2022/workspaces/ml_mlops&tid=f5a583e9-4cec-4a6b-8f76-0adc53e9f151
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 5be78c9a-4b94-4425-bec3-c2a6abbcead5
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5be78c9a-4b94-4425-bec3-c2a6abbcead5?wsid=/subscriptions/93d79bfa-2175-4e42-b35c-6d4388d55441/resourcegroups/ds_sprint_2022/workspaces/ml_mlops&tid=f5a583e9-4cec-4a6b-8f76-0adc53e9f151
StepRun( Basic step ) Status: NotStarted
StepRun( Basic step ) Status: Running

StepRun(Basic step) Execution Summary
StepRun( Basic step ) Status: Finished
{'runId': '5be78c9a-4b94-4425-bec3-c2a6abbcead5', 'target': 'Scotts-compute-cluster', 'status': 'Completed', 'startTimeUtc': '2023-05-01T10:17:32.025049Z', 'endTimeUtc': '2023-05-01T

'Finished'