In [1]:
import azureml.core

In [2]:
# Show the installed Azure ML SDK version so the run environment is recorded with the notebook.
print(azureml.core.VERSION)

1.47.0


# Load Workspace

In [3]:
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication  # NOTE(review): imported but not used in this cell

# Load the existing workspace via Workspace.from_config() and keep the handle in `ws`
# for the cells below. Per this cell's output, the call can trigger an interactive login.
ws = Workspace.from_config()
print(ws.name, "loaded successfully")

Performing interactive authentication. Please follow the instructions on the terminal.


The default web browser has been opened at https://login.microsoftonline.com/organizations/oauth2/v2.0/authorize. Please continue the login in the web browser. If no web browser is available or if the web browser fails to open, use device code flow with `az login --use-device-code`.
The following tenants require Multi-Factor Authentication (MFA). Use 'az login --tenant TENANT_ID' to explicitly login to a tenant.
36da45f1-dd2c-4d1f-af13-5abe46b99921 'Deloitte (O365D)'


Interactive authentication successfully completed.
aml-aks-poc loaded successfully


Upload data by using get_default_datastore()

In [3]:
# Upload the local ./jobsdata directory to the workspace's default datastore under the
# 'jobsdata' target path, overwriting existing files and showing upload progress.
# NOTE(review): the import and the `ws` assignment repeat what the workspace-loading cell
# already does; they are only needed if this cell is run on its own.
# NOTE(review): this cell's output reports "Datastore.upload" as deprecated in favour of
# "Dataset.File.upload_directory" - consider migrating once the target layout is confirmed.
from azureml.core import Workspace
ws = Workspace.from_config()
ds = ws.get_default_datastore()
ds.upload(src_dir='./jobsdata', target_path='jobsdata', overwrite=True, show_progress=True)

print('Uploaded Jobs Data')

"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 1 files
Uploading ./jobsdata\jobs.csv
Uploaded ./jobsdata\jobs.csv, 1 files out of an estimated total of 1
Uploaded 1 files
Uploaded Jobs Data


# Create Compute Cluster

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster name and node-count limits; each can be overridden through an environment variable.
# os.environ.get returns a *string* whenever the variable is set, so the node counts are
# converted to int explicitly to hand the provisioning call the same type in both cases.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 1))

# VM size (SKU) of the cluster nodes, also overridable through the environment.
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

provisioning_config = AmlCompute.provisioning_configuration(
    vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes)

# Submit the cluster creation request to the workspace loaded earlier (`ws`).
# NOTE(review): the cell does not wait for provisioning to finish before printing - confirm
# whether a wait_for_completion call is wanted here.
compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

print('Compute target created')

Compute target created


Create ML Model

In [6]:
%%writefile $train.py

import argparse
import os
import numpy as np
import pandas as pd
import glob

from azureml.core import Run
# from utils import load_data

import joblib

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score


def _tree_depth(value):
    """Parse a --max-depth command-line value into a positive int.

    Integral float spellings such as "5.0" are still accepted, because this option used to
    be parsed as a float; fractional or non-positive values are rejected instead of being
    silently truncated.
    """
    try:
        number = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError('max depth must be a number, got %r' % (value,))
    if not number.is_integer() or number < 1:
        raise argparse.ArgumentTypeError('max depth must be a positive integer, got %r' % (value,))
    return int(number)


# The script takes two parameters: the folder where the data is mounted or downloaded,
# and the maximum depth of the decision tree regressor.
parser = argparse.ArgumentParser()
# Required: without it the path join below would fail on None with an opaque TypeError.
parser.add_argument('--data-folder', type=str, dest='data_folder', required=True,
                    help='data folder mounting point')
# DecisionTreeRegressor expects an integer depth, so the value is parsed as one.
parser.add_argument('--max-depth', type=_tree_depth, dest='max_depth', default=5, help='max depth')
args = parser.parse_args()

# The CSV is read from the 'jobsdata' sub-folder of the data folder passed in.
data_folder = os.path.join(args.data_folder, 'jobsdata')
print('Data folder:', data_folder)

job_data = pd.read_csv(os.path.join(data_folder, 'jobs.csv'))

# "sal" is the regression target; every other column is used as a feature.
X = job_data.drop(columns=["sal"])
y = job_data["sal"]

clf = DecisionTreeRegressor(random_state=0, max_depth=args.max_depth)
# cross_val_score returns negated MSE per fold; negate, take the root, and average over the folds.
rmse = np.mean(np.sqrt(-cross_val_score(clf, X, y, scoring="neg_mean_squared_error", cv=5)))
print('RMSE is', rmse)

# Get the experiment run context
run = Run.get_context()

# Built-in float replaces the np.float alias, which recent NumPy releases no longer provide.
run.log('max depth', float(args.max_depth))
run.log('rmse', float(rmse))

os.makedirs('outputs', exist_ok=True)

# Fit on the full data set before persisting the model.
clf.fit(X, y)
# file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename='outputs/job_model.pkl')

run.complete()

Writing $train.py


Creating Environment

In [7]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


job_env = Environment("job-experiment-env")
job_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
# Docker is explicitly disabled for this environment.
# NOTE(review): this cell's output reports 'enabled' as deprecated in favour of
# azureml.core.runconfig.DockerConfiguration(use_docker=...) - migrate where the run is configured.
job_env.docker.enabled = False

# The training script written earlier in this notebook imports pandas as well as
# scikit-learn, so both are declared instead of relying on pandas arriving transitively.
job_packages = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'])

# Add the dependencies to the environment
job_env.python.conda_dependencies = job_packages

print(job_env.name, 'defined.')

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


job-experiment-env defined.


Registering env to Azure ML Workspace

In [8]:
# Register the environment definition in the workspace `ws`; the returned object is the
# cell's last expression, so the notebook displays the registered definition below.
job_env.register(workspace=ws)

{
    "assetId": "azureml://locations/eastus2/workspaces/6f7360e0-658a-4233-8141-019587c2881d/environments/job-experiment-env/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221010.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "job-experiment-env