In [1]:
# To train models using script with parameters
import azureml.core
from azureml.core import Workspace


# Connect to the existing Azure ML workspace by name, subscription and resource group.
# NOTE(review): these identifiers are hard-coded in the notebook; if a saved config
# file is available, `Workspace.from_config()` can be used here instead.
ws = Workspace.get(
    name="ml-tutorial-1",
    subscription_id="08c09bd9-9292-4776-9b92-dcd3da4f6cfb",
    resource_group="ml-resource-tutorial-1",
)
print("workspace name:",ws.name)

# Read `ws.compute_targets` a single time and walk its values, rather than
# re-reading the property for every compute name inside the loop.
for compute_target in ws.compute_targets.values():
    print("compute name", compute_target)

workspace name: ml-tutorial-1
compute name {
  "Name": "my-ml-compute",
  "Id": "/subscriptions/08c09bd9-9292-4776-9b92-dcd3da4f6cfb/resourceGroups/ml-resource-tutorial-1/providers/Microsoft.MachineLearningServices/workspaces/ml-tutorial-1/computes/my-ml-compute",
  "Workspace": "ml-tutorial-1",
  "Location": "eastus",
  "VmSize": "STANDARD_D11_V2",
  "State": "Stopped",
  "Tags": {}
}


In [2]:
# make directory to copy data and save script

import os, shutil

# Folder that holds the training script and its private copy of the data
script_folder = "scripts_to_train_diabetes"
os.makedirs(script_folder, exist_ok=True)

# Copy the dataset next to the script; the destination path returned by
# shutil.copy is the cell's displayed result
source_csv = "../mslearn-dp100/data/diabetes.csv"
target_csv = os.path.join(script_folder, "diabetes.csv")
shutil.copy(source_csv, target_csv)



'scripts_to_train_diabetes/diabetes.csv'

In [3]:
# script code to train models
%%writefile $script_folder/diabetes_training_script.py
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

run = Run.get_context()

# Step-1 load the diabetes dataset
print("Loading Data...")
diabetes = pd.read_csv("diabetes.csv")

# step-2 Separate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values


# step-3 split train & test
X_train, X_test, y_train,y_test = train_test_split(X,y, test_size = 0.3, random_state = 0)

# Set regularization hyperparameter
reg = 0.01
print('Training a logistic regression model with regularization rate of', reg)
run.log('Regularization Rate',  np.float(reg))

# step-4 train a model
model = LogisticRegression(C=1/reg,solver="liblinear").fit(X_train,y_train)

# step-5 predict value
y_predict = model.predict(X_test)

# step-6 calculate accuracy
acc = np.average(y_predict == y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))

# step-7 calculate AUC metrics
y_score = model.predict_prob(x_test)
auc = roc_auc_score(y_test,y_score[:,1])
print('AUC: ' + str(auc))
run.log('AUC', np.float(auc))

# step-8 Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model,filename="outputs/trained_diabetes_model.pkl")

run.complete()

UsageError: Line magic function `%%writefile` not found.


In [8]:
# Run the training script as an experiment

from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails

# Step 1: build the Python environment for the run from the conda specification file
env = Environment.from_conda_specification(
    name="experiment_env",
    file_path="environment.yml",
)

# Step 2: describe which script to run, from which folder, in which environment
script_config = ScriptRunConfig(
    source_directory=script_folder,
    script="diabetes_training_script.py",
    environment=env,
)

# Step 3: submit the configuration as a run of the named experiment
experiment_name = "mslearn-diabetes-using-script-one"
exp = Experiment(workspace=ws, name=experiment_name)
run = exp.submit(config=script_config)

# Step 4: show the run details widget
RunDetails(run).show()

# Step 5: block until the experiment run has completed; the returned value is
# the cell's displayed result
run.wait_for_completion()



_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'mslearn-diabetes-using-script-one_1640860910_441d0ebb',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2021-12-30T10:41:51.995533Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '06ee1852-c309-47ef-b52e-1cf3b68538c4'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'diabetes_training_script.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'experiment_env',
   'version': 'Autosave_2021-12-30T10:33:41Z_23dfaba9',
   'python': {'interpreterPath': 'python',
    'userManagedDependencies': False

In [9]:
# Print every metric logged by the run as "name value"
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)

print('\n')

# Then print the name of each file recorded for the run
for file_name in run.get_file_names():
    print(file_name)

Regularization Rate 0.01
Accuracy 0.774
AUC 0.8484929598487486


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/17709_azureml.log
outputs/trained_diabetes_model.pkl


In [12]:
# Register the trained model
from azureml.core import Model

# Fetch the run's metrics once so both registered properties come from the same
# result and only a single get_metrics() call is made.
run_metrics = run.get_metrics()
run.register_model(
    model_path="outputs/trained_diabetes_model.pkl",
    model_name="trained_diabetes_model",
    tags={'Training context':'Script'},
    properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']},
)

# List every model registered in the workspace together with its tags and properties
for model in Model.list(ws):
    print("model name", model.name, " : ", model.version)
    for tag_name, tag in model.tags.items():
        print("\t tag name:",tag_name," tag:",tag)
    for prop_name, prop in model.properties.items():
        print ('\t prop name:',prop_name, ' prop:', prop)
    print('\n')
        


model name trained_diabetes_model  :  2
	 tag name: Training context  tag: Script
	 prop name: AUC  prop: 0.8484929598487486
	 prop name: Accuracy  prop: 0.774


model name trained_diabetes_model  :  1
	 tag name: Training context  tag: Script
	 prop name: AUC  prop: 0.8484929598487486
	 prop name: Accuracy  prop: 0.774




In [13]:
# Create a parameterized training script

In [14]:
import os, shutil

# Separate folder for the parameterized training script and its data copy
folder_script_1 = "scripts_to_train_diabetes_param"
os.makedirs(folder_script_1, exist_ok=True)

# Place a copy of the dataset beside the script; the returned destination path
# is the cell's displayed result
shutil.copy(
    src="../mslearn-dp100/data/diabetes.csv",
    dst=os.path.join(folder_script_1, "diabetes.csv"),
)

'scripts_to_train_diabetes_param/diabetes.csv'

In [15]:
# script with param
%%writefile $folder_script_1/diabetes_training_param.py
# Import libraries
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
import argparse
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

run = Run.get_context()

# Set Param - regularization hyperparameter
parser = argparse.ArgumentParser()
parser.add_argument('--reg_rate',type=float,dest='reg',default=0.01)
arg = parser.parse_args()
reg = arg.reg

diabetes = pd.read_csv('diabetes.csv')

# Separate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train a logistic regression model
print('Training a logistic regression model with regularization rate of', reg)
run.log('Regularization Rate',  np.float(reg))
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

y_predict = model.predict(X_test)

acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))

y_score = model.predict_proba(X_test)

auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))
run.log('AUC', np.float(auc))

os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/diabetes_model_param.pkl')

run.complete()





UsageError: Line magic function `%%writefile` not found.


In [18]:
# Run the script with arguments
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails

# Build the Python environment for the run from the conda specification file
env = Environment.from_conda_specification(
    name="experiment_env",
    file_path="environment.yml",
)

# Configure the parameterized script; the regularization rate is handed over
# through the --reg_rate command-line argument
script_config = ScriptRunConfig(
    source_directory=folder_script_1,
    script="diabetes_training_param.py",
    arguments=["--reg_rate", 0.01],
    environment=env,
)

# Submit the configuration as a run of the experiment
exp = Experiment(workspace=ws, name="mslearn-diabetes-using-script-param")
run = exp.submit(config=script_config)

# Show the run details widget
RunDetails(run).show()

# Block until the run has completed; the returned value is the cell's displayed result
run.wait_for_completion()




_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'mslearn-diabetes-using-script-param_1640863965_fb71cbda',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2021-12-30T11:32:47.945728Z',
 'endTimeUtc': '2021-12-30T11:32:54.930202Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': 'ee039c53-7448-41b1-ab44-55eded2d1df1'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'diabetes_training_param.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--reg_rate', '0.01'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'experiment_env',
   'version': 'Autosave_2021-12-30T10:33:41Z_23dfaba9',
   'python': 

In [19]:
# Print every metric logged by the run as "name value"
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)

print('\n')

# Then print the name of each file recorded for the run
for file_name in run.get_file_names():
    print(file_name)

Regularization Rate 0.01
Accuracy 0.774
AUC 0.8484929598487486


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/30703_azureml.log
outputs/diabetes_model_param.pkl


In [20]:

# Read the logged metrics once; both properties below come from this single result
run_metrics = run.get_metrics()

# Register the model file produced by the parameterized run. The call is kept as
# the cell's last expression so the returned Model is displayed as the output.
run.register_model(
    model_path="outputs/diabetes_model_param.pkl",
    model_name="diabetes_model_param",
    tags={'Training context':'Parameterized script'},
    properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']},
)

Model(workspace=Workspace.create(name='ml-tutorial-1', subscription_id='08c09bd9-9292-4776-9b92-dcd3da4f6cfb', resource_group='ml-resource-tutorial-1'), name=diabetes_model_param, id=diabetes_model_param:1, version=1, tags={'Training context': 'Parameterized script'}, properties={'AUC': '0.8484929598487486', 'Accuracy': '0.774'})

In [21]:
# Walk all models registered in the workspace and print, for each one,
# its name and version followed by its tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

diabetes_model_param version: 1
	 Training context : Parameterized script
	 AUC : 0.8484929598487486
	 Accuracy : 0.774


trained_diabetes_model version: 2
	 Training context : Script
	 AUC : 0.8484929598487486
	 Accuracy : 0.774


trained_diabetes_model version: 1
	 Training context : Script
	 AUC : 0.8484929598487486
	 Accuracy : 0.774




In [23]:
# Show the names of all files recorded for the experiment run
for generated_file in run.get_file_names():
    print(generated_file)

# Download one of them by name to the given local path
run.download_file(
    name='outputs/diabetes_model_param.pkl',
    output_file_path='diabetes_model_param.pkl',
)

azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/30703_azureml.log
outputs/diabetes_model_param.pkl


In [24]:
# End #