In [1]:
import azureml.core
from azureml.core import Workspace

# Connect to the Azure ML workspace described by the saved config file
ws = Workspace.from_config()

# Report the SDK version in use and the name of the workspace that was loaded
print(
    'Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION,
        ws.name,
    )
)

Ready to use Azure ML 1.37.0 to work with dp100-workspace


In [2]:
# Stage a folder that holds both the training dataset and the training script

import os
import shutil

training_folder = 'training'
os.makedirs(training_folder, exist_ok=True)

# Kept as the last expression of the cell so the notebook echoes the
# destination path returned by shutil.copy
shutil.copy(
    'dp100_practice/car_prediction2.csv',
    os.path.join(training_folder, 'car_prediction2.csv'),
)

'training/car_prediction2.csv'

In [3]:
%%writefile $training_folder/car_prediction.py
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,roc_curve

# Training script: fits a logistic regression that predicts the 'Owner'
# column of the car dataset, logs the regularization rate, accuracy and AUC
# to the Azure ML run, and saves the fitted model under 'outputs/'.

# get the experiment run context
run = Run.get_context()

# loading the car data (read from the script's working directory)
data = pd.read_csv('car_prediction2.csv')

# applying label encoding
# NOTE(review): DataFrame.apply runs the encoder on every column, so any
# numeric columns are replaced by integer codes as well - confirm this is
# intended rather than encoding only the categorical columns.
pre = preprocessing.LabelEncoder()
data1 = data.apply(pre.fit_transform)

# separating features and labels
feature_columns = ['Selling_Price','Present_Price','Kms_Driven','Fuel_Type','Seller_Type','Transmission','car_age']
x,y = data1[feature_columns].values,data1['Owner'].values


# splitting the dataset into training (80%) and testing (20%)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=123)

# setting regularization hyperparameter
reg= 0.02

# training the model
# Metrics are logged with the built-in float(): the np.float alias used
# previously was deprecated in NumPy 1.20 and removed in NumPy 1.24, so it
# raises AttributeError on any environment that resolves a recent numpy.
print('training logistic regression model with regularization rate',reg)
run.log('Regularization',float(reg))
# scikit-learn's C is the inverse of the regularization strength
model=LogisticRegression(C=1/reg,solver="liblinear").fit(x_train,y_train)

# accuracy = fraction of test rows whose prediction matches the label
pred = model.predict(x_test)
acc = np.average(pred == y_test)
print('accuracy:',acc)
run.log('Accuracy',float(acc))

# calculating AUC from the predicted probability in column 1
# NOTE(review): scoring a single probability column presumes the labels in
# y_test are binary - confirm 'Owner' has only two classes.
y_score=model.predict_proba(x_test)
auc=roc_auc_score(y_test,y_score[:,1])
print('AUC:'+ str(auc))
run.log('AUC',float(auc))

# saving the model into the 'outputs' folder

os.makedirs('outputs',exist_ok=True)
joblib.dump(value=model, filename='outputs/car_model.pkl')

run.complete()

Overwriting training/car_prediction.py


In [4]:
# Define the Python environment in which the training script runs as an experiment

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Package dependencies for the environment (conda or pip as required)
packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'pip'],
    pip_packages=['azureml-sdk', 'azureml-dataprep'],
)

# Create the environment and let Azure ML manage its dependencies
car_env = Environment('car_env')
car_env.python.user_managed_dependencies = False

# Run inside a docker container.
# NOTE: this attribute triggers the SDK's "'enabled' is deprecated" warning,
# which points to azureml.core.runconfig.DockerConfiguration(use_docker=...)
# as the replacement.
car_env.docker.enabled = True

# Attach the package dependencies to the environment
car_env.python.conda_dependencies = packages

print(car_env.name, 'defined.')

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


car_env defined.


In [5]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails
from azureml.core.conda_dependencies import CondaDependencies

# Experiment under which the run is recorded in the workspace
experiment_name = 'car_training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Script configuration: which script to run, from which folder, in which environment
script_config = ScriptRunConfig(
    source_directory=training_folder,
    script='car_prediction.py',
    environment=car_env,
)

# Submit the run and display its progress in the notebook widget
run = experiment.submit(config=script_config)
RunDetails(run).show()

# Block until the run has completed; kept as the last expression so the
# returned run details are displayed as the cell output
run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'car_training_1642092696_7c4e3e04',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2022-01-13T16:51:41.113104Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '14e59ded-ae05-44f6-968d-c2071d7f9ba8',
  'azureml.git.repository_uri': 'https://github.com/MicrosoftLearning/DP100',
  'mlflow.source.git.repoURL': 'https://github.com/MicrosoftLearning/DP100',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': 'de749cb0fe621ad79335c5ad7f4814c04ba855f5',
  'mlflow.source.git.commit': 'de749cb0fe621ad79335c5ad7f4814c04ba855f5',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'car_prediction.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {}

In [7]:
# Print each metric logged by the run, then the names of the files it produced
metrics = run.get_metrics()
for key, value in metrics.items():
    print(key, value)
print('\n')
for file in run.get_file_names():
    print(file)

Regularization 0.02
Accuracy 0.9672131147540983
AUC 0.6779661016949152


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
outputs/car_model.pkl


In [8]:
from azureml.core import Model

# Fetch the logged metrics once: the original called run.get_metrics() twice
# inside the properties dict, costing an extra service request. A single
# fetch also guarantees both properties come from the same snapshot.
run_metrics = run.get_metrics()

# Register the model file produced by the run, tagged with how it was trained
# and annotated with its evaluation metrics
run.register_model(model_path='outputs/car_model.pkl', model_name='car_model',
                   tags={'Training context':'Script'},
                   properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']})

# List registered models in the workspace with their tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

car_model version: 3
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


car_model version: 2
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


diabetes_model version: 4
	 Training context : Script
	 AUC : 0.8483377282451863
	 Accuracy : 0.774


car_model2 version: 1
	 Training context : Decision Tree script
	 AUC : 0.4830508474576271
	 Accuracy : 0.9016393442622951


car_model version: 1
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


diabetes_model version: 3
	 Training context : Pipeline
	 AUC : 0.8852361217164025
	 Accuracy : 0.8993333333333333


diabetes_model version: 2
	 Training context : Parameterized script
	 AUC : 0.8484377332205582
	 Accuracy : 0.774


diabetes_model version: 1
	 Training context : Script
	 AUC : 0.8483377282451863
	 Accuracy : 0.774


amlstudio-predict-diabetes version: 4
	 CreatedByAMLStudio : true


amlstudio-predict-diabetes version: 3
	 

In [9]:
# Second experiment: a new script with a different algorithm.
# Stage a separate folder that holds the training dataset and the new training script.

import os
import shutil

training_folder = 'training2'
os.makedirs(training_folder, exist_ok=True)

# Kept as the last expression of the cell so the notebook echoes the
# destination path returned by shutil.copy
shutil.copy(
    'dp100_practice/car_prediction2.csv',
    os.path.join(training_folder, 'car_prediction2.csv'),
)

'training2/car_prediction2.csv'

In [10]:
%%writefile $training_folder/car_prediction2.py
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score,roc_curve

# Training script: fits a decision tree classifier that predicts the 'Owner'
# column of the car dataset, logs accuracy and AUC to the Azure ML run, and
# saves the fitted model under 'outputs/'.

# get the experiment run context
run = Run.get_context()

# loading the car data (read from the script's working directory)
data = pd.read_csv('car_prediction2.csv')

# applying label encoding
# NOTE(review): DataFrame.apply runs the encoder on every column, so any
# numeric columns are replaced by integer codes as well - confirm this is
# intended rather than encoding only the categorical columns.
pre = preprocessing.LabelEncoder()
data1 = data.apply(pre.fit_transform)

# separating features and labels
feature_columns = ['Selling_Price','Present_Price','Kms_Driven','Fuel_Type','Seller_Type','Transmission','car_age']
x,y = data1[feature_columns].values,data1['Owner'].values


# splitting the dataset into training (80%) and testing (20%)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=234)

# training the model
# NOTE(review): no random_state is passed to the tree, so the fitted model
# may differ between runs - confirm whether reproducibility matters here.
print('training Decision Tree Classifier model')
dt = DecisionTreeClassifier(max_depth=100)
model=dt.fit(x_train,y_train)

# accuracy = fraction of test rows whose prediction matches the label.
# Metrics are logged with the built-in float(): the np.float alias used
# previously was deprecated in NumPy 1.20 and removed in NumPy 1.24, so it
# raises AttributeError on any environment that resolves a recent numpy.
pred = model.predict(x_test)
acc = np.average(pred == y_test)
print('accuracy',acc)
run.log('Accuracy',float(acc))

# calculating AUC from the predicted probability in column 1
# NOTE(review): scoring a single probability column presumes the labels in
# y_test are binary - confirm 'Owner' has only two classes.
y_score=model.predict_proba(x_test)
auc=roc_auc_score(y_test,y_score[:,1])
print('AUC'+ str(auc))
run.log('AUC',float(auc))

# saving the model into the 'outputs' folder

os.makedirs('outputs',exist_ok=True)
joblib.dump(value=model, filename='outputs/car_model2.pkl')

run.complete()

Overwriting training2/car_prediction2.py


In [11]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails
from azureml.core.conda_dependencies import CondaDependencies

# Experiment under which the run is recorded in the workspace
experiment_name = 'car_training2'
experiment = Experiment(workspace=ws, name=experiment_name)

# Script configuration: which script to run, from which folder, in which environment
script_config = ScriptRunConfig(
    source_directory=training_folder,
    script='car_prediction2.py',
    environment=car_env,
)

# Submit the run and display its progress in the notebook widget
run = experiment.submit(config=script_config)
RunDetails(run).show()

# Block until the run has completed; kept as the last expression so the
# returned run details are displayed as the cell output
run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'car_training2_1642092756_aad8dd27',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2022-01-13T16:52:38.910509Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '29d4ca31-e544-47c8-af22-271e0ce85248',
  'azureml.git.repository_uri': 'https://github.com/MicrosoftLearning/DP100',
  'mlflow.source.git.repoURL': 'https://github.com/MicrosoftLearning/DP100',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': 'de749cb0fe621ad79335c5ad7f4814c04ba855f5',
  'mlflow.source.git.commit': 'de749cb0fe621ad79335c5ad7f4814c04ba855f5',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'car_prediction2.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': 

In [12]:
# Print each metric logged by the run, then the names of the files it produced
metrics = run.get_metrics()
for key, value in metrics.items():
    print(key, value)
print('\n')
for file in run.get_file_names():
    print(file)

Accuracy 0.9016393442622951
AUC 0.4830508474576271


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
outputs/car_model2.pkl


In [13]:
# Registering the decision tree model

from azureml.core import Model

# Fetch the logged metrics once: the original called run.get_metrics() twice
# inside the properties dict, costing an extra service request. A single
# fetch also guarantees both properties come from the same snapshot.
run_metrics = run.get_metrics()

# Register the model file produced by the run, tagged with how it was trained
# and annotated with its evaluation metrics
run.register_model(model_path='outputs/car_model2.pkl', model_name='car_model2',
                   tags={'Training context':'Decision Tree script'},
                   properties={'AUC': run_metrics['AUC'], 'Accuracy': run_metrics['Accuracy']})

# List registered models in the workspace with their tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

car_model2 version: 2
	 Training context : Decision Tree script
	 AUC : 0.4830508474576271
	 Accuracy : 0.9016393442622951


car_model version: 3
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


car_model version: 2
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


diabetes_model version: 4
	 Training context : Script
	 AUC : 0.8483377282451863
	 Accuracy : 0.774


car_model2 version: 1
	 Training context : Decision Tree script
	 AUC : 0.4830508474576271
	 Accuracy : 0.9016393442622951


car_model version: 1
	 Training context : Script
	 AUC : 0.6779661016949152
	 Accuracy : 0.9672131147540983


diabetes_model version: 3
	 Training context : Pipeline
	 AUC : 0.8852361217164025
	 Accuracy : 0.8993333333333333


diabetes_model version: 2
	 Training context : Parameterized script
	 AUC : 0.8484377332205582
	 Accuracy : 0.774


diabetes_model version: 1
	 Training context : Script
	 AUC : 0.8483377282451863
	 Ac