# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [None]:
import os
from pprint import pprint # Used in printing automl model parameters

import joblib
from azureml.core import Workspace, Experiment
from azureml.core import Model # Used to get model information
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

## Dataset

### Overview

Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# External CSV hosted on GitHub; read it as a tabular dataset.
data_loc = "https://raw.githubusercontent.com/truongvanthanh95/azure-machine-learning/refs/heads/main/loan_data.csv"
ds = TabularDatasetFactory.from_delimited_files(path=data_loc)

# Pandas copy of the dataset with incomplete rows removed.
# NOTE(review): the AutoML configuration in this notebook is given `ds`, not
# this cleaned frame — confirm that is intended.
data = (
    ds
    .to_pandas_dataframe()
    .dropna()
)

In [None]:
# Workspace handle obtained through Workspace.from_config().
ws = Workspace.from_config()

# Name under which this notebook's runs are grouped.
experiment_name = 'Capstone-Project-Experiment'
experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Compute name should contain only letters, digits, and hyphens, and be 2-16 characters long
compute_name = "auto-ml-compute"
try:
    # Reuse the cluster if one with this name is already attached to the workspace.
    trainCluster = ComputeTarget(ws, compute_name)
    print(f"{compute_name} exists already")
except ComputeTargetException:
    # Only a failed lookup should trigger provisioning. A bare `except:` would
    # also swallow auth/network errors and KeyboardInterrupt and then try to
    # create a cluster anyway.
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_DS3_V2", max_nodes=5)
    trainCluster = ComputeTarget.create(ws, compute_name, compute_config)

# Block until the (new or existing) cluster reports a terminal provisioning state.
trainCluster.wait_for_completion(show_output=True)

## AutoML Configuration

In [None]:
# Run-level AutoML settings: time budget, parallelism and the metric to optimize.
automl_settings = dict(
    experiment_timeout_minutes=20,
    max_concurrent_iterations=5,
    primary_metric='accuracy',
)

# Classification on the `loan_status` column with 5-fold cross-validation,
# executed on the training cluster.
automl_config = AutoMLConfig(
    task='classification',
    training_data=ds,
    label_column_name='loan_status',
    n_cross_validations=5,
    compute_target=trainCluster,
    **automl_settings,
)

In [None]:
# Submit the AutoML configuration to the experiment; keep the run handle for monitoring.
remote_run = experiment.submit(config=automl_config)

## Run Details

In the cell below, use the `RunDetails` widget to show the different experiments.

In [None]:
# Show the monitoring widget for the run, then block until the run completes.
run_widget = RunDetails(remote_run)
run_widget.show()
remote_run.wait_for_completion(show_output=True)

## Best Model

In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [None]:
# Retrieve the best run together with its fitted model, and show the run.
best_run, fitted_model = remote_run.get_output()
print(best_run)

# Fetch the best run's metrics and print them one per line.
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

In [None]:
# Print detailed parameters of the fitted model
def print_model(model, prefix=""):
    """Print every step of ``model`` along with its parameters.

    ``model`` must expose ``steps``, a sequence of ``(name, object)`` pairs.
    A step object that has both ``estimators`` and ``weights`` is treated as
    an ensemble: its member names and weights are printed, then each member
    is printed recursively with ``"<member name> - "`` as the prefix. Any
    other step has the result of its ``get_params()`` pretty-printed.

    Args:
        model: Object with a ``steps`` attribute as described above.
        prefix: Text prepended to each step name on output.
    """
    for step in model.steps:
        step_name, step_obj = step[0], step[1]
        print(prefix + step_name)

        is_ensemble = hasattr(step_obj, 'estimators') and hasattr(step_obj, 'weights')
        if not is_ensemble:
            # Plain step: dump its parameters and move on.
            pprint(step_obj.get_params())
            print()
            continue

        # Ensemble step: summary first, then descend into each member.
        member_names = [member[0] for member in step_obj.estimators]
        pprint({'estimators': member_names, 'weights': step_obj.weights})
        print()
        for member in step_obj.estimators:
            print_model(member[1], member[0] + ' - ')

# Walk the best fitted model and print each step with its parameters.
print_model(model=fitted_model)

In [None]:
# Save the best model: register the best run's output model in the workspace,
# tagged with its training context and carrying the run's accuracy as a property.
registration_tags = {'Training context':'Auto ML'}
registration_properties = {'Accuracy': best_run_metrics['accuracy']}

myModel = best_run.register_model(
    model_name='capstoneModel_automl',
    model_path='outputs/model.pkl',
    tags=registration_tags,
    properties=registration_properties,
)

print(myModel)

In [None]:
# List registered models to verify that the model has been saved:
# name and version first, then tags and properties indented beneath.
for model in Model.list(workspace=ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')