# Automated Machine Learning: Classification with Local Compute

## Create/Link to an Experiment


In [1]:
import logging
import os
import random

import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.core.authentication import ServicePrincipalAuthentication

In [2]:
# Authenticate with a service principal and load the target workspace.
#
# Credentials and workspace coordinates are looked up in environment variables
# first so that real secrets never need to be saved inside the notebook; the
# original placeholder strings are kept as fallbacks when a variable is unset.
# NOTE(review): newer azureml-core releases appear to name the last two
# parameters `service_principal_id` / `service_principal_password` -- confirm
# against the installed SDK version before upgrading.
spAuth = ServicePrincipalAuthentication(
    tenant_id=os.environ.get("AZUREML_TENANT_ID", "TENANT_ID"),
    username=os.environ.get("AZUREML_SERVICE_PRINCIPAL_ID", "SERVICE_PRINCIPAL_ID"),
    password=os.environ.get("AZUREML_SERVICE_PRINCIPAL_PASSWORD", "SERVICE_PRINCIPAL_PASSWORD"))

print("SDK Version: ", azureml.core.VERSION)

subscription_id = os.environ.get("AZUREML_SUBSCRIPTION_ID", "SUBSCRIPTION_ID")
resource_group = os.environ.get("AZUREML_RESOURCE_GROUP", "RESOURCE_GROUP_NAME")
workspace_name = os.environ.get("AZUREML_WORKSPACE_NAME", "WORKSPACE_NAME")

ws = Workspace(auth=spAuth, subscription_id=subscription_id,
               resource_group=resource_group, workspace_name=workspace_name)

print("Loaded workspace: " + ws.name)

SDK Version:  1.0.2
Loaded workspace: WORKSPACE_NAME


In [3]:
# Create the experiment in the workspace, or link to it if it already exists.
experiment = Experiment(workspace=ws, name='automl-local-classification')

## Load Training Data


In [4]:
# Load the comma-separated breast-cancer data set from the notebook's folder
# and print its first five rows as a quick sanity check.
df = pd.read_csv("./Breast_cancer_data.csv", sep=",")
print(df.head(n=5))



   mean_radius  mean_texture  mean_perimeter  mean_area  mean_smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   diagnosis  
0          0  
1          0  
2          0  
3          0  
4          0  


In [5]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the 'diagnosis' label column.
X_feature = df.drop(columns=['diagnosis'])
y_target = np.array(df['diagnosis'])

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_feature,
    y_target,
    test_size=0.3,
    random_state=42,
)

## Configure AutoML


In [7]:
# AutoML settings for a two-class classification run scored on accuracy.
automl_config = AutoMLConfig(
    task='classification',
    primary_metric='accuracy',
    num_classes=2,

    # Search budget: iteration count, per-iteration time limit and parallelism.
    # Per the AutoMLConfig documentation, -1 cores means "use all available".
    iterations=10,
    iteration_timeout_minutes=2,
    max_concurrent_iterations=4,
    max_cores_per_iteration=-1,

    # Optional: uncomment to restrict the search to specific algorithms.
    # whitelist_models=['LogisticRegression', 'KNN', 'LightGBM'],

    # Automatic preprocessing and model explanations are both switched off.
    preprocess=False,
    model_explainability=False,

    # Training data, with the hold-out split supplied as the validation set.
    X=X_train,
    y=y_train,
    X_valid=X_test,
    y_valid=y_test,

    # Logging options and the local path handed to AutoML.
    debug_log='automl_debug.log',
    verbosity=logging.INFO,
    path='./automl-local-classification',
)

## Train the Models

In [8]:
# Submit the AutoML configuration to the experiment; show_output=True streams
# the per-iteration progress table into the cell output.
local_run = experiment.submit(
    automl_config,
    show_output=True,
)

Parent Run ID: AutoML_a726215e-faa3-463b-8325-d0952bdb9866
*******************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
*******************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper KNN                      0:00:33       0.9240    0.9240
         1   StandardScalerWrapper KNN                      0:00:32       0.9591    0.9591
         2   StandardScalerWrapper LightGBM                 0:00:30       0.9591    0.9591
         3   RobustScaler LogisticRegression                0:00:30       0.9532    0.9591
         4    Ensemble                      

In [9]:
# Bare expression: the notebook renders the run's summary table
# (experiment, id, type, status and links).
local_run

Experiment,Id,Type,Status,Details Page,Docs Page
automl-local-classification,AutoML_a726215e-faa3-463b-8325-d0952bdb9866,automl,Completed,Link to Azure Portal,Link to Documentation


In [10]:
from azureml.widgets import RunDetails

# Render the run-details widget for the submitted AutoML run.
RunDetails(local_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 'sd…

### Retrieve the Best Model

In [11]:
# Fetch the best run together with its fitted pipeline, then print both,
# one per line.
best_run, fitted_model = local_run.get_output()
print(best_run, fitted_model, sep="\n")

Run(Experiment: automl-local-classification,
Id: AutoML_a726215e-faa3-463b-8325-d0952bdb9866_4,
Type: None,
Status: Completed)
Pipeline(memory=None,
     steps=[('prefittedsoftvotingclassifier', PreFittedSoftVotingClassifier(classification_labels=None,
               estimators=[('KNN', Pipeline(memory=None,
     steps=[('StandardScalerWrapper', <automl.client.core.common.model_wrappers.StandardScalerWrapper object at 0x000001D99E777DA0>), ('KNeighbo...
          warm_start=False))]))],
               flatten_transform=None, weights=[0.4, 0.4, 0.2]))])
