# Model Operationalisation

Start by loading the required modules and initialising the workspace.

In [1]:
import logging
import os
import random
import re

#from matplotlib import pyplot as plt
#from matplotlib.pyplot import imshow
import numpy as np
import pandas as pd

import azureml
import azureml.core
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.core import Workspace, Run
from azureml.core.model import Model
# Load the Azure ML workspace from the local configuration file (config.json).
ws = Workspace.from_config()

Found the config file in: C:\Users\zhpek\Desktop\C4TS-Challenge4\aml_config\config.json


## Selecting Best Model

Next, we pull up the experiment we are working on, loop through all of its runs, and examine the best model in each run. We define the best model as the one with the highest weighted F1 score.

In [2]:
# Experiment to inspect and the metric used to rank its runs.
experiment_name = 'jmc-maintenance'
selected_metric = 'f1_score_weighted'
experiment = ws.experiments[experiment_name]

# Materialise the run listing once so it can be iterated below.
runs = list(experiment.get_runs())

# One entry per run that reports the selected metric:
# the run id becomes the row label, the row holds [metric value, AutoMLRun].
row_labels = []
rows = []
for run in runs:
    aml_run = AutoMLRun(experiment=experiment, run_id=run.id)
    print(run.id)

    # get_output() returns (best child run, fitted model); only the run is needed here.
    best_run, __ = aml_run.get_output()
    metrics = best_run.get_metrics()

    if selected_metric not in metrics:
        continue
    row_labels.append(run.id)
    rows.append([metrics[selected_metric], aml_run])

# Rows are indexed by run id; object dtype because each row mixes a float
# with an AutoMLRun instance.
models_df = pd.DataFrame(rows,
                         index=row_labels,
                         columns=[selected_metric, 'run'],
                         dtype=object)

AutoML_057e77cd-e4aa-4e42-8c4d-20b7340ba1fc


In [3]:
# Show the per-run summary: one row per run id with its metric value and run object.
models_df

Unnamed: 0,f1_score_weighted,run
AutoML_057e77cd-e4aa-4e42-8c4d-20b7340ba1fc,0.31002,"Run(Experiment: jmc-maintenance,\nId: AutoML_0..."


In [4]:
# The metric column is stored with object dtype, so cast to float before ranking.
metric_scores = models_df[selected_metric].astype(float)

# Row label (run id) of the highest score, then the matching AutoMLRun object.
run_id = metric_scores.idxmax()
best_run = models_df.loc[run_id, 'run']
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
jmc-maintenance,AutoML_057e77cd-e4aa-4e42-8c4d-20b7340ba1fc,automl,Completed,Link to Azure Portal,Link to Documentation


With the selected best model, we register it and save the model ID for later.

In [5]:
# Register the selected run's model in the workspace; the experiment name is
# used as the model description.
model = best_run.register_model(description = experiment_name, tags = None)
# Read the id only after registration; it is interpolated into the scoring
# script later so the service can locate the registered model.
model_id = best_run.model_id
model_id

Registering model AutoML057e77cdebest


'AutoML057e77cdebest'

Next, we set up the run dependencies and download the artifacts created during the training step.

In [6]:
# Fresh handle on the selected AutoML run, used below to fetch its files.
rundl = AutoMLRun(experiment = experiment, run_id = best_run.id)
# NOTE(review): iteration 2 is hard-coded and is not necessarily the best
# iteration of this run — confirm. `dependencies` is not referenced by any
# later cell visible in this notebook.
dependencies = rundl.get_run_sdk_dependencies(iteration = 2)

No issues found in the SDK package versions.


In [7]:
import pickle
# NOTE(review): `scaler` is not referenced directly; presumably the import is
# needed so the pickled scaler object can be resolved on load — confirm.
from utils import scaler

# Download every file recorded against the selected run into the working directory.
run_files = rundl.get_file_names()
print('Downloading files {}'.format(str(run_files)))
for file_name in run_files:
    rundl.download_file(file_name)

# Files to bundle with the scoring image. Only the PCA transform is shipped;
# the scaler's parameters are extracted below and written into the scoring
# script instead.
artifacts = ['pca_transform.pkl']

# NOTE: pickle.load can execute arbitrary code — only load files from a trusted source.
# Context managers make sure both file handles are closed after loading.
with open('pca_transform.pkl', 'rb') as pca_file:
    p = pickle.load(pca_file)
with open('scaler.pkl', 'rb') as scaler_file:
    s = pickle.load(scaler_file)

# Plain-list copies of the scaler parameters so they can be formatted as
# Python literals into the scoring script.
scaler_mean = list(s.mean)
scaler_var = list(s.var)
scaler_index = list(s.mean.index)

Downloading files []



Now we create the scoring script. The scoring script needs the $\texttt{pca_transform}$ and $\texttt{scaler}$ objects so that incoming data is preprocessed in the same way as the training data.

$\textbf{NOTE}$: There is a known issue with unpickling custom objects in the Docker image created through ML services. As a workaround, the parameters of the $\texttt{scaler}$ object are read locally and written directly into the scoring script, as shown below, instead of uploading the object as a dependency.

In [8]:
# Template for the scoring script that runs inside the deployed container.
# `{model_id}`, `{scaler_mean}`, `{scaler_var}` and `{scaler_index}` are filled
# in by str.format below; doubled braces (`{{` / `}}`) are escapes that become
# literal braces in the generated file.
# The generated init() opens the PCA pickle in a `with` block so the file
# handle is closed once the object is loaded.
# NOTE(review): the generated script imports joblib from `sklearn.externals`,
# which newer scikit-learn releases no longer provide — confirm the version
# installed in the image.
score_script = """
 
import pickle
import json
import numpy
import azureml.train.automl
from sklearn.externals import joblib
from azureml.core.model import Model
import pandas as pd
import numpy as np

def cleanLongLat(l):
    split = l.str.split(',', expand=True)
    split = (split[0]+'.'+[i if len(i)>1 else i+'0' for i in split[1]]).astype(float)
    return(split)

def scale(x):
    s_mean = pd.Series({scaler_mean},{scaler_index})
    s_var = pd.Series({scaler_var},{scaler_index})
    
    result = (x - s_mean) / np.sqrt(s_var)
    return (result)
    
def init():

    global model
    global pca_transform
    
    model_id = "{model_id}"
    model_path = Model.get_model_path(model_name = model_id)
    # deserialize the model file back into a sklearn model
    model = joblib.load(model_path)
    with open('pca_transform.pkl', 'rb') as pca_file:
        pca_transform = pickle.load(pca_file)


def run(rawdata):
    try:
        df = json.loads(rawdata)['data']
        df = pd.DataFrame.from_records(df)
        df['Latitude'] = cleanLongLat(df['Latitude'])
        df['Longitude'] = cleanLongLat(df['Longitude'])
        df.drop(['Machine_ID', 'District'], axis=1, inplace=True)
        df = df.apply(pd.to_numeric)
        data = pca_transform.transform(scale(df))[:,:10]
        result = model.predict(data)
    except Exception as e:
        result = str(e)
        return json.dumps({{"error": result}})
    return json.dumps({{"result":result.tolist()}}) 
    
""".format(model_id = model_id, scaler_mean = scaler_mean, scaler_var = scaler_var, scaler_index = scaler_index)

# Write the rendered script to disk; it is the execution script of the image.
with open("score.py", "w") as script_file:
    script_file.write(score_script)


Next, we create the conda file and define parameters needed for image creation.

In [9]:
from azureml.core.conda_dependencies import CondaDependencies

# Name of the conda environment file referenced by the image configuration.
conda_env_file_name = 'myenv.yml'

# Packages the scoring script needs inside the container.
myenv = CondaDependencies.create(
    conda_packages=['numpy', 'scikit-learn', 'pandas'],
    pip_packages=['azureml-sdk[automl]'],
)

# Write the environment definition into the current directory.
myenv.save_to_file('.', conda_env_file_name)

'myenv.yml'

In [10]:
%%time
from azureml.core.image import Image, ContainerImage

# Describe the container: the generated scoring script, the conda environment
# file, and the extra files listed in `artifacts` that are shipped alongside it.
image_config = ContainerImage.image_configuration(runtime= "python",
                                                  execution_script = 'score.py',
                                                  conda_file = conda_env_file_name,
                                                  dependencies = artifacts,
                                                  description = "Image for c4ts Challenge 4")

# Start building the image around the registered model.
image = Image.create(name = "c4tsimg2",
                     models = [model],
                     image_config = image_config, 
                     workspace = ws)

# Wait for the image build to finish, streaming progress to the cell output.
image.wait_for_creation(show_output = True)

# On failure, print where the build log can be found for debugging.
if image.creation_state == 'Failed':
    print("Image build log at: " + image.image_build_log_uri)

Creating image
Running....

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "c:\anaconda\envs\azureml\lib\site-packages\IPython\core\magics\execution.py", line 1271, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 14, in <module>
  File "c:\anaconda\envs\azureml\lib\site-packages\azureml\core\image\image.py", line 397, in wait_for_creation
    time.sleep(5)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\anaconda\envs\azureml\lib\site-packages\IPython\core\interactiveshell.py", line 2018, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\anaconda\envs\azureml\lib\site-packages\IPython\core\ultratb.py", line 1095, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "c:\anac

KeyboardInterrupt: 

Since this deployment is currently for testing only, we use ACI with 1 CPU core and 1 GB of memory. For production, an ACI cluster is also sufficient, given the predictable workload.

In [11]:
from azureml.core.webservice import AciWebservice

# Minimal ACI footprint for the test deployment: 1 CPU core and 1 GB of memory.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={"data": "c4ts4", "method": "automl"},
    description='C4TS Challenge 4',
)

In [12]:
%%time
from azureml.core.webservice import Webservice

# Deploy the container image as a web service using the ACI configuration.
service = Webservice.deploy_from_image(workspace=ws,
                                       name='c4ts-jmc-svc',
                                       deployment_config=aciconfig,
                                       image=image)

# Wait for the deployment to complete, then report the resulting service state.
service.wait_for_deployment(show_output=True)
print(service.state)

KeyboardInterrupt: 

## Scoring New Data

The final step is scoring the new data. We parse the new data in, send it to the deployed API above and write the result back into the data source.

This result can then be consumed in a Power BI dashboard, or used to trigger an alert to an engineer for machines with predicted imminent failures.

In [13]:
# Load the new asset data to be scored.
# To score the historical data set instead, read 'AssetData_Historical.csv'
# and drop its 'Failure_NextHour' column first.
df= pd.read_csv('AssetData_New.csv')

In [14]:
# Preview only the first rows instead of rendering the whole frame.
df.head()

KeyboardInterrupt: 

In [None]:
import csv
import json

# Read the rows with the csv module so every value stays a string; the scoring
# script splits the Latitude/Longitude strings itself.
# newline='' is what the csv module expects of file objects, so newlines
# embedded in quoted fields are parsed correctly.
with open('AssetData_New.csv', newline='') as f:
    reader = csv.DictReader(f)
    titles = reader.fieldnames
    # Payload shape expected by the scoring script: {"data": [ {column: value, ...}, ... ]}
    j_test = json.dumps({'data': list(reader)})

# Send the payload to the deployed web service and keep its response.
result = service.run(input_data=j_test)

In [None]:
# Show the response returned by the scoring service.
result