In [1]:
import azureml.core

# Show which Azure ML SDK version this notebook is running against.
print(
    "You are currently using version",
    azureml.core.VERSION,
    "of the Azure ML SDK",
)

##### Create Azure ML Workspace, if it doesn't exist

In [3]:
from azureml.core import Workspace

# Azure subscription under which the workspace and its resource group live.
subscription_id = "4dd7cdaa-1664-46be-b521-237c98ccf3f6"

# create_resource_group / exist_ok are both enabled so the call can be issued
# whether or not the resource group and workspace are already there.
ws = Workspace.create(
    name="ML-Service-Workspace",
    subscription_id=subscription_id,
    resource_group="ML-Service-RG",
    create_resource_group=True,
    location="eastus",
    exist_ok=True,
)

##### Write workspace configuration file to local FS

In [5]:
# Save the workspace connection details to a config file on the local filesystem
# (the next cell copies that file to DBFS).
ws.write_config()

##### Copy workspace configuration file to DBFS, to persist it

In [7]:
%sh cp /databricks/driver/aml_config/config.json /dbfs/tmp
# Copies the workspace config written by ws.write_config() from the driver's local
# disk to /dbfs/tmp so that it is persisted on DBFS.
# NOTE(review): the aml_config/ location depends on the SDK version - confirm.

##### Copy serialized model file from DBFS to local FS

In [9]:
# NOTE: The model has to be copied from DBFS to the local FS, because the Azure ML
# service deployment always picks the model up from the current working dir.
import os

base_path = "/tmp/rf_model/"   # DBFS location of the serialized model
model_name = "rf_model"

# Build the local destination once and derive the "file:" URI from it.
local_path = "{}/{}".format(os.getcwd(), model_name)
model_local = "file:" + local_path

dbutils.fs.cp(base_path, model_local, True)  # third argument: recursive copy

In [10]:
%sh ls -la /databricks/driver/rf_model/rf_model.mllib
# Lists the model files copied from DBFS to confirm they are on the local disk.
# NOTE(review): assumes the notebook's working directory is /databricks/driver - confirm.

##### Register the model on Azure ML service

In [12]:
# Register the locally copied model with the Azure ML workspace.
from azureml.core.model import Model

mymodel = Model.register(
    workspace=ws,
    # local path where the model file was copied to
    model_path="{}/rf_model.mllib".format(model_name),
    # name under which the model is registered on Azure ML service
    model_name=model_name,
    description="MLlib Randomforest Model",
)

print(mymodel.name, mymodel.description, mymodel.version)

##### Create a model scoring script and save it to local FS

In [14]:
# Source text of the scoring script that is packaged into the deployed image.
# The text goes through str.format(): the model_name placeholder is substituted
# and every literal brace in the script therefore has to be doubled.
score_sparkml = """

import json
import pickle
import pyspark
from azureml.core.model import Model
from pyspark.ml import PipelineModel
from pyspark.ml.feature import IndexToString

def init():
    # One-time initialization of PySpark and predictive model

    global trainedModel
    global spark

    spark = pyspark.sql.SparkSession.builder.appName("MLlib Randomforest Model Operationalization").getOrCreate()
    model_name = "{model_name}" #interpolated
    model_path = Model.get_model_path(model_name)
    trainedModel = PipelineModel.load(model_path)

def run(input_json):
    # Scores one request payload and returns a JSON string holding either the
    # comma-separated predicted labels ("result") or an error message ("error").
    if isinstance(trainedModel, Exception):
        return json.dumps({{"trainedModel":str(trainedModel)}})

    try:
        sc = spark.sparkContext

        # Validate the payload, then hand Spark real JSON text. Parallelizing the
        # parsed Python object would make spark.read.json fall back to str(), and
        # a Python repr (None/True/False, single-quoted strings) is not valid JSON.
        input_record = json.dumps(json.loads(input_json))
        input_rdd = sc.parallelize([input_record])
        input_df = spark.read.json(input_rdd)

        # Compute prediction
        prediction = trainedModel.transform(input_df)
        # Map the numeric prediction back to the original class label; the labels
        # are taken from the third-to-last pipeline stage.
        idx_to_string = IndexToString(inputCol="prediction", outputCol="classe_predicted", labels=trainedModel.stages[-3].labels)
        prediction = idx_to_string.transform(prediction)

        #result = prediction.first().prediction
        predictions = prediction.collect()

        #Get each scored result
        preds = [str(x["classe_predicted"]) for x in predictions]
        result = ",".join(preds)
        # you can return any data type as long as it is JSON-serializable
        return json.dumps({{"result":result}})
    except Exception as e:
        result = str(e)
        return json.dumps({{"error":result}})

""".format(model_name=model_name)

# Sanity check before deployment: executing the script here surfaces syntax and
# import errors early. It also defines init() and run() in the notebook namespace.
exec(score_sparkml)

# Persist the script so the image build can pick it up as the execution script.
with open("score_sparkml.py", "w") as script_file:
    script_file.write(score_sparkml)

##### Create a conda environment file for the deployment, optionally listing any dependent packages

In [16]:
from azureml.core.conda_dependencies import CondaDependencies

# Showing how to add libs as an example - not needed for this model.
myacienv = CondaDependencies.create(
    conda_packages=[
        "scikit-learn",
        "numpy",
        "pandas",
    ]
)

# Write the environment definition to the YAML file used when building the image.
with open("mydeployenv.yml", "w") as f:
    f.write(myacienv.serialize_to_string())

##### Create deployment configuration

In [18]:
# Deployment configuration for the ACI web service
from azureml.core.webservice import AciWebservice, Webservice

myaci_config = AciWebservice.deploy_configuration(
    cpu_cores=2,
    memory_gb=2,
    tags={"name": "Databricks Azure ML ACI"},
    description="ACI to serve ADB Randomforest model.",
)

##### Create an image to host the model scoring script and deploy the web service

In [20]:
# This will take 10-15 minutes to finish
from azureml.core.image import ContainerImage

service_name = "aciadbws"
runtime = "spark-py"
driver_file = "score_sparkml.py"
my_conda_file = "mydeployenv.yml"

# Image creation: bundles the scoring script, the runtime and the conda environment.
myimage_config = ContainerImage.image_configuration(
    execution_script=driver_file,
    runtime=runtime,
    conda_file=my_conda_file,
)

# Webservice creation: builds the image and deploys the registered model with it.
myservice = Webservice.deploy_from_model(
    workspace=ws,
    name=service_name,
    deployment_config=myaci_config,
    models=[mymodel],
    image_config=myimage_config,
)

# Block until the deployment has finished, streaming progress to the cell output.
myservice.wait_for_deployment(show_output=True)

In [21]:
# Show the state reported by the deployed web service object.
print(myservice.state)

In [22]:
# Print the logs returned by the web service (useful when troubleshooting a deployment).
print(myservice.get_logs())