Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

In [2]:
import os
import json
import shutil

from reco_utils.dataset.criteo import get_spark_schema, load_spark_df

from azureml.core import Workspace
from azureml.core import VERSION as azureml_version

from azureml.core.model import Model
from azureml.core.image import ContainerImage

# Report which version of the core AzureML SDK this kernel has loaded
print("Azure ML SDK version:", azureml_version)

## Configure Scoring Service Variables

In [4]:
# Registered model to deploy. This name must exactly match the name used to
# save the pipeline model in the estimation notebook.
MODEL_NAME = 'criteo-lgbm.model'
MODEL_DESCRIPTION = 'LightGBM Criteo Model'

# Runtime passed to the container image configuration
CONTAINER_RUN_TIME = 'spark-PY'

# Names of files that are used below
AML_CONFIG_PATH = "/dbfs/FileStore/aml_config/config.json"
CONDA_FILE = "deploy_conda.yaml"

# Scoring implementation to deploy. Each tag maps to a driver script named
# mmlspark_<tag>.py; tags listed for this notebook:
#   "streamscore", "basescore", "baseselect", "streamselect"
IMPL_TAG = "streamselect"
DRIVER_FILE = "mmlspark_{}.py".format(IMPL_TAG)

# Setup AzureML assets (names must be lower case alphanumeric without spaces
# and between 3 and 32 characters)
# Azure ML Webservice
SERVICE_NAME = 'lightgbm-criteo-{}'.format(IMPL_TAG)
# Azure ML Container Image
CONTAINER_NAME = SERVICE_NAME


## Setup AzureML Workspace
Workspace configuration can be retrieved from the portal and uploaded to Databricks<br>
See [AzureML on Databricks](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#azure-databricks)

In [6]:
# Connect to the AzureML workspace described by the config file at AML_CONFIG_PATH
ws = Workspace.from_config(path=AML_CONFIG_PATH)

### Get the registered model

The model has already been registered in the workspace, so we retrieve its latest registered version rather than registering it again.

In [8]:
# Collect every registered version of the model, matched by exact name
models = [m for m in Model.list(ws) if m.name == MODEL_NAME]
if not models:
  # Without this guard, max() below fails with an opaque "empty sequence" error
  raise ValueError('No registered model named {} was found in the workspace'.format(MODEL_NAME))

# Deploy the most recent registration, i.e. the one with the highest version
model = max(models, key=lambda m: m.version)
max_model_version = model.version
print('MODELNAME={}; VERSION={}'.format(model.name, model.version))

## Get the Scoring Script and Dependencies

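The driver (scoring) script and the conda dependency file must already exist; the next cell verifies that both are present before the image is built.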

In [10]:
# Fail fast if either file needed to build the image is missing
for required_name, required_path in (('DRIVER_FILE', DRIVER_FILE), ('CONDA_FILE', CONDA_FILE)):
  if not os.path.exists(required_path):
    raise ValueError('{0}: {1} does not exist. Try again with a different {0}'.format(required_name, required_path))

## Create the Image

We use the `ContainerImage` class to first configure the image with the defined driver and dependencies, then to create the image for use later.<br>
Building the image allows it to be downloaded and debugged locally using docker, see [troubleshooting instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-troubleshoot-deployment)

In [12]:
# Describe the image: the scoring script, the runtime, and the conda
# dependencies, plus tags recording what was packaged.
image_tags = {"runtime": CONTAINER_RUN_TIME, "model": MODEL_NAME, "implementation": IMPL_TAG}
image_config = ContainerImage.image_configuration(execution_script=DRIVER_FILE,
                                                  runtime=CONTAINER_RUN_TIME,
                                                  conda_file=CONDA_FILE,
                                                  tags=image_tags)

# Name the image with CONTAINER_NAME, the constant declared for the container
# image in the configuration cell (it currently holds the same value as
# SERVICE_NAME, so the resulting image name is unchanged).
print(CONTAINER_NAME)
image = ContainerImage.create(name=CONTAINER_NAME,
                              models=[model],
                              image_config=image_config,
                              workspace=ws)

# Block until the image build finishes, streaming the build output
image.wait_for_creation(show_output=True)