# Create Azure Resources 

The goal of this notebook is to create the Azure resources needed to run the collection of SAR parallel scoring notebooks efficiently.

## Dependencies

This notebook uses the Azure CLI (`azure-cli`) for some of the resource-creation steps and the Azure ML Python SDK (`azureml.core`) for the rest.


## Credits

This notebook draws heavily from prior work on batch scoring using AML Pipelines: https://github.com/Microsoft/AMLBatchScoringPipeline/blob/master/01_create_resources.ipynb



In [None]:
# Name of the JSON file that the final cell writes the pipeline configuration to.
amlpipeline_configuration_filename = "pipeline_config_programmatic.json"

In [None]:
from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
import os
import json

In [None]:
# Region, resource group and subscription shared by every resource created below.
# The subscription id is read from the AZ_SUB environment variable and falls
# back to an empty string when that variable is not set.
LOCATION = "eastus"
RESOURCE_GROUP = "jeremr-parallelscore-auto"
SUBSCRIPTION_ID = os.environ.get("AZ_SUB", "")

In [None]:
%%sh
# LOGIN IF NECESSARY
# `az account show` exits non-zero when the CLI has no logged-in account, so its
# exit status is used as the "logged in?" check. This replaces a string
# comparison of the `az account list -o table` output against '[]', which the
# table format never produces, written with `==` inside `[ ]`, which a plain
# POSIX sh does not accept.
if az account show > /dev/null 2>&1; then
  echo "*** Already logged in to Azure."
else
  echo "*** LOGGING INTO AZURE..."
  # stdout of the login command is captured into LOGIN_OUTPUT instead of being printed.
  LOGIN_OUTPUT=$(az login -o table)
fi

In [None]:
## set appropriate subscription
!az account set -s {SUBSCRIPTION_ID}

In [None]:
## create the resource group
!az group create -l {LOCATION} -n {RESOURCE_GROUP}

In [None]:
# AML workspace and compute target.
# Both names are derived from the resource group name with '_' replaced by '-'.
AML_WORKSPACE = ("{}ws".format(RESOURCE_GROUP)).replace('_','-')
AML_COMPUTE_NAME = ("{}cmpt".format(RESOURCE_GROUP)).replace('_','-')[0:16] # limit to 16 chars
AML_VM_SIZE = "Standard_D2"
# Used as min_nodes / max_nodes when the compute target is provisioned;
# both are set to the same value.
AML_MIN_NODES = 4
AML_MAX_NODES = 4
# Directory the workspace configuration is written to.
AML_CONFIG_PATH = "prog_aml"

# Scoring script: packages, Python version and script location recorded in the
# pipeline configuration file.
CONDA_PACKAGES = ["fastparquet"]
PIP_PACKAGES = ["dask>=0.17.1", "pandas>=0.23.4", "numpy>=1.13.3", "scipy>=1.0.0", "toolz", "cloudpickle"]
PYTHON_VERSION = "3.6.7"
PYTHON_SCRIPT_NAME = "score.py"
PYTHON_SCRIPT_DIRECTORY = "scripts"

## blobs were already created and had data uploaded to them via a similar script
## e.g.
## for fn in $(ls ratings_10m.parquet/*); do echo $fn; az storage blob upload -f $fn -c inputs -n $fn --account-name jrrecostorage;  done
##  az storage blob upload -f sar_model_10m_fit0.pkl -c models -n sar_model_10m_fit0.pkl --account-name jrrecostorage

# Storage account names may only contain lowercase letters and digits and are
# limited to 24 characters, so the derived name is lowercased, stripped of every
# other character (not only '-' and '_'), and then truncated.
STORAGE_ACCOUNT_ALLOWED_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789"
STORAGE_ACCOUNT = "".join(
    ch for ch in "{}storage".format(RESOURCE_GROUP).lower()
    if ch in STORAGE_ACCOUNT_ALLOWED_CHARS
)[0:24]
INPUTS_CONTAINER = "inputs"
MODELS_CONTAINER = "models"
OUTPUTS_CONTAINER = "outputs"
print(STORAGE_ACCOUNT)

In [None]:
# Create Blob storage account
!az storage account create -n {STORAGE_ACCOUNT} -g {RESOURCE_GROUP} -l {LOCATION}

In [None]:
## get blob keys
tmp_blob_info = !az storage account keys list -g {RESOURCE_GROUP} -n {STORAGE_ACCOUNT}
blob_key = json.loads(''.join(tmp_blob_info))[0]['value']

In [None]:
# Create models, predictions and data containers
!az storage container create -n {INPUTS_CONTAINER} --account-key {blob_key} --account-name {STORAGE_ACCOUNT}
!az storage container create -n {MODELS_CONTAINER} --account-key {blob_key} --account-name {STORAGE_ACCOUNT}
!az storage container create -n {OUTPUTS_CONTAINER} --account-key {blob_key} --account-name {STORAGE_ACCOUNT}

In [None]:
# Create the AML workspace in the resource group made earlier
# (create_resource_group=False: the group is not created by this call).
workspace_options = dict(
    name=AML_WORKSPACE,
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
    create_resource_group=False,
    location=LOCATION,
    exist_ok=True,
)
aml_ws = Workspace.create(**workspace_options)

In [None]:
# Write the workspace configuration under AML_CONFIG_PATH.
# exist_ok=True keeps this cell re-runnable: without it, os.makedirs raises
# FileExistsError once the directory exists from a previous run.
os.makedirs(AML_CONFIG_PATH, exist_ok=True)
aml_ws.write_config(AML_CONFIG_PATH)

In [None]:
# Create the compute target, or reuse the one already registered under this
# name in the workspace so that re-running the notebook does not try to create
# it a second time.
existing_compute_targets = aml_ws.compute_targets
if AML_COMPUTE_NAME in existing_compute_targets:
    # NOTE(review): an existing target is reused as-is; its VM size and node
    # counts are not compared against AML_VM_SIZE / AML_MIN_NODES / AML_MAX_NODES.
    compute_target = existing_compute_targets[AML_COMPUTE_NAME]
else:
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = AML_VM_SIZE,
                                                                min_nodes = AML_MIN_NODES,
                                                                max_nodes = AML_MAX_NODES)
    compute_target = ComputeTarget.create(aml_ws, AML_COMPUTE_NAME, provisioning_config)

# Wait for provisioning to complete, showing progress in the cell output.
compute_target.wait_for_completion(show_output=True)

In [None]:
# Create and get service principal credentials
# couldn't install jq, so done manually
temp = !az ad sp create-for-rbac

In [None]:
## drop every captured line that carries a WARNING message, then parse the rest as JSON
sp_json_text = ''.join(line for line in temp if 'WARNING' not in line)
sp_info = json.loads(sp_json_text)

# Pull out the service principal fields recorded in the pipeline configuration.
sp_client = sp_info['appId']
sp_secret = sp_info['password']
tenant_id = sp_info['tenant']

In [None]:
# Gather the service principal credentials, resource names, storage details and
# scoring-script settings into the pipeline configuration.
pipeline_config = {
  "sp_tenant": tenant_id,
  "sp_client": sp_client,
  "sp_secret": sp_secret,
  "resource_group_name": RESOURCE_GROUP,
  "subscription_id": SUBSCRIPTION_ID,
  "aml_work_space": AML_WORKSPACE,
  "experiment_name": "mm_score",
  "cluster_name": AML_COMPUTE_NAME,
  "location": LOCATION,
  "blob_account": STORAGE_ACCOUNT,
  "blob_key": blob_key,
  "models_blob_container": MODELS_CONTAINER,
  "data_blob_container": INPUTS_CONTAINER,
  "preds_blob_container": OUTPUTS_CONTAINER,
  "conda_packages": CONDA_PACKAGES,
  "pip_packages": PIP_PACKAGES,
  "python_version": PYTHON_VERSION,
  "python_script_name": PYTHON_SCRIPT_NAME,
  "python_script_directory": PYTHON_SCRIPT_DIRECTORY
}

# The file holds secrets (service principal password and storage account key),
# so it is created with owner-only read/write permissions instead of the
# default mode. The mode only takes effect when the file is newly created; an
# existing file keeps its current permissions and is truncated.
config_fd = os.open(amlpipeline_configuration_filename,
                    os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                    0o600)
with os.fdopen(config_fd, 'w') as f:
    json.dump(pipeline_config, f, indent=4)