In [23]:
import sys, os
import azureml.core
from azureml.core import Workspace

ws = Workspace.from_config()

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')


SDK version: 1.18.0
MLflow version: 1.12.1
zd-ner
scan-datasources-rg
southcentralus
d35ad7f3-a6be-400c-b1db-8b70322a01a0


In [24]:
from azureml.core import Experiment

experiment_name = "ner"
experiment = Experiment(ws, name=experiment_name)


In [25]:
from azureml.core import Environment

pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = 'conda.yaml')

# Specify a GPU base image
pytorch_env.docker.enabled = True
pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'

In [54]:
from azureml.core import ScriptRunConfig
from azureml.widgets import RunDetails

src = ScriptRunConfig(source_directory=".",
                      script='train.py',
                      arguments=["--json_config", "training_config.json"],
                      compute_target="zd",
                      environment=pytorch_env)
run = experiment.submit(src)
RunDetails(run).show()

Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0. Use 'allowed_methods' instead
Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0. Use 'allowed_methods' instead


_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

In [44]:
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, uniform, PrimaryMetricGoal

param_sampling = RandomParameterSampling( {
        'lr': uniform(0.01, 1),
        'adam_epsilon': uniform(0.01, 1)
    }
)

early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)

hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling, 
                                     policy=early_termination_policy,
                                     primary_metric_name='eval_f1',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=2,
                                     max_concurrent_runs=4)

`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


In [45]:
hyperdrive_run = experiment.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()

Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0. Use 'allowed_methods' instead


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO',…

In [48]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

In [52]:
print(f'Best Run is:\n  eval_f1: {0:.5f} \n  learning rate: {1:.5f} \n  adam_epsilon: {2:.5f}'
.format(
        best_run_metrics['eval_f1'],
        best_run_metrics['lr'],
        best_run_metrics['adam_epsilon'])
     )

Best Run is:
  eval_f1: 0.00000 
  learning rate: 1.00000 
  adam_epsilon: 2.00000


In [53]:
model = best_run.register_model(model_name = 'ner', model_path = 'output/model.pt')
print(model.name, model.id, model.version, sep = '\t')

ModelPathNotFoundException: ModelPathNotFoundException:
	Message: Could not locate the provided model_path output/model.pt in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/65_job_prep-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/98_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']
                See https://aka.ms/run-logging for more details.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Could not locate the provided model_path output/model.pt in the set of files uploaded to the run: ['azureml-logs/55_azureml-execution-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/65_job_prep-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_97affce7bef56e80625260f41201760ef556cae694e59cce369f33d274feab09_p.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/98_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log']\n                See https://aka.ms/run-logging for more details."
    }
}

In [13]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.webservice import Webservice
from azureml.core.model import Model

inference_config = InferenceConfig(entry_script="score.py", environment=pytorch_env)

aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               tags={'data': 'text',  'method':'DistillBert-NER', 'framework':'pytorch'},
                                               description='Classify PER, LOC, ORG, and MISC')

service = Model.deploy(workspace=ws, 
                           name='ner_zd', 
                           models=[model], 
                           inference_config=inference_config, 
                           deployment_config=aciconfig)
service.wait_for_deployment(True)
print(service.state)

Registering model ner
2020/11/28 08:25:23 INFO mlflow.azureml: Registered an Azure Model with name: `ner` and version: `2`
2020/11/28 08:25:27 INFO mlflow.azureml: Deploying an Azure Webservice with name: `ner`
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
.................................................................................................................................................................................................
Failed
Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: a609b9cc-fec2-4b07-b413-6104c5cf6733
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease chec

WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: a609b9cc-fec2-4b07-b413-6104c5cf6733
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: ner. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: ner. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information."
    },
    {
      "code": "AciDeploymentFailed",
      "message": "Your container application crashed. Please follow the steps to debug:\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\n3. View the diagnostic events to check status of container, it may help you to debug the issue. {\"restartCount\":6,\"currentState\":{\"state\":\"Waiting\",\"startTime\":null,\"exitCode\":null,\"finishTime\":null,\"detailStatus\":\"CrashLoopBackOff: Back-off restarting failed\"},\"previousState\":{\"state\":\"Terminated\",\"startTime\":\"2020-11-28T08:39:41.24Z\",\"exitCode\":111,\"finishTime\":\"2020-11-28T08:39:47.176Z\",\"detailStatus\":\"Error\"},\"events\":[{\"count\":1,\"firstTimestamp\":\"2020-11-28T08:31:12Z\",\"lastTimestamp\":\"2020-11-28T08:31:12Z\",\"name\":\"Pulling\",\"message\":\"pulling image \\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee:latest\\\"\",\"type\":\"Normal\"},{\"count\":1,\"firstTimestamp\":\"2020-11-28T08:32:43Z\",\"lastTimestamp\":\"2020-11-28T08:32:43Z\",\"name\":\"Pulled\",\"message\":\"Successfully pulled image \\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee:latest\\\"\",\"type\":\"Normal\"},{\"count\":7,\"firstTimestamp\":\"2020-11-28T08:33:40Z\",\"lastTimestamp\":\"2020-11-28T08:39:41Z\",\"name\":\"Started\",\"message\":\"Started container\",\"type\":\"Normal\"},{\"count\":7,\"firstTimestamp\":\"2020-11-28T08:33:54Z\",\"lastTimestamp\":\"2020-11-28T08:39:47Z\",\"name\":\"Killing\",\"message\":\"Killing container with id 9729aebe38f288bb4cf21a0c7cd5faa0394a6d6b4608245882821be76af2f5d0.\",\"type\":\"Normal\"}]}"
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: a609b9cc-fec2-4b07-b413-6104c5cf6733\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: ner. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: ner. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\"\n    },\n    {\n      \"code\": \"AciDeploymentFailed\",\n      \"message\": \"Your container application crashed. Please follow the steps to debug:\\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\\n3. View the diagnostic events to check status of container, it may help you to debug the issue. {\\\"restartCount\\\":6,\\\"currentState\\\":{\\\"state\\\":\\\"Waiting\\\",\\\"startTime\\\":null,\\\"exitCode\\\":null,\\\"finishTime\\\":null,\\\"detailStatus\\\":\\\"CrashLoopBackOff: Back-off restarting failed\\\"},\\\"previousState\\\":{\\\"state\\\":\\\"Terminated\\\",\\\"startTime\\\":\\\"2020-11-28T08:39:41.24Z\\\",\\\"exitCode\\\":111,\\\"finishTime\\\":\\\"2020-11-28T08:39:47.176Z\\\",\\\"detailStatus\\\":\\\"Error\\\"},\\\"events\\\":[{\\\"count\\\":1,\\\"firstTimestamp\\\":\\\"2020-11-28T08:31:12Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-28T08:31:12Z\\\",\\\"name\\\":\\\"Pulling\\\",\\\"message\\\":\\\"pulling image \\\\\\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee:latest\\\\\\\"\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":1,\\\"firstTimestamp\\\":\\\"2020-11-28T08:32:43Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-28T08:32:43Z\\\",\\\"name\\\":\\\"Pulled\\\",\\\"message\\\":\\\"Successfully pulled image \\\\\\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e027211f1bd09d518bf0474dc082beee:latest\\\\\\\"\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":7,\\\"firstTimestamp\\\":\\\"2020-11-28T08:33:40Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-28T08:39:41Z\\\",\\\"name\\\":\\\"Started\\\",\\\"message\\\":\\\"Started container\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":7,\\\"firstTimestamp\\\":\\\"2020-11-28T08:33:54Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-28T08:39:47Z\\\",\\\"name\\\":\\\"Killing\\\",\\\"message\\\":\\\"Killing container with id 9729aebe38f288bb4cf21a0c7cd5faa0394a6d6b4608245882821be76af2f5d0.\\\",\\\"type\\\":\\\"Normal\\\"}]}\"\n    }\n  ]\n}"
    }
}

In [None]:
print("Scoring URI is: {}".format(webservice.scoring_uri))

In [None]:
import json
import requests

response = requests.post(url=webservice.scoring_uri, data=json.dumps("Steve went to London"),headers={"Content-type": "application/json"})
print(response.text)