In [27]:
import sys, os
import azureml.core
from azureml.core import Workspace

ws = Workspace.from_config()

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')


zd-ner
scan-datasources-rg
southcentralus
d35ad7f3-a6be-400c-b1db-8b70322a01a0


In [28]:
from azureml.core import Experiment

experiment_name = "ner_new"
experiment = Experiment(ws, name=experiment_name)


In [29]:
from azureml.core import Environment

pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = 'conda.yaml')

# Specify a GPU base image
pytorch_env.docker.enabled = True
pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'

In [31]:
from azureml.core import ScriptRunConfig
from azureml.widgets import RunDetails

src = ScriptRunConfig(source_directory=".",
                      script='train.py',
                      arguments=["--json_config", "training_config.json"],
                      compute_target="zd-ner",
                      environment=pytorch_env)
run = experiment.submit(src)
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

In [None]:
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, uniform, PrimaryMetricGoal

param_sampling = RandomParameterSampling( {
        'lr': uniform(0.01, 1),
        'adam_epsilon': uniform(0.01, 1)
    }
)

early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)

hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling, 
                                     policy=early_termination_policy,
                                     primary_metric_name='eval_f1',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=2,
                                     max_concurrent_runs=4)

In [None]:
hyperdrive_run = experiment.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()

In [None]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

In [None]:
print(f'Best Run is:\n  eval_f1: {0:.5f} \n  learning rate: {1:.5f} \n  adam_epsilon: {2:.5f}'
.format(
        best_run_metrics['eval_f1'],
        best_run_metrics['lr'],
        best_run_metrics['adam_epsilon'])
     )

In [32]:
best_run = azureml.core.ScriptRun(experiment=Experiment(ws, name="ner"),run_id="ner_new_1606688308_21ae6f00")

In [33]:
model = best_run.register_model(model_name = 'ner', model_path = './outputs')
print(model.name, model.id, model.version, sep = '\t')

ner	ner:9	9


In [55]:
#remote deploy
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.webservice import Webservice
from azureml.core.model import Model

dockerfile = r"""
FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
RUN python -c "import nltk; nltk.download('punkt')"
"""

env = Environment.from_conda_specification(name="env", file_path="conda.yaml")
env.docker.base_image = None
env.docker.base_dockerfile = dockerfile

inference_config = InferenceConfig(entry_script="score.py", environment=env)

aciconfig = AciWebservice.deploy_configuration(cpu_cores=2, 
                                               memory_gb=10, 
                                               tags={'data': 'text',  'method':'DistillBert-NER', 'framework':'pytorch'},
                                               description='Classify PER, LOC, ORG, and MISC')

service = Model.deploy(workspace=ws, 
                           name='ner', 
                           models=[model], 
                           inference_config=inference_config, 
                           deployment_config=aciconfig)
service.wait_for_deployment(True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...................................................................
Failed
Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 3ac074d6-5cbf-418c-9b75-e9258e0b04c2
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: nertenscript. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096 locally. Please refer to https://aka.ms/debugimag

WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 3ac074d6-5cbf-418c-9b75-e9258e0b04c2
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: nertenscript. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: nertenscript. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information."
    },
    {
      "code": "AciDeploymentFailed",
      "message": "Your container application crashed. Please follow the steps to debug:\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\n3. View the diagnostic events to check status of container, it may help you to debug the issue. {\"restartCount\":4,\"currentState\":{\"state\":\"Waiting\",\"startTime\":null,\"exitCode\":null,\"finishTime\":null,\"detailStatus\":\"CrashLoopBackOff: Back-off 1m20s restarting failed\"},\"previousState\":{\"state\":\"Terminated\",\"startTime\":\"2020-11-29T23:36:39Z\",\"exitCode\":111,\"finishTime\":\"2020-11-29T23:36:43Z\",\"detailStatus\":\"Error\"},\"events\":[{\"count\":2,\"firstTimestamp\":\"2020-11-29T23:32:08Z\",\"lastTimestamp\":\"2020-11-29T23:35:02Z\",\"name\":\"Pulling\",\"message\":\"pulling image \\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096\\\"\",\"type\":\"Normal\"},{\"count\":1,\"firstTimestamp\":\"2020-11-29T23:34:51Z\",\"lastTimestamp\":\"2020-11-29T23:34:51Z\",\"name\":\"Pulled\",\"message\":\"Successfully pulled image \\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096\\\"\",\"type\":\"Normal\"},{\"count\":1,\"firstTimestamp\":\"2020-11-29T23:34:52Z\",\"lastTimestamp\":\"2020-11-29T23:34:52Z\",\"name\":\"Created\",\"message\":\"Created container\",\"type\":\"Normal\"},{\"count\":1,\"firstTimestamp\":\"2020-11-29T23:34:52Z\",\"lastTimestamp\":\"2020-11-29T23:34:52Z\",\"name\":\"Started\",\"message\":\"Started container\",\"type\":\"Normal\"},{\"count\":6,\"firstTimestamp\":\"2020-11-29T23:35:07Z\",\"lastTimestamp\":\"2020-11-29T23:36:24Z\",\"name\":\"BackOff\",\"message\":\"Back-off restarting failed container\",\"type\":\"Warning\"}]}"
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 3ac074d6-5cbf-418c-9b75-e9258e0b04c2\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: nertenscript. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: nertenscript. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can also try to run image 91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\"\n    },\n    {\n      \"code\": \"AciDeploymentFailed\",\n      \"message\": \"Your container application crashed. Please follow the steps to debug:\\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\\n3. View the diagnostic events to check status of container, it may help you to debug the issue. {\\\"restartCount\\\":4,\\\"currentState\\\":{\\\"state\\\":\\\"Waiting\\\",\\\"startTime\\\":null,\\\"exitCode\\\":null,\\\"finishTime\\\":null,\\\"detailStatus\\\":\\\"CrashLoopBackOff: Back-off 1m20s restarting failed\\\"},\\\"previousState\\\":{\\\"state\\\":\\\"Terminated\\\",\\\"startTime\\\":\\\"2020-11-29T23:36:39Z\\\",\\\"exitCode\\\":111,\\\"finishTime\\\":\\\"2020-11-29T23:36:43Z\\\",\\\"detailStatus\\\":\\\"Error\\\"},\\\"events\\\":[{\\\"count\\\":2,\\\"firstTimestamp\\\":\\\"2020-11-29T23:32:08Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-29T23:35:02Z\\\",\\\"name\\\":\\\"Pulling\\\",\\\"message\\\":\\\"pulling image \\\\\\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096\\\\\\\"\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":1,\\\"firstTimestamp\\\":\\\"2020-11-29T23:34:51Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-29T23:34:51Z\\\",\\\"name\\\":\\\"Pulled\\\",\\\"message\\\":\\\"Successfully pulled image \\\\\\\"91000ffe28554e43abd795dd11872a0e.azurecr.io/azureml/azureml_e0a06ef6921b9e6c8f7cc64a6347e096\\\\\\\"\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":1,\\\"firstTimestamp\\\":\\\"2020-11-29T23:34:52Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-29T23:34:52Z\\\",\\\"name\\\":\\\"Created\\\",\\\"message\\\":\\\"Created container\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":1,\\\"firstTimestamp\\\":\\\"2020-11-29T23:34:52Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-29T23:34:52Z\\\",\\\"name\\\":\\\"Started\\\",\\\"message\\\":\\\"Started container\\\",\\\"type\\\":\\\"Normal\\\"},{\\\"count\\\":6,\\\"firstTimestamp\\\":\\\"2020-11-29T23:35:07Z\\\",\\\"lastTimestamp\\\":\\\"2020-11-29T23:36:24Z\\\",\\\"name\\\":\\\"BackOff\\\",\\\"message\\\":\\\"Back-off restarting failed container\\\",\\\"type\\\":\\\"Warning\\\"}]}\"\n    }\n  ]\n}"
    }
}

In [51]:
service.get_logs()

'Error in environment creation, more details may be found here: https://zdner7033739836.blob.core.windows.net/azureml/ImageLogs/19fd262d-863d-4e5a-8397-98a10fffe002/build.log?sv=2019-02-02&sr=b&sig=%2FnQH4g1mD5mELgZ9ZhQ8j9MZaZ7%2FaUsEO%2B8ZWulP4%2BU%3D&st=2020-11-29T23%3A08%3A47Z&se=2020-11-30T07%3A13%3A47Z&sp=r '

In [47]:
print("Scoring URI is: {}".format(service.scoring_uri))

Scoring URI is: None


In [38]:
import json
import requests

response = requests.post(url=service.scoring_uri, data=json.dumps("Steve went to London"),headers={"Content-type": "application/json"})
print(response.text)




**********************************************************************
  Resource [93mpunkt[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt/PY3/english.pickle[0m

  Searched in:
    - '/root/nltk_data'
    - '/azureml-envs/azureml_6a6384407022fe87d6a235546547dbe0/nltk_data'
    - '/azureml-envs/azureml_6a6384407022fe87d6a235546547dbe0/share/nltk_data'
    - '/azureml-envs/azureml_6a6384407022fe87d6a235546547dbe0/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - ''
**********************************************************************

