In [1]:
import azureml.core

from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace, Environment, Datastore, Experiment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.runconfig import RunConfiguration

from azureml.exceptions import WebserviceException
from azureml.data.data_reference import DataReference

from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineData, Pipeline
import json
import os


In [2]:
# Load the workspace settings (subscription, resource group, workspace name
# and tenant id) from the local config file.
with open('config.json', 'r') as config_file:
    ws_config = json.load(config_file)

# Interactive login, authenticating against the tenant id from the config.
interactive_auth = InteractiveLoginAuthentication(tenant_id=ws_config['tenantId'])

# Collect the three values that identify the workspace, then connect to it
# with the interactive credentials created above.
workspace_location = {
    key: ws_config[key]
    for key in ('subscription_id', 'resource_group', 'workspace_name')
}
ws = Workspace(auth=interactive_auth, **workspace_location)

Performing interactive authentication. Please follow the instructions on the terminal.
Interactive authentication successfully completed.


Note, we have launched a browser for you to login. For old experience with device code, use "az login --use-device-code"
You have logged in. Now let us find all the subscriptions to which you have access...


In [3]:
# Datastore settings. The container and account names fall back to the
# defaults below when their environment variables are unset; the account key
# has no default and is None when AZURE_STORAGE_KEY is not set.
blob_datastore_name = 'shiftdatastore'  # Name of the datastore to workspace
container_name = os.getenv("BLOB_CONTAINER", "news20container")  # Name of Azure blob container
account_name = os.getenv("BLOB_ACCOUNTNAME", "shiftreference")  # Storage account name
account_key = os.getenv("AZURE_STORAGE_KEY")  # Storage account key

# Reuse the datastore when it can be fetched from the workspace; otherwise
# register the blob container under that name.
try:
    datastore = Datastore.get(ws, blob_datastore_name)
except Exception as lookup_error:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate, and report why the
    # lookup failed instead of swallowing it silently.
    print('Could not get datastore ' + blob_datastore_name
          + ' (' + type(lookup_error).__name__ + '); registering it instead.')
    datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
    )


In [4]:
# Input: the "rawdata" path on the datastore, exposed to steps as "rawdata".
blob_input_data = DataReference(
    datastore=datastore,
    path_on_datastore="rawdata",
    data_reference_name="rawdata",
)

# Output: pipeline data named "corpus" on the same datastore, where the
# preprocessed files are saved.
corpus_output_data = PipelineData(
    name="corpus",
    output_path_on_compute="corpus",
    datastore=datastore,
)

In [5]:
# Name and VM size of the cluster the pipeline step runs on.
compute_name = "corpus-compute"
vm_size = "STANDARD_D11_V2"

# Read the workspace's compute targets once so the membership test and the
# lookup below work on the same snapshot.
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('Found compute target: ' + compute_name)
    else:
        # Previously this case passed silently; the target is still used
        # as-is, but the mismatch is now visible in the cell output.
        print('Warning: compute target ' + compute_name
              + ' exists but is not an AmlCompute cluster (found '
              + type(compute_target).__name__ + ')')
else:
    print('Creating a new compute target...')
    # vm_size above selects the SKU; STANDARD_NC6 would be a GPU-enabled
    # alternative. The cluster scales between 0 and 4 nodes.
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=0,
                                                                max_nodes=4)
    # create the compute target
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current cluster status, use the 'status' property
    print(compute_target.status.serialize())



Found compute target: corpus-compute


In [6]:
# Build the "sbsdeployment" environment from the local pip requirements file.
env = Environment.from_pip_requirements(
    name="sbsdeployment",
    file_path="./requirements.txt",
)

# Run configuration that uses the environment defined above.
runconfig = RunConfiguration()
runconfig.environment = env


In [7]:

# Arguments passed to the script: the raw-data input reference and the
# corpus output location.
process_arguments = [
    "--input", blob_input_data,
    "--output", corpus_output_data,
]

# Settings for the single pipeline step, which runs build_corpus.py from the
# current working directory on the compute target.
step_settings = dict(
    script_name="build_corpus.py",
    source_directory=os.getcwd(),
    arguments=process_arguments,
    inputs=[blob_input_data],
    outputs=[corpus_output_data],
    compute_target=compute_target,
    runconfig=runconfig,
    allow_reuse=True,
)
process_step = PythonScriptStep(**step_settings)

In [8]:
# Assemble a pipeline containing only the corpus-building step.
predictions = Pipeline(workspace=ws, steps=[process_step])

# Submit the pipeline under the "build_corpus" experiment and block until the
# run finishes.
corpus_experiment = Experiment(workspace=ws, name="build_corpus")
predictions_run = corpus_experiment.submit(predictions)
predictions_run.wait_for_completion()

Created step build_corpus.py [26bd74d3][f537e9c3-9d2d-47b6-9531-6b77406b6cd5], (This step will run and generate new outputs)
Using data reference rawdata for StepId [7789452d][395508bc-299b-4d65-a244-73b2834045d8], (Consumers of this data are eligible to reuse prior runs.)
Submitted PipelineRun 358eff6f-51a4-4665-8e63-1b58aa3bf834
Link to Azure Machine Learning studio: https://ml.azure.com/experiments/build_corpus/runs/358eff6f-51a4-4665-8e63-1b58aa3bf834?wsid=/subscriptions/9017d57d-c4df-480d-b92d-7aea2266b0f0/resourcegroups/BAA_Shift/workspaces/News20Workspace
PipelineRunId: 358eff6f-51a4-4665-8e63-1b58aa3bf834
Link to Portal: https://ml.azure.com/experiments/build_corpus/runs/358eff6f-51a4-4665-8e63-1b58aa3bf834?wsid=/subscriptions/9017d57d-c4df-480d-b92d-7aea2266b0f0/resourcegroups/BAA_Shift/workspaces/News20Workspace
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 179a896a-31a5-4470-9461-a2855fada77e
Link to Portal: https://ml.azure.com/experiments/build_co

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with RecursionError: maximum recursion depth exceeded",
        "detailsUri": "https://aka.ms/azureml-known-errors",
        "details": [],
        "debugInfo": {
            "type": "RecursionError",
            "message": "maximum recursion depth exceeded",
            "stackTrace": "  File \"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/azureml-setup/context_manager_injector.py\", line 127, in execute_with_context\n    runpy.run_path(sys.argv[0], globals(), run_name=\"__main__\")\n  File \"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\", line 263, in run_path\n    pkg_name=pkg_name, script_name=fname)\n  File \"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\", line 96, in _run_module_code\n    mod_name, mod_spec, pkg_name, script_name)\n  File \"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\", line 85, in _run_code\n    exec(code, run_globals)\n  File \"build_corpus.py\", line 44, in <module>\n    process_avis_corpus(args.input, output)\n  File \"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\", line 116, in process_avis_corpus\n    process_avis_corpus(inputdir, output)\n  File \"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\", line 116, in process_avis_corpus\n    process_avis_corpus(inputdir, output)\n  File \"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\", line 116, in process_avis_corpus\n    process_avis_corpus(inputdir, output)\n  [Previous line repeated 987 more times]\n  File 
\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\", line 115, in process_avis_corpus\n    _maybe_download_norsk_aviskorpus(inputdir)\n  File \"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\", line 109, in _maybe_download_norsk_aviskorpus\n    archive_path = os.path.join(inputpath, AVIS_CORSPUS_ARCHIVE)\n  File \"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/posixpath.py\", line 79, in join\n    sep = _get_sep(a)\n"
        }
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with RecursionError: maximum recursion depth exceeded\",\n        \"detailsUri\": \"https://aka.ms/azureml-known-errors\",\n        \"details\": [],\n        \"debugInfo\": {\n            \"type\": \"RecursionError\",\n            \"message\": \"maximum recursion depth exceeded\",\n            \"stackTrace\": \"  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/azureml-setup/context_manager_injector.py\\\", line 127, in execute_with_context\\n    runpy.run_path(sys.argv[0], globals(), run_name=\\\"__main__\\\")\\n  File \\\"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\\\", line 263, in run_path\\n    pkg_name=pkg_name, script_name=fname)\\n  File \\\"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\\\", line 96, in _run_module_code\\n    mod_name, mod_spec, pkg_name, script_name)\\n  File \\\"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/runpy.py\\\", line 85, in _run_code\\n    exec(code, run_globals)\\n  File \\\"build_corpus.py\\\", line 44, in <module>\\n    process_avis_corpus(args.input, output)\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\\\", line 116, in process_avis_corpus\\n    process_avis_corpus(inputdir, output)\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\\\", line 116, in process_avis_corpus\\n    
process_avis_corpus(inputdir, output)\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\\\", line 116, in process_avis_corpus\\n    process_avis_corpus(inputdir, output)\\n  [Previous line repeated 987 more times]\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\\\", line 115, in process_avis_corpus\\n    _maybe_download_norsk_aviskorpus(inputdir)\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/news20workspace/2659cbbbfb4f4a62a5bc10740292fc53/179a896a-31a5-4470-9461-a2855fada77e/mounts/workspaceblobstore/azureml/179a896a-31a5-4470-9461-a2855fada77e/process_avis_corpus.py\\\", line 109, in _maybe_download_norsk_aviskorpus\\n    archive_path = os.path.join(inputpath, AVIS_CORSPUS_ARCHIVE)\\n  File \\\"/azureml-envs/azureml_9143bcd3812a650ac057f5da7dec6d2f/lib/python3.6/posixpath.py\\\", line 79, in join\\n    sep = _get_sep(a)\\n\"\n        }\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}

In [None]:

# Marker printed when execution reaches the end of the notebook.
print("done")