In [None]:
# ------------------------------------------------------------------------------
# This is generated from https://ml.azure.com/visualinterface/authoring/Normal/26757c1b-0cee-46d2-be4c-7e37f38bf54c?wsid=/subscriptions/4aaa645c-5ae2-4ae9-a17a-84b9023bc56a/resourcegroups/itp-pilot-ResGrp/workspaces/itp-pilot&tid=72f988bf-86f1-41af-91ab-2d7cd011db47
# Prerequisite: install azureml-pipeline-wrapper (version >= 0.1.0.17494938)
# ------------------------------------------------------------------------------

In [1]:
from azureml.core import Workspace
from azureml.pipeline.wrapper import Pipeline, Module, dsl

In [2]:
# Connect to the Azure ML workspace described by the local config file.
WORKSPACE_CONFIG_PATH = './config.json'
ws = Workspace.from_config(path=WORKSPACE_CONFIG_PATH)

In [3]:
# Load the designer modules used by the pipeline; all come from the 'azureml' namespace.
def _load_azureml_module(name):
    """Load the module called `name` from the 'azureml' namespace of workspace `ws`."""
    return Module.load(ws, namespace='azureml', name=name)


remove_duplicate_rows = _load_azureml_module('Remove Duplicate Rows')
train_model = _load_azureml_module('Train Model')
split_data = _load_azureml_module('Split Data')
normalize_data = _load_azureml_module('Normalize Data')
two_class_decision_forest = _load_azureml_module('Two-Class Decision Forest')
join_data = _load_azureml_module('Join Data')
score_model = _load_azureml_module('Score Model')
evaluate_model = _load_azureml_module('Evaluate Model')

In [4]:
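# Disabled: loads two additional modules by id. Neither name is referenced by the pipeline defined below.
#tlc_train_anonymous = Module.load(ws, id='be3aac0f-f058-4c75-b74b-4d17dd3b5c05')
#tlc_test_anonymous = Module.load(ws, id='281f5a57-daed-4e53-b1d5-e7b86fc381f5')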

In [5]:
# Look up the two registered input datasets by name.
from azureml.core import Dataset

# Both datasets are requested at the same explicit version.
DATASET_VERSION = 1
botdetectionlabeldata = Dataset.get_by_name(
    ws, name='BotDetectionLabelData', version=DATASET_VERSION)
botdetectionfeaturedata = Dataset.get_by_name(
    ws, name='BotDetectionFeatureData', version=DATASET_VERSION)

In [6]:
# define pipeline
# The decorator names the pipeline and sets the default compute target and datastore.
@dsl.pipeline(name='Bot-detection', description='bot detection', default_compute_target=('k80-16-b', 'Cmk8s'), default_datastore='workspaceblobstore')
def generated_pipeline():
    """Wire the bot-detection training graph from the loaded designer modules.

    Steps, in order:
      1. Remove duplicate rows (keyed on ``Id``) from the label and feature datasets.
      2. Inner-join the two de-duplicated datasets on ``Id``.
      3. MinMax-normalize ``BD_NormalizedImpressionWithFDAuthUserCount``.
      4. Split rows with a 0.9 fraction for the first output (randomized, seed 0).
      5. Train a two-class decision forest on ``results_dataset1`` with ``Label`` as the label column.
      6. Score ``results_dataset2`` with the trained model and evaluate the scored output.

    Takes no parameters and has no explicit return value. The modules and
    datasets it uses are notebook globals defined in earlier cells.

    The commented-out ``<step>.runsettings.target=''`` lines are placeholders;
    presumably they override the compute target for a single step (TODO confirm).
    """
    # De-duplicate the label data on the Id column, keeping the first duplicate row.
    remove_duplicate_rows_0 = remove_duplicate_rows(
        dataset=botdetectionlabeldata,
        key_column_selection_filter_expression='[{"KeepInputDataOrder":true,"ColumnNames":["Id"]}]',
        retain_first_duplicate_row=True)
    # remove_duplicate_rows_0.runsettings.target=''
    # Configure the dataset input with mode='download' (presumably download instead of mount; confirm).
    remove_duplicate_rows_0.inputs.dataset.configure(mode='download')

    # Same de-duplication for the feature data.
    remove_duplicate_rows_1 = remove_duplicate_rows(
        dataset=botdetectionfeaturedata,
        key_column_selection_filter_expression='[{"KeepInputDataOrder":true,"ColumnNames":["Id"]}]',
        retain_first_duplicate_row=True)
    # remove_duplicate_rows_1.runsettings.target=''
    remove_duplicate_rows_1.inputs.dataset.configure(mode='download')

    # Inner join of labels (left) and features (right) on Id; the right-hand key column is dropped.
    join_data_0 = join_data(
        left_dataset=remove_duplicate_rows_0.outputs.results_dataset,
        right_dataset=remove_duplicate_rows_1.outputs.results_dataset,
        comma_separated_case_sensitive_names_of_join_key_columns_for_l='[{"KeepInputDataOrder":true,"ColumnNames":["Id"]}]',
        comma_separated_case_sensitive_names_of_join_key_columns_for_r='[{"KeepInputDataOrder":true,"ColumnNames":["Id"]}]',
        match_case=True,
        join_type='Inner Join',
        keep_right_key_columns_in_joined_table=False)
    # join_data_0.runsettings.target=''

    # MinMax-normalize a single feature column of the joined data.
    # NOTE(review): normalization runs on the full joined dataset, before the split below.
    normalize_data_0 = normalize_data(
        dataset=join_data_0.outputs.results_dataset,
        transformation_method='MinMax',
        use_0_for_constant_columns_when_checked=True,
        columns_to_transform='[{"KeepInputDataOrder":true,"ColumnNames":["BD_NormalizedImpressionWithFDAuthUserCount"]}]')
    # normalize_data_0.runsettings.target=''

    # Randomized row split with a fixed seed; 0.9 of the rows go to the first output.
    # NOTE(review): stratified_split is the string 'False', unlike the boolean flags
    # used elsewhere; presumably the module expects a string option here (confirm).
    split_data_0 = split_data(
        dataset=normalize_data_0.outputs.transformed_dataset,
        splitting_mode='Split Rows',
        fraction_of_rows_in_the_first_output_dataset=0.9,
        randomized_split=True,
        random_seed=0,
        stratified_split='False',
        stratification_key_column='')
    # split_data_0.runsettings.target=''

    # Untrained two-class decision forest: 8 trees, maximum depth 32, bagging resampling.
    two_class_decision_forest_0 = two_class_decision_forest(
        create_trainer_mode='SingleParameter',
        number_of_decision_trees=8,
        maximum_depth_of_the_decision_trees=32,
        minimum_number_of_samples_per_leaf_node=1,
        resampling_method='Bagging Resampling')
    # two_class_decision_forest_0.runsettings.target=''

    # Train on the split's results_dataset1 output, using "Label" as the label column.
    train_model_0 = train_model(
        dataset=split_data_0.outputs.results_dataset1,
        untrained_model=two_class_decision_forest_0.outputs.untrained_model,
        label_column='[{"KeepInputDataOrder":true,"ColumnNames":["Label"]}]')
    # train_model_0.runsettings.target=''

    # Score the split's results_dataset2 output with the trained model.
    score_model_0 = score_model(
        trained_model=train_model_0.outputs.trained_model,
        dataset=split_data_0.outputs.results_dataset2,
        append_score_columns_to_output=True)
    # score_model_0.runsettings.target=''

    # Evaluate the scored dataset; evaluate_model_0 is not referenced again in this function.
    evaluate_model_0 = evaluate_model(
        scored_dataset=score_model_0.outputs.scored_dataset)
    # evaluate_model_0.runsettings.target=''

In [7]:
# Build the pipeline object by calling the @dsl.pipeline-decorated function;
# `pipeline` is validated and submitted in the following cells.
pipeline = generated_pipeline()

In [8]:
# Validate the pipeline (this also renders the graph widget) and show the result.
validation_result = pipeline.validate()
validation_result

<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_4c8f90db-50e4-4d88-b5cc-d8f40fd65403_widget', env_json='{"subscription…

{'result': 'validation passed', 'errors': []}

In [9]:
# Submit the pipeline under the 'bot-detection' experiment and keep the run handle,
# then wait for the run; the final status is shown as the cell output.
pipeline_run = pipeline.submit(experiment_name='bot-detection')
pipeline_run.wait_for_completion()

Submitted PipelineRun c3c54317-a433-438c-88a7-e9a9f7ab9c4a
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/bot-detection/runs/c3c54317-a433-438c-88a7-e9a9f7ab9c4a?wsid=/subscriptions/4aaa645c-5ae2-4ae9-a17a-84b9023bc56a/resourcegroups/itp-pilot-ResGrp/workspaces/itp-pilot
PipelineRunId: c3c54317-a433-438c-88a7-e9a9f7ab9c4a
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/bot-detection/runs/c3c54317-a433-438c-88a7-e9a9f7ab9c4a?wsid=/subscriptions/4aaa645c-5ae2-4ae9-a17a-84b9023bc56a/resourcegroups/itp-pilot-ResGrp/workspaces/itp-pilot


<IPython.core.display.Javascript object>

ValidateView(container_id='container_id_f5891cc6-d145-4695-b733-621a01412257_widget', env_json='{}', graph_jso…

<RunStatus.failed: 'Failed'>