In [1]:
from azureml.core import Workspace, Run, Dataset
from azureml.pipeline.wrapper import Pipeline, Module

# Connect to the Azure ML workspace; `ws` is passed to every dataset, module
# and pipeline call in the cells that follow.
ws = Workspace.get(
    name='kubeflow_ws_1',
    resource_group='kubeflow-demo',
    subscription_id='74eccef0-4b8d-4f83-b5f9-fa100d155b22',
)

In [16]:
# Fetch the registered dataset by name; it is later wired into the splitter
# step as its `Input_path`.
data = Dataset.get_by_name(
    ws,
    name='Movie_Rating_FileDataset',
)

In [17]:
def _load_cat_module(module_name):
    """Load the module called ``module_name`` from the 'microsoft.com/cat' namespace of ``ws``."""
    return Module.load(ws, namespace='microsoft.com/cat', name=module_name)


# Data preparation, training and scoring modules.
StratifiedSplitter = _load_cat_module('Stratified Splitter')
SARTraining = _load_cat_module('SAR Training')
SARScoring = _load_cat_module('SAR Scoring')

# Ranking-metric modules.
RecallAtK = _load_cat_module('Recall at K')
MAP = _load_cat_module('MAP')
nDCG = _load_cat_module('nDCG')
PrecisionAtK = _load_cat_module('Precision at K')

# Print the generated signature and parameter list of one loaded module.
help(StratifiedSplitter)

Help on function microsoft.com/cat://Stratified Splitter in microsoft:

microsoft.com/cat://Stratified Splitter(Input_path: 'Input path' = None, Ratio: 'Ratio' = '0.75', User_column: 'User column' = 'UserId', Item_column: 'Item column' = 'MovieId', Seed: 'Seed' = '42')
    Python stratified splitter from Recommenders repo: https://github.com/Microsoft/Recommenders.
    
    
    :param Input_path: Input path
    :param Ratio: Ratio
    :param User_column: User column
    :param Item_column: Item column
    :param Seed: Seed



In [18]:
# --- Split -> train -> score ------------------------------------------------
# Split the ratings dataset into train/test parts (75% ratio, fixed seed).
splitter = StratifiedSplitter(
    Ratio=0.75,
    User_column='UserId',
    Item_column='MovieId',
    Seed=42,
).set_inputs(Input_path=data)

# Train the SAR model on the training split.
training = SARTraining(
    User_column='UserId',
    Item_column='MovieId',
    Rating_column='Rating',
    Timestamp_column='Timestamp',
    Time_decay=False,
    Normalize=False,
).set_inputs(Input_path=splitter.outputs.Output_train_data)

# Score the test split with the trained model (top-10 item recommendations).
scoring = SARScoring(
    Score_type='Item recommendation',
    Ranking_metric='Rating',
    Top_k=10,
    Sort_top_k=True,
).set_inputs(
    Dataset_to_score=splitter.outputs.Output_test_data,
    Trained_model=training.outputs.Output_model,
)

# --- Ranking metrics --------------------------------------------------------
# All four metric modules take the same parameters and the same two inputs:
# the held-out test split as ground truth and the scoring output as
# predictions. Defining both once keeps the wiring identical for every metric.
# (Previously MAP received the scoring output as *both* Rating_true and
# Rating_pred, i.e. it compared the predictions against themselves.)
metric_params = dict(
    User_column='UserId',
    Item_column='MovieId',
    Rating_column='Rating',
    Prediction_column='prediction',
    Relevancy_method='top_k',
    Top_k=10,
    Threshold=10.0,
)
metric_inputs = dict(
    Rating_true=splitter.outputs.Output_test_data,
    Rating_pred=scoring.outputs.Score_result,
)

recall = RecallAtK(**metric_params).set_inputs(**metric_inputs)

# NOTE: `map` shadows the Python builtin; the name is kept because the
# pipeline-construction cell refers to this node as `map`.
map = MAP(**metric_params).set_inputs(**metric_inputs)

precision = PrecisionAtK(**metric_params).set_inputs(**metric_inputs)

ndcg = nDCG(**metric_params).set_inputs(**metric_inputs)

In [19]:
# Assemble all steps into one pipeline; only the nDCG node's outputs are
# declared as pipeline outputs.
pipeline_nodes = [splitter, training, scoring, recall, map, precision, ndcg]
pipeline = Pipeline(
    nodes=pipeline_nodes,
    outputs=ndcg.outputs,
    name='recommender pipeline',
)

# Submit to the 'jietong' experiment on the 'default' compute target.
# Alternative compute target: 'kubeflow-aks'.
run = pipeline.submit_run(
    ws,
    experiment_name='jietong',
    default_compute_target='default',
)

# Wait for the run to finish. In the recorded output this call raised
# ActivityFailedException because the Stratified Splitter step could not import
# "azureml-dataprep" while mounting the dataset; per the error message, the
# step's environment needs `azureml-dataprep[fuse,pandas]` installed.
run.wait_for_completion()

Submitted PipelineRun c1012a78-194a-4048-acda-7dfa1401dddf
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/jietong/runs/c1012a78-194a-4048-acda-7dfa1401dddf?wsid=/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourcegroups/kubeflow-demo/workspaces/kubeflow_ws_1
PipelineRunId: c1012a78-194a-4048-acda-7dfa1401dddf
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/jietong/runs/c1012a78-194a-4048-acda-7dfa1401dddf?wsid=/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourcegroups/kubeflow-demo/workspaces/kubeflow_ws_1
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 8aed2b33-a546-480b-abc2-d00fbc463632
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/jietong/runs/8aed2b33-a546-480b-abc2-d00fbc463632?wsid=/subscriptions/74eccef0-4b8d-4f83-b5f9-fa100d155b22/resourcegroups/kubeflow-demo/workspaces/kubeflow_ws_1
StepRun( Stratified Splitter ) Status: NotStarted
StepRun( Stratified Splitter ) 

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "ServiceError",
        "message": "Dataset initialization failed: Could not import package \"azureml-dataprep\". Please ensure it is installed by running: pip install \"azureml-dataprep[fuse,pandas]\"",
        "details": [],
        "debugInfo": {
            "type": "ImportError",
            "message": "Could not import package \"azureml-dataprep\". Please ensure it is installed by running: pip install \"azureml-dataprep[fuse,pandas]\"",
            "stackTrace": "  File \"/mnt/batch/tasks/shared/LS_root/jobs/kubeflow_ws_1/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/mounts/workspaceblobstore/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/azureml-setup/context_manager_injector.py\", line 44, in __enter__\n    self.context_manager.__enter__()\n  File \"/mnt/batch/tasks/shared/LS_root/jobs/kubeflow_ws_1/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/mounts/workspaceblobstore/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/azureml-setup/context_managers.py\", line 230, in __enter__\n    self.datasets.__enter__()\n  File \"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/context_managers.py\", line 118, in __enter__\n    context_manager = dataset.mount(mount_point=target_path)\n  File \"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/_loggerfactory.py\", line 78, in wrapper\n    return func(*args, **kwargs)\n  File \"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/file_dataset.py\", line 177, in mount\n    mount = dataprep_fuse().mount\n  File \"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/_dataprep_helper.py\", line 51, in dataprep_fuse\n    raise ImportError(_dataprep_missing_error)\n"
        },
        "messageParameters": {}
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"ServiceError\",\n        \"message\": \"Dataset initialization failed: Could not import package \\\"azureml-dataprep\\\". Please ensure it is installed by running: pip install \\\"azureml-dataprep[fuse,pandas]\\\"\",\n        \"details\": [],\n        \"debugInfo\": {\n            \"type\": \"ImportError\",\n            \"message\": \"Could not import package \\\"azureml-dataprep\\\". Please ensure it is installed by running: pip install \\\"azureml-dataprep[fuse,pandas]\\\"\",\n            \"stackTrace\": \"  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/kubeflow_ws_1/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/mounts/workspaceblobstore/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/azureml-setup/context_manager_injector.py\\\", line 44, in __enter__\\n    self.context_manager.__enter__()\\n  File \\\"/mnt/batch/tasks/shared/LS_root/jobs/kubeflow_ws_1/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/mounts/workspaceblobstore/azureml/8aed2b33-a546-480b-abc2-d00fbc463632/azureml-setup/context_managers.py\\\", line 230, in __enter__\\n    self.datasets.__enter__()\\n  File \\\"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/context_managers.py\\\", line 118, in __enter__\\n    context_manager = dataset.mount(mount_point=target_path)\\n  File \\\"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/_loggerfactory.py\\\", line 78, in wrapper\\n    return func(*args, **kwargs)\\n  File \\\"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/file_dataset.py\\\", line 177, in mount\\n    mount = dataprep_fuse().mount\\n  File \\\"/azureml-envs/azureml_f64010c353ffcd7cfceb8fb0477f522e/lib/python3.7/site-packages/azureml/data/_dataprep_helper.py\\\", line 51, in dataprep_fuse\\n    raise ImportError(_dataprep_missing_error)\\n\"\n        },\n        
\"messageParameters\": {}\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}