diff --git a/cli/jobs/single-step/on-behalf-of/README.md b/cli/jobs/single-step/on-behalf-of/README.md new file mode 100644 index 0000000000..3cdf32b7c3 --- /dev/null +++ b/cli/jobs/single-step/on-behalf-of/README.md @@ -0,0 +1,49 @@ +--- +page_type: sample +languages: +- azurecli +- python +products: +- azure-machine-learning +description: An official sample showcasing use of on-behalf-of feature in AzureML. Using this feature customers can use their AAD identity from within training script to perform any operations only limited by their access, like creating another AzureML Job or reading secrets from a key vault in a secure way. +--- + +# AzureML - On Behalf of Feature +AzureML On Behalf of (OBO) is a powerful feature which allows AzureML users to use their AAD Identity within the training script of a remote Job (a job that runs on a remote compute). + +## Why should you use it ? + +AzureML makes your AAD identity available inside training script. Any resource you can access when running code on your machine can be accessed from the training script running on a remote compute. + +## How do I use AzureML On Behalf of (OBO) ? + +There are 2 things that are required to use OBO feature: +- Specify in Job definition you want to use AzureML OBO. +- Use `AzureMLOnBehalfOfCredential` credential class in training script + +### Step 1: Specify in Job definition I want to use AzureML OBO +This is as easy as adding below section to your job definition: + +```yaml +identity: + type: user_identity +``` + +[Job.yaml](job.yaml) from [on behalf of](../on-behalf-of/) shows how a Job definition specifies to use OBO feature. + +### Step 2: Use `AzureMLOnBehalfOfCredential` credential class in training script + +`AzureMLOnBehalfOfCredential` credential class is a part of `azure-ai-ml` package and can be used with any client that accepts credential class from `azure-identity` package. In your training script use this credential class with client of resources you would like to access. 
+ +The code snippet below shows how `AzureMLOnBehalfOfCredential` can be used to access Azure Key Vault. +```python +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential +from azure.keyvault.secrets import SecretClient + + +credential = AzureMLOnBehalfOfCredential() +secret_client = SecretClient(vault_url="https://my-key-vault.vault.azure.net/", credential=credential) +secret = secret_client.get_secret("secret-name") +``` + +[Training script](../on-behalf-of/src/aml_run.py) from [on behalf of](../on-behalf-of/) sample shows how `AzureMLOnBehalfOfCredential` is used to create a job in AzureML from within the training script. diff --git a/cli/jobs/single-step/on-behalf-of/environment/conda.yml b/cli/jobs/single-step/on-behalf-of/environment/conda.yml new file mode 100644 index 0000000000..5b16c235a4 --- /dev/null +++ b/cli/jobs/single-step/on-behalf-of/environment/conda.yml @@ -0,0 +1,7 @@ +channels: + - anaconda + - conda-forge +dependencies: + - python=3.8.12 + - pip: + - azure-ai-ml diff --git a/cli/jobs/single-step/on-behalf-of/job.yaml b/cli/jobs/single-step/on-behalf-of/job.yaml new file mode 100644 index 0000000000..80362af525 --- /dev/null +++ b/cli/jobs/single-step/on-behalf-of/job.yaml @@ -0,0 +1,23 @@ +command: >- + python aml_run.py + --iris-csv ${{inputs.iris_csv}} + --C ${{inputs.C}} + --kernel ${{inputs.kernel}} + --coef0 ${{inputs.coef0}} +inputs: + iris_csv: + type: uri_file + path: wasbs://datasets@azuremlexamples.blob.core.windows.net/iris.csv + C: 0.8 + kernel: "rbf" + coef0: 0.1 +code: ./src +environment: + image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20220113.v1 + conda_file: environment/conda.yml +display_name: "obo-test" +compute: "azureml:cpu-cluster" +experiment_name: obo-sample +identity: # Only available on AMLCompute + type: user_identity + diff --git a/cli/jobs/single-step/on-behalf-of/src/aml_run.py b/cli/jobs/single-step/on-behalf-of/src/aml_run.py new file mode 100644 index 0000000000..d32d80bc95 --- /dev/null +++ 
b/cli/jobs/single-step/on-behalf-of/src/aml_run.py @@ -0,0 +1,92 @@ +# imports +import os +import argparse +from azure.ai.ml.identity import AzureMLOnBehalfOfCredential +from azure.ai.ml import MLClient, Input, command +import logging + +logger = logging.getLogger(__name__) + + +# define functions +def main(args): + job_definition = get_job_definition(args) + ml_client = get_ml_client(args) + returned_job = ml_client.jobs.create_or_update(job_definition) + print(returned_job) + + +def get_ml_client(args): + credential = AzureMLOnBehalfOfCredential() + subscription_id = os.environ["AZUREML_ARM_SUBSCRIPTION"] + resource_group = os.environ["AZUREML_ARM_RESOURCEGROUP"] + workspace_name = os.environ["AZUREML_ARM_WORKSPACE_NAME"] + + client = MLClient( + credential=credential, + subscription_id=subscription_id, + resource_group_name=resource_group, + workspace_name=workspace_name, + ) + return client + + +def get_job_definition(args): + job = command( + code="./src", # local path where the code is stored + command="python main.py --iris-csv ${{inputs.iris}} --C ${{inputs.C}} --kernel ${{inputs.kernel}} --coef0 ${{inputs.coef0}}", + inputs={ + "iris": Input( + type="uri_file", + path="https://azuremlexamples.blob.core.windows.net/datasets/iris.csv", + ), + "C": args.C, + "kernel": args.kernel, + "coef0": args.coef0, + }, + environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:22", + compute="cpu-cluster", + display_name="sklearn-iris-example", + description="sklearn iris example", + tags={"starter_run": os.environ.get("MLFLOW_RUN_ID")}, + ) + + return job + + +def parse_args(): + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--iris-csv", type=str) + parser.add_argument("--C", type=float, default=1.0) + parser.add_argument("--kernel", type=str, default="rbf") + parser.add_argument("--degree", type=int, default=3) + parser.add_argument("--gamma", type=str, default="scale") + parser.add_argument("--coef0", type=float, 
default=0) + parser.add_argument("--shrinking", type=bool, default=False) + parser.add_argument("--probability", type=bool, default=False) + parser.add_argument("--tol", type=float, default=1e-3) + parser.add_argument("--cache_size", type=float, default=1024) + parser.add_argument("--class_weight", type=dict, default=None) + parser.add_argument("--verbose", type=bool, default=False) + parser.add_argument("--max_iter", type=int, default=-1) + parser.add_argument("--decision_function_shape", type=str, default="ovr") + parser.add_argument("--break_ties", type=bool, default=False) + parser.add_argument("--random_state", type=int, default=42) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + + # run main function + main(args) diff --git a/cli/jobs/single-step/on-behalf-of/src/src/main.py b/cli/jobs/single-step/on-behalf-of/src/src/main.py new file mode 100644 index 0000000000..01b5d305d7 --- /dev/null +++ b/cli/jobs/single-step/on-behalf-of/src/src/main.py @@ -0,0 +1,103 @@ +# imports +import os +import mlflow +import argparse + +import pandas as pd + +from sklearn.svm import SVC +from sklearn.model_selection import train_test_split + +# define functions +def main(args): + # enable auto logging + mlflow.autolog() + + # setup parameters + params = { + "C": args.C, + "kernel": args.kernel, + "degree": args.degree, + "gamma": args.gamma, + "coef0": args.coef0, + "shrinking": args.shrinking, + "probability": args.probability, + "tol": args.tol, + "cache_size": args.cache_size, + "class_weight": args.class_weight, + "verbose": args.verbose, + "max_iter": args.max_iter, + "decision_function_shape": args.decision_function_shape, + "break_ties": args.break_ties, + "random_state": args.random_state, + } + + # read in data + df = pd.read_csv(args.iris_csv) + + # process data + X_train, X_test, y_train, y_test = process_data(df, args.random_state) + + # train model + 
model = train_model(params, X_train, X_test, y_train, y_test) + + +def process_data(df, random_state): + # split dataframe into X and y + X = df.drop(["species"], axis=1) + y = df["species"] + + # train/test split + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=random_state + ) + + # return split data + return X_train, X_test, y_train, y_test + + +def train_model(params, X_train, X_test, y_train, y_test): + # train model + model = SVC(**params) + model = model.fit(X_train, y_train) + + # return model + return model + + +def parse_args(): + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--iris-csv", type=str) + parser.add_argument("--C", type=float, default=1.0) + parser.add_argument("--kernel", type=str, default="rbf") + parser.add_argument("--degree", type=int, default=3) + parser.add_argument("--gamma", type=str, default="scale") + parser.add_argument("--coef0", type=float, default=0) + parser.add_argument("--shrinking", type=bool, default=False) + parser.add_argument("--probability", type=bool, default=False) + parser.add_argument("--tol", type=float, default=1e-3) + parser.add_argument("--cache_size", type=float, default=1024) + parser.add_argument("--class_weight", type=dict, default=None) + parser.add_argument("--verbose", type=bool, default=False) + parser.add_argument("--max_iter", type=int, default=-1) + parser.add_argument("--decision_function_shape", type=str, default="ovr") + parser.add_argument("--break_ties", type=bool, default=False) + parser.add_argument("--random_state", type=int, default=42) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + + # run main function + main(args)