In [2]:
from azureml.core import Workspace

In [None]:
# Create the workspace (and, if needed, its resource group).
# exist_ok=True makes the cell safe to re-run: when the workspace already
# exists it is returned instead of the call failing.
# The empty strings are placeholders that must be filled in before running.
ws = Workspace.create(
    name='Azureml-SDK-brillio',
    subscription_id="",
    resource_group="",
    create_resource_group=True,
    location="",
    exist_ok=True,
)
# Save the connection details under ./config so later cells can reload the
# workspace with Workspace.from_config("./config").
ws.write_config(path="./config")

In [None]:
# Reload the workspace handle from the configuration written under ./config.
ws = Workspace.from_config(path="./config")

In [None]:
from azureml.core import Datastore

# Register an Azure Blob container as a datastore named "azure_sdk_blob01".
# account_name / container_name / account_key are left as empty placeholders.
az_store = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="azure_sdk_blob01",
    account_name="",
    container_name="",
    account_key="",
)

In [None]:
# Fetch the previously registered blob datastore by name.
az_store = Datastore.get(workspace=ws, datastore_name="azure_sdk_blob01")

In [None]:
from azureml.core import Dataset

# Point at a delimited file inside the blob datastore and wrap it as a
# tabular dataset.
csv_path = [(az_store, "folder_name/file_name")]
dataset = Dataset.Tabular.from_delimited_files(path=csv_path)

# Register the dataset in the workspace (name is a placeholder to fill in).
dataset = dataset.register(
    workspace=ws,
    name="",
    create_new_version=True,
)

In [None]:
# Workspaces returned for the (placeholder) subscription, turned into a list.
ws_list = list(Workspace.list(subscription_id=""))

# Default datastore of the current workspace and a dataset looked up by name.
az_default_store = ws.get_default_datastore()
az_dataset = Dataset.get_by_name(ws, "")

# Print the key of every entry in ws.datasets, one per line.
ds_list = list(ws.datasets.keys())
for items in ds_list:
    print(items)

In [None]:
# Re-acquire the handles used by the following cells: workspace, the named
# blob datastore, a dataset (placeholder name) and the default datastore.
ws = Workspace.from_config(path="./config")
az_store = Datastore.get(workspace=ws, datastore_name="azure_sdk_blob01")
az_dataset = Dataset.get_by_name(ws, "")
az_default_store = ws.get_default_datastore()

In [None]:
# Load the dataset into a pandas DataFrame.
df = az_dataset.to_pandas_dataframe()

# Keep only the two columns of interest ("col1"/"col2" are placeholders).
df_sub = df[["col1", "col2"]]

In [None]:
# Register the column subset as a new tabular dataset stored in az_store.
# The name is spelled "dataset name" so that the later lookup of
# "dataset name" in the pipeline section finds this dataset (the original
# spelling "datset name" did not match that lookup).
az_ds_from_df = Dataset.Tabular.register_pandas_dataframe(
    dataframe=df_sub,
    target=az_store,
    name="dataset name",
)

In [None]:
# Local files to push to the blob datastore.
files_list = ["./data/test.csv", "./data/test1.csv"]

# Upload them under "folder_name/"; note that files_list is rebound to the
# value returned by upload_files.
files_list = az_store.upload_files(
    files=files_list,
    target_path="folder_name/",
    relative_root="./data/",
    overwrite=True,
)

In [None]:
# Upload the whole local ./data directory to "folder_name/data" in the
# datastore, overwriting existing files.
az_store.upload(
    src_dir="./data",
    target_path="folder_name/data",
    overwrite=True,
)

In [5]:
from azureml.core import Experiment

# Experiment handle used for the interactive logging run below.
experiment = Experiment(workspace=ws, name="azureml_exp1")

In [None]:
# Start an interactive run and log basic data-quality figures for df:
# the total row count and, per column, the number of null values.
new_run = experiment.start_logging()

total_observations = len(df)
null_df = df.isnull().sum()  # Series: column name -> null count

new_run.log("Total observations: ", total_observations)
for columns in df.columns:
    # Fixed: the original referenced the undefined name `nulldf` here,
    # which raised NameError before anything per-column was logged.
    new_run.log(columns, null_df[columns])

# Mark the run as finished.
new_run.complete()

# Running an experiment with a script

In [6]:
from azureml.core import Workspace, Experiment, ScriptRunConfig

# Submit basicscript.py (from the current directory) as a run of the
# "azureml_exp2" experiment and block until it finishes.
new_experiment = Experiment(workspace=ws, name="azureml_exp2")
script_config = ScriptRunConfig(
    source_directory=".",
    script="basicscript.py",
)
new_run = new_experiment.submit(config=script_config)
new_run.wait_for_completion()


# Creating a custom environment

In [7]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

In [None]:
# Define an environment whose conda dependencies include scikit-learn,
# then register it with the workspace.
myenv = Environment(name="MyEnvironment")
myenv_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
myenv.python.conda_dependencies = myenv_dep
myenv.register(ws)

In [None]:
# Run TrainingScript.py inside the custom environment and wait for it.
script_config = ScriptRunConfig(
    source_directory=".",
    script="TrainingScript.py",
    environment=myenv,
)
new_run1 = new_experiment.submit(config=script_config)
new_run1.wait_for_completion()

In [None]:
from azureml.core.compute import AmlCompute

# Get the compute cluster, creating it first when the workspace does not
# have a compute target with this name yet.
cluster_name = "vikascluster"
if cluster_name not in ws.compute_targets:
    # vm_size is a placeholder that must be filled in before running.
    compute_config = AmlCompute.provisioning_configuration(vm_size="",
                                                           max_nodes=2)
    cluster = AmlCompute.create(ws, cluster_name, compute_config)
    cluster.wait_for_completion()
    # Report creation separately: the original printed the "found" message
    # even when the cluster had just been provisioned.
    print(cluster, " compute cluster created.")
else:
    cluster = ws.compute_targets[cluster_name]
    print(cluster, " compute cluster found.. using it....")

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget

# Bind compute_cluster to the cluster named cluster_name.
# The original always called ComputeTarget.create with `compute_config`,
# but that name is only defined when the previous cell took its "create"
# branch; with an already-existing cluster this cell raised NameError.
if cluster_name in ws.compute_targets:
    # Cluster already exists: reuse it instead of creating it again.
    compute_cluster = ws.compute_targets[cluster_name]
else:
    # vm_size is a placeholder that must be filled in before running.
    compute_config = AmlCompute.provisioning_configuration(vm_size="",
                                                           max_nodes=2)
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_cluster.wait_for_completion()

In [None]:
from azureml.core.runconfig import RunConfiguration

# Run configuration shared by the pipeline steps: it carries the compute
# cluster as target and the custom environment.
run_config = RunConfiguration()
run_config.target = compute_cluster
run_config.environment = myenv

# Creating pipeline

In [None]:
from azureml.core import Dataset
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineData

# Look the input dataset up by name. Dataset.get_by_name (already used
# elsewhere in this notebook) is used instead of ws.datasets.get(...):
# the dict-style .get() returns None for an unknown name, which only
# surfaces later as an AttributeError on input_ds.as_named_input(...).
input_ds = Dataset.get_by_name(ws, "dataset name")

# Intermediate pipeline data named 'datafolder', placed on the workspace's
# default datastore.
datafolder = PipelineData('datafolder', datastore=ws.get_default_datastore())


In [None]:
# Pipeline step 1: runs Dataprep_pipeline.py with the dataset as the named
# input 'raw_data'; datafolder is declared as its output and also passed to
# the script via --datafolder.
dataprep_step = PythonScriptStep(
    name="Datapreparation",
    source_directory=".",
    script_name="Dataprep_pipeline.py",
    inputs=[input_ds.as_named_input('raw_data')],
    outputs=[datafolder],
    runconfig=run_config,
    arguments=['--datafolder', datafolder],
)

In [None]:
# Pipeline step 2: runs Training_pipeline.py with datafolder as its input,
# passed to the script via --datafolder.
train_step = PythonScriptStep(
    name="ModelTraining",
    source_directory=".",
    script_name="Training_pipeline.py",
    inputs=[datafolder],
    runconfig=run_config,
    arguments=['--datafolder', datafolder],
)

In [None]:
# Steps handed to the Pipeline constructor: data preparation, then training.
steps = [dataprep_step, train_step]

In [None]:
from azureml.pipeline.core import Pipeline

# Assemble the pipeline from the step list.
new_pipeline = Pipeline(workspace=ws, steps=steps)

In [None]:
# Submit the pipeline under the "pipelineexp01" experiment and wait for it
# to finish while showing its output. Note: this rebinds new_experiment.
new_experiment = Experiment(workspace=ws, name="pipelineexp01")
pipeline_exp_run = new_experiment.submit(new_pipeline)
pipeline_exp_run.wait_for_completion(show_output=True)

# AutoML configuration

In [None]:
from azureml.train.automl import AutoMLConfig

# AutoML classification settings; label_column_name is a placeholder that
# must be set to the target column before submitting.
automl_config = AutoMLConfig(
    task='classification',
    compute_target=cluster,
    training_data=input_ds,
    validation_size=0.3,
    label_column_name="",
    primary_metric="norm_macro_recall",
    iterations=10,
    max_concurrent_iterations=2,
    experiment_timeout_hours=0.25,
    featurization='auto',
)

In [None]:
from azureml.core.experiment import Experiment

# Submit the AutoML configuration as a run of "exp_name" and wait for it,
# showing its output.
new_exp = Experiment(workspace=ws, name="exp_name")
print("submitting the exxperiment")
new_run = new_exp.submit(automl_config)
new_run.wait_for_completion(show_output=True)

In [None]:
# Retrieve and display the best child run of the AutoML run.
best_child_run = new_run.get_best_child()
print(best_child_run)

In [None]:
# Print the id and the two metrics of interest for every child run.
for run in new_run.get_children():
    # get_metrics is a method and must be called; the original subscripted
    # the bound method itself (run.get_metrics['accuracy']), which raises
    # TypeError. Fetch the metrics once per run and reuse the dict.
    metrics = run.get_metrics()
    print("run id: ", run.id)
    # .get() prints None instead of raising KeyError for a child run that
    # did not record the metric.
    print("accuracy is: ", metrics.get('accuracy'))
    print("norm macro recall: ", metrics.get('norm_macro_recall'))

# Hyperparameter Tuning

In [None]:
# Redefine "MyEnvironment" for the hyperparameter-tuning runs: scikit-learn
# and pip from conda, plus azureml-defaults and azureml-interpret from pip;
# then register it with the workspace.
myenv = Environment(name="MyEnvironment")
myenv_dep = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pip'],
    pip_packages=['azureml-defaults', 'azureml-interpret'],
)
myenv.python.conda_dependencies = myenv_dep
myenv.register(ws)

In [None]:
from azureml.core.compute import AmlCompute

# Get the compute cluster, creating it first when the workspace does not
# have a compute target with this name yet.
cluster_name = "vikascluster"
if cluster_name not in ws.compute_targets:
    # vm_size is a placeholder that must be filled in before running.
    compute_config = AmlCompute.provisioning_configuration(vm_size="",
                                                           max_nodes=2)
    cluster = AmlCompute.create(ws, cluster_name, compute_config)
    cluster.wait_for_completion()
    # Report creation separately: the original printed the "found" message
    # even when the cluster had just been provisioned.
    print(cluster, " compute cluster created.")
else:
    cluster = ws.compute_targets[cluster_name]
    print(cluster, " compute cluster found.. using it....")

In [None]:
from azureml.core import ScriptRunConfig

# Script run used as the base for hyperparameter tuning: hyperdrivescript.py
# receives the dataset through --input-data and runs on the cluster inside
# the custom environment.
script_config = ScriptRunConfig(
    source_directory=".",
    script="hyperdrivescript.py",
    arguments=['--input-data', input_ds.as_named_input('raw_input')],
    environment=myenv,
    compute_target=cluster,
)

In [None]:
from azureml.train.hyperdrive import GridParameterSampling, choice

# Grid of script arguments to try: 4 values of --n_estimators combined with
# 3 values of --min_samples_leaf.
hyper_params = GridParameterSampling({
    '--n_estimators': choice(10, 20, 30, 100),
    '--min_samples_leaf': choice(1, 2, 3),
})

In [None]:
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal
from azureml.core.experiment import Experiment

# HyperDrive settings: maximise 'accuracy' over the parameter grid, with no
# early-termination policy, at most 20 runs in total and 2 at a time.
hyper_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=hyper_params,
    policy=None,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=2,
)

# Submit it as a run of "exp_name" and wait for it, showing its output.
new_exp = Experiment(workspace=ws, name="exp_name")
print("submitting the exxperiment")
new_run = new_exp.submit(hyper_config)
new_run.wait_for_completion(show_output=True)


In [None]:
# Pick the run that scored best on the primary metric and show its id and
# logged metrics.
best_run = new_run.get_best_run_by_primary_metric()
print("best run id: ", best_run.id)
print(best_run.get_metrics())

# Model Explainability

In [8]:
!pip install azureml-explain-model
!pip install azureml-interpret

In [None]:
# Snippet intended to be placed inside a training script: it builds a
# TabularExplainer for an already trained model.
# NOTE(review): `trained_model`, `x_train` and `X` are not defined anywhere
# in this notebook; they are presumably provided by the surrounding script.
from interpret.ext.blackbox import TabularExplainer
# Class labels passed to the explainer (placeholders to fill in).
classes=["",""]
# Feature names taken from the columns of X.
# NOTE(review): X is presumably the feature DataFrame that x_train was split
# from - confirm the columns match x_train.
features = list(X.columns)
tab_explainer = TabularExplainer(trained_model,
                                x_train,
                                features=features,
                                classes=classes)

In [None]:
# Compute the global explanation on x_train and print the feature-importance
# dictionary it returns.
global_explanation = tab_explainer.explain_global(x_train)
global_fi = global_explanation.get_feature_importance_dict()
print(global_fi)

In [None]:
# Explain the first five rows of x_test individually.
x_explain = x_test[:5]
local_explanation = tab_explainer.explain_local(x_explain)

# Ranked feature names and their matching importance values.
local_features = local_explanation.get_ranked_local_names()
local_importance = local_explanation.get_ranked_local_values()

In [None]:
from azureml.interpret import ExplanationClient

# Create the client before using it: the original called
# explain_client.upload_model_explanation(...) while explain_client was
# still undefined at this point, which raises NameError on a fresh kernel.
# NOTE(review): new_run is assumed to be the run the explanation belongs to
# (inside a training script this would be the script's own run) - confirm.
explain_client = ExplanationClient.from_run(new_run)

# Attach the global explanation to the run.
explain_client.upload_model_explanation(global_explanation,
                                        comment="My First Explanation")

In [None]:
# Reconnect to the workspace and fetch a run by id ("run id" is a
# placeholder), then download the explanation stored with that run.
ws = Workspace.from_config(path="./config")
new_run = ws.get_run("run id")

explain_client = ExplanationClient.from_run(new_run)
downloaded_explanation = explain_client.download_model_explanation()
feature_importances = downloaded_explanation.get_feature_importance_dict()

# Registering The Model

In [None]:
from azureml.core import Workspace, Model

# Reconnect to the workspace and fetch the run that produced the model
# ("run id" is a placeholder).
ws = Workspace.from_config(path="./config")
new_run = ws.get_run("run id")

# Register the file outputs/models.pkl from that run as 'vikasmodel',
# recording the run's 'accuracy' metric as a model property.
new_run.register_model(
    model_path='outputs/models.pkl',
    model_name='vikasmodel',
    tags={'source': 'sdk-run', 'algorithm': 'Randomforest'},
    properties={'Accuracy': new_run.get_metrics()['accuracy']},
    description="combined models from the run",
)


# Displaying all models from registry

In [None]:
# List every model registered in the workspace with its version and the id
# of the run that produced it.
models = Model.list(ws)
# Iterate over the list fetched above; the original assigned `models` but
# then called Model.list(ws) a second time for the loop.
for model in models:
    print("\n", model.name, 'version: ', model.version)
    print("model run id ", model.run_id)
    

# Deploying the model to Azure Kubernetes Service

In [None]:
from azureml.core.compute import AksCompute, ComputeTarget

# Provision an AKS cluster for serving the model; location and vm_size are
# placeholders to be replaced with real values.
cluster_name = 'aks-cluster-vikas'
aks_config = AksCompute.provisioning_configuration(
    location='region_name',
    vm_size='name',
    agent_count=1,
    cluster_purpose='Dev/Test',
)
production_cluster = ComputeTarget.create(ws, cluster_name, aks_config)
production_cluster.wait_for_completion(show_output=True)

In [None]:
from azureml.core.model import InferenceConfig

# Inference setup: scoring_script.py from ./service_files, run in myenv.
# The variable name keeps its original spelling because the deployment cell
# refers to it.
infernece_config = InferenceConfig(
    environment=myenv,
    entry_script='scoring_script.py',
    source_directory='./service_files',
)

In [None]:
from azureml.core.webservice import AksWebservice

# Resources requested for the web service: 1 CPU core and 1 GB of memory.
deploy_config = AksWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
)

In [None]:
# Look up the registered model ('model_name' is a placeholder) and deploy it
# as the web service 'model-service' on the AKS cluster, then wait for the
# deployment while showing its output.
model = ws.models['model_name']
service = Model.deploy(
    workspace=ws,
    name='model-service',
    models=[model],
    inference_config=infernece_config,
    deployment_config=deploy_config,
    deployment_target=production_cluster,
)
service.wait_for_deployment(show_output=True)