### Generamos las credenciales

In [2]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Credential object shared by every Azure ML client call in this notebook.
credential = DefaultAzureCredential()

# Remains None only when both the config lookup and the explicit fallback fail.
ml_client = None
try:
    # Preferred path: let the SDK locate a workspace config file.
    ml_client = MLClient.from_config(credential)
except Exception as config_error:
    print(config_error)
    # Fallback: explicit details of the Azure Machine Learning workspace.
    subscription_id = 'd86ba508-6a09-4959-8b9f-f076ad827658'
    resource_group = 'cloud-computing-module-iv'
    workspace_name = 'cloud-computing-project'
    ml_client = MLClient(
        credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group,
        workspace_name=workspace_name,
    )

Found the config file in: /config.json


### Definimos nuestro workspace

In [140]:
from azureml.core import Workspace
# Load the azureml.core Workspace handle; the Datastore lookup further down uses `ws`.
ws = Workspace.from_config()
# Print name, resource group, location and subscription ID, one per line.
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

azure-ml-projecto-1
projecto-azire-machine-learning
eastus2
7b2e08d3-2780-4db1-b31f-5de9c87471ed


### Creamos el cluster (Si es necesario)
Estamos utilizando un clúster estándar sin GPU, ya que nuestro problema consiste únicamente en clasificar si las personas sobrevivieron al Titanic o no.

De hecho, incluso este clúster es grande para la cantidad de datos que estamos procesando.

In [141]:
#create cluster
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError

compute_name = "cpu-cluster"

try:
    # Probe for the cluster; the lookup raises when it does not exist.
    _ = ml_client.compute.get(compute_name)
except ResourceNotFoundError:
    # Cluster is missing: define it and wait for provisioning to finish.
    compute_config = AmlCompute(
        name=compute_name,
        type="amlcompute",
        size="STANDARD_D2_V2",
        min_instances=0,
        max_instances=4,
        idle_time_before_scale_down=120,
    )
    ml_client.begin_create_or_update(compute_config).result()
else:
    print("Compute already exist")

Compute already exist


### Separamos los datos en entrenamiento // Validacion

In [142]:
import os

from azureml.core import Dataset, Datastore
from azure.ai.ml.entities import Data
import pandas as pd

# Read the processed CSV from the workspace datastore as a tabular dataset.
datastore = Datastore.get(ws, "workspaceworkingdirectory")
dataset = Dataset.Tabular.from_delimited_files(path=(datastore, 'Users/wilson.ramirez/project-I/data/processed_titanic.csv'))

# Split with percentage=0.8 and a fixed seed so the split is repeatable.
training_data, validation_data = dataset.random_split(percentage=0.8, seed=223)

# DataFrame.to_csv does not create missing parent folders, so create them
# first; exist_ok keeps the cell safe to re-run.
for split_dir in ('./data/training_data', './data/validation_data'):
    os.makedirs(split_dir, exist_ok=True)

# NOTE(review): to_csv writes the DataFrame index as an extra unnamed column by
# default. The scoring payloads at the end of this notebook send a "Column2"
# field, which looks like such an index column ending up among the features.
# Passing index=False would drop it, but that changes the model's input schema,
# so it is left unchanged here -- confirm and retrain if it is unintended.
training_data.to_pandas_dataframe().to_csv('./data/training_data/titanic_training.csv')
validation_data.to_pandas_dataframe().to_csv('./data/validation_data/titanic_validation.csv')

### Creamos los archivos MLTable para poder utilizar los datos

In [143]:
import mltable

def save_mltable(csv_path, output_dir):
    """Build an MLTable definition for one delimited file and save it.

    Args:
        csv_path: Path of the CSV file the table should read.
        output_dir: Folder that receives the saved MLTable definition.

    Returns:
        The MLTable object that was saved.
    """
    table = mltable.from_delimited_files([{'file': csv_path}])
    table.save(output_dir)
    return table


# One MLTable per split. The validation table now has its own name instead of
# overwriting `train_table`.
train_table = save_mltable(
    './data/training_data/titanic_training.csv', './data/training_data'
)
validation_table = save_mltable(
    './data/validation_data/titanic_validation.csv', './data/validation_data'
)

### Cargamos los datos para el entrenamiento del modelo

In [144]:
from azure.ai.ml import automl, Input
from azure.ai.ml.constants import AssetTypes

# Names consumed by the AutoML job definition in the next cell.
my_exp_name = "titanic-survival-rate"
my_compute_name = "cpu-cluster"
target_column = "Survived"

# Both inputs point at local folders that hold a saved MLTable definition.
my_training_data_input = Input(path="./data/training_data", type=AssetTypes.MLTABLE)
my_validation_data_input = Input(path="./data/validation_data", type=AssetTypes.MLTABLE)

### Definimos los parametros para el entrenamiento del modelo

In [145]:
# AutoML classification task: where it runs, which data it uses, what it optimizes.
classification_job = automl.classification(
    experiment_name=my_exp_name,
    compute=my_compute_name,
    target_column_name=target_column,
    training_data=my_training_data_input,
    validation_data=my_validation_data_input,
    primary_metric="accuracy",
    enable_model_explainability=True,
)

# Limits are all optional
classification_job.set_limits(
    max_trials=10,
    trial_timeout_minutes=10,
    timeout_minutes=60,
    enable_early_termination=True,
)

### Creamos el job para el entrenamiento

In [146]:
# Submit the AutoML job
# Submit the AutoML job to the backend and keep the returned job object.
returned_job = ml_client.jobs.create_or_update(classification_job)
print(f"Created job: {returned_job}")

# Last expression of the cell: the Studio URL for the status of the job.
returned_job.services["Studio"].endpoint

Uploading training_data (0.06 MBs): 100%|██████████| 57259/57259 [00:00<00:00, 443292.43it/s]


Uploading validation_data (0.02 MBs): 100%|██████████| 16341/16341 [00:00<00:00, 168495.18it/s]




Created job: compute: azureml:cpu-cluster
creation_context:
  created_at: '2023-10-29T10:25:04.955025+00:00'
  created_by: WILSON FABRICIO RAMIREZ SANDY
  created_by_type: User
display_name: dynamic_hair_lwry98101p
experiment_name: titanic-survival-rate
id: azureml:/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourceGroups/projecto-azire-machine-learning/providers/Microsoft.MachineLearningServices/workspaces/azure-ml-projecto-1/jobs/dynamic_hair_lwry98101p
limits:
  enable_early_termination: true
  max_concurrent_trials: 1
  max_cores_per_trial: -1
  max_nodes: 1
  max_trials: 10
  timeout_minutes: 60
  trial_timeout_minutes: 10
log_verbosity: info
name: dynamic_hair_lwry98101p
outputs: {}
primary_metric: accuracy
properties: {}
resources:
  instance_count: 1
  shm_size: 2g
services:
  Studio:
    endpoint: https://ml.azure.com/runs/dynamic_hair_lwry98101p?wsid=/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourcegroups/projecto-azire-machine-learning/workspaces/azure-ml

'https://ml.azure.com/runs/dynamic_hair_lwry98101p?wsid=/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourcegroups/projecto-azire-machine-learning/workspaces/azure-ml-projecto-1&tid=cc28633f-12b8-46cb-bc15-951dae239b4d'

### Trackeamos el job para elegir el mejor modelo

In [147]:
import mlflow

# Look up the MLflow tracking URI exposed by the current workspace.
MLFLOW_TRACKING_URI = ml_client.workspaces.get(
    name=ml_client.workspace_name
).mlflow_tracking_uri

# Point MLflow at the workspace explicitly. Without this the URI was only
# printed, and later MLflow calls depended on the environment already being
# configured for the workspace.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

print(MLFLOW_TRACKING_URI)

azureml://eastus2.api.azureml.ms/mlflow/v1.0/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourceGroups/projecto-azire-machine-learning/providers/Microsoft.MachineLearningServices/workspaces/azure-ml-projecto-1


In [148]:
from mlflow.tracking.client import MlflowClient

# Client used in the following cells to fetch the AutoML parent run and its best child run.
# Built with no arguments, so it relies on MLflow's default tracking-URI resolution.
mlflow_client = MlflowClient()

In [149]:
# The submitted job's name is passed to MLflow as the run ID of the parent run.
job_name = returned_job.name

mlflow_parent_run = mlflow_client.get_run(job_name)

# Print the full run object (metrics, params and tags).
print(mlflow_parent_run)

<Run: data=<RunData: metrics={'AUC_macro': 0.9491544724705346,
 'AUC_micro': 0.95981700059512,
 'AUC_weighted': 0.9491544724705346,
 'accuracy': 0.9119718309859155,
 'average_precision_score_macro': 0.9471592522261304,
 'average_precision_score_micro': 0.9600146422526947,
 'average_precision_score_weighted': 0.9559981963049825,
 'balanced_accuracy': 0.8916756818311222,
 'f1_score_macro': 0.8974092216105308,
 'f1_score_micro': 0.9119718309859155,
 'f1_score_weighted': 0.9112913352207107,
 'log_loss': 0.35628088388009954,
 'matthews_correlation': 0.7955621883404834,
 'norm_macro_recall': 0.7833513636622444,
 'precision_score_macro': 0.9039816772374911,
 'precision_score_micro': 0.9119718309859155,
 'precision_score_weighted': 0.9112584244012348,
 'recall_score_macro': 0.8916756818311222,
 'recall_score_micro': 0.9119718309859155,
 'recall_score_weighted': 0.9119718309859155,
 'weighted_accuracy': 0.9276301339775973}, params={}, tags={'automl_best_child_run_id': 'dynamic_hair_lwry98101p_8

In [150]:
# List the tag names attached to the parent run; 'automl_best_child_run_id' is read in the cells below.
print(mlflow_parent_run.data.tags.keys())

dict_keys(['model_explain_run', 'pipeline_id_000', 'score_000', 'predicted_cost_000', 'fit_time_000', 'training_percent_000', 'iteration_000', 'run_preprocessor_000', 'run_algorithm_000', 'automl_best_child_run_id', 'model_explain_best_run_child_id', 'mlflow.rootRunId', 'mlflow.runName', 'mlflow.user'])


### Sacamos el ID del mejor modelo

In [151]:
# Show the run ID stored under the parent run's 'automl_best_child_run_id' tag.
print(mlflow_parent_run.data.tags['automl_best_child_run_id'])

dynamic_hair_lwry98101p_8


In [152]:
# Read the best child run's ID from the parent run's tags, then fetch that run.
best_child_run_id = mlflow_parent_run.data.tags['automl_best_child_run_id']
best_run = mlflow_client.get_run(best_child_run_id)

### Observamos las metricas para el mejor modelo

In [153]:
# Display the metrics dictionary recorded on the best child run.
best_run.data.metrics

{'average_precision_score_weighted': 0.9559981963049825,
 'AUC_macro': 0.9491544724705346,
 'recall_score_micro': 0.9119718309859155,
 'balanced_accuracy': 0.8916756818311222,
 'f1_score_micro': 0.9119718309859155,
 'AUC_weighted': 0.9491544724705346,
 'precision_score_weighted': 0.9112584244012348,
 'f1_score_macro': 0.8974092216105308,
 'norm_macro_recall': 0.7833513636622444,
 'weighted_accuracy': 0.9276301339775973,
 'f1_score_weighted': 0.9112913352207107,
 'precision_score_micro': 0.9119718309859155,
 'log_loss': 0.35628088388009954,
 'average_precision_score_macro': 0.9471592522261304,
 'matthews_correlation': 0.7955621883404834,
 'recall_score_macro': 0.8916756818311222,
 'recall_score_weighted': 0.9119718309859155,
 'average_precision_score_micro': 0.9600146422526947,
 'accuracy': 0.9119718309859155,
 'AUC_micro': 0.95981700059512,
 'precision_score_macro': 0.9039816772374911}

### Descargamos los artefactos del modelo

In [154]:
import os

# Folder that receives the downloaded model artifacts.
local_dir = "./artifact_download"
# exist_ok=True makes the cell safe to re-run and removes the gap between
# checking for the folder and creating it. Missing parents are created too.
os.makedirs(local_dir, exist_ok=True)

In [155]:
import mlflow.artifacts

# MlflowClient.download_artifacts is deprecated (it emitted a warning when this
# cell ran); mlflow.artifacts.download_artifacts is the supported replacement.
# It downloads the run's "outputs" artifact folder into local_dir.
local_path = mlflow.artifacts.download_artifacts(
    run_id=best_run.info.run_id,
    artifact_path="outputs",
    dst_path=local_dir,
)
# Last expression of the cell: the local path of the downloaded artifacts.
local_path

  local_path = mlflow_client.download_artifacts(


'/mnt/batch/tasks/shared/LS_root/mounts/clusters/jupyter-compiler/code/Users/wilson.ramirez/project-I/artifact_download/outputs'

In [156]:
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings
)

### Empezamos con la creacion del endpoint

In [157]:
import datetime
# Timestamp suffix (month-day-hourminute) so that re-creating the endpoint
# yields a fresh name. "%H" replaces the original "%-H": the unpadded form is a
# platform-specific strftime extension that is not accepted everywhere, while
# the zero-padded one is portable.
online_endpoint_name = 'titanic-survivors-' + datetime.datetime.now().strftime(
    "%m-%d-%H%M"
)

# Endpoint definition only; nothing is created in Azure until it is submitted.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is an endpoint for titanic survivability",
    auth_mode="key",
    tags={"Job": "Clasification job for predicting titanic survivavility"}
)
# Last expression of the cell: show the generated endpoint name.
online_endpoint_name

'titanic-survivors-10-29-1047'

In [158]:
# Submit the endpoint definition and wait for the operation's result.
ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://titanic-survivors-10-29-1047.eastus2.inference.ml.azure.com/score', 'openapi_uri': 'https://titanic-survivors-10-29-1047.eastus2.inference.ml.azure.com/swagger.json', 'name': 'titanic-survivors-10-29-1047', 'description': 'this is an endpoint for titanic survivability', 'tags': {'Job': 'Clasification job for predicting titanic survivavility'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourcegroups/projecto-azire-machine-learning/providers/microsoft.machinelearningservices/workspaces/azure-ml-projecto-1/onlineendpoints/titanic-survivors-10-29-1047', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/oe:b4d6cf42-f975-46c7-be7c-4a853be77019:738ed802-781a-4403-9441-cb31283d3a52?api-v

In [160]:
model_name = "titanic-survivalillity"

# azureml:// URI of the MLflow model folder under the best child run's outputs.
best_model_uri = f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/"

# Model asset definition; registration happens in the next cell.
model = Model(
    name=model_name,
    path=best_model_uri,
    type=AssetTypes.MLFLOW_MODEL,
    description="my simple titanic survivability check model",
)

In [161]:
# Register the model in the workspace; the returned object's ID is used by the deployment below.
registered_model = ml_client.models.create_or_update(model)

In [162]:
# Display the registered model's resource ID.
registered_model.id

'/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourceGroups/projecto-azire-machine-learning/providers/Microsoft.MachineLearningServices/workspaces/azure-ml-projecto-1/models/titanic-survivalillity/versions/8'

In [163]:
from azure.ai.ml.entities import OnlineRequestSettings

# Request settings for the deployment: timeout of 90000 ms per request.
req_timeout = OnlineRequestSettings(request_timeout_ms=90000)

# Deployment definition that serves the registered model behind the endpoint.
deployment = ManagedOnlineDeployment(
    name="survivability-titanic-model",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    request_settings=req_timeout,
    instance_count=1,
    instance_type="Standard_D2as_v4",
)

### Desplegamos el modelo

In [165]:
# Submit the deployment and wait for the operation's result.
ml_client.online_deployments.begin_create_or_update(deployment).result()

Check: endpoint titanic-survivors-10-29-1047 exists


.........................................................................

ManagedOnlineDeployment({'private_network_connection': None, 'provisioning_state': 'Succeeded', 'endpoint_name': 'titanic-survivors-10-29-1047', 'type': 'Managed', 'name': 'survivability-titanic-model', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/od:b4d6cf42-f975-46c7-be7c-4a853be77019:49a1dc79-8d17-4a2d-af02-b92b2f288dff?api-version=2023-04-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/7b2e08d3-2780-4db1-b31f-5de9c87471ed/resourceGroups/projecto-azire-machine-learning/providers/Microsoft.MachineLearningServices/workspaces/azure-ml-projecto-1/onlineEndpoints/titanic-survivors-10-29-1047/deployments/survivability-titanic-model', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jupyter-compiler/code/Users/wilson.ramirez/project-I', 'creation_

### Consumimos el modelo (Directo del cliente de azure)

In [20]:
import json
# Column names sent to the model, in the order the row values follow.
feature_columns = [
    "Column2",
    "PassengerId",
    "Pclass",
    "Age",
    "SibSp",
    "Parch",
    "Fare",
    "Sex_female",
    "Sex_male",
    "Embarked_C",
    "Embarked_Q",
    "Embarked_S",
]
# A single row to score.
feature_rows = [
    [1, 1, 3, 14, 1, 2, 150, True, False, True, False, False],
]
request_json = {"input_data": {"columns": feature_columns, "data": feature_rows}}

# The SDK's invoke() reads the payload from a file, so write it to disk first.
request_name = "request_titanic_survived_model.json"
with open(request_name, "w") as request_file:
    request_file.write(json.dumps(request_json))

resp = ml_client.online_endpoints.invoke(
    endpoint_name='titanic-survivors-10-29-1047',
    deployment_name="survivability-titanic-model",
    request_file=request_name,
)
# Last expression of the cell: the raw response returned by the endpoint.
resp

'[1]'

### Consumimos el modelo (Mediante request Python)

In [21]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally turn off server-certificate verification for HTTPS clients.

    When `allowed` is truthy, the PYTHONHTTPSVERIFY environment variable is
    unset or empty, and the ssl module provides `_create_unverified_context`,
    that factory is installed as ssl's default HTTPS context factory.
    Otherwise nothing is changed.

    Args:
        allowed: Truthy to request the bypass, falsy to leave ssl untouched.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        # An explicit PYTHONHTTPSVERIFY setting takes precedence over the bypass.
        return
    if getattr(ssl, '_create_unverified_context', None):
        # bypass the server certificate verification on client side
        ssl._create_default_https_context = ssl._create_unverified_context

import getpass
import urllib.error

# Certificate verification is left enabled: the request carries a bearer key,
# and switching verification off affects every HTTPS call in the process.
# Call allowSelfSignedHttps(True) only if the scoring service uses a
# self-signed certificate.

# Payload: column names plus one row of values in the same order.
data = {
  "input_data": {
    "columns": [
      "Column2",
      "PassengerId",
      "Pclass",
      "Age",
      "SibSp",
      "Parch",
      "Fare",
      "Sex_female",
      "Sex_male",
      "Embarked_C",
      "Embarked_Q",
      "Embarked_S"
    ],
    "data": [
            [1,1,3,14,1,2,150,True,False,True,False,False]
        ]
    }
}

body = json.dumps(data).encode("utf-8")

url = 'https://titanic-survivors-10-29-1047.eastus2.inference.ml.azure.com/score'
# The endpoint key (primary/secondary key or AMLToken) must not live in the
# notebook. It is read from the AZUREML_ENDPOINT_KEY environment variable,
# with an interactive prompt as fallback.
# NOTE(review): a key was previously committed on this line; treat it as
# leaked and regenerate the endpoint keys.
api_key = os.environ.get('AZUREML_ENDPOINT_KEY') or getpass.getpass('Endpoint key: ')
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'survivability-titanic-model' }

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


b'[1]'
