In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:

1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.
1. Copy the value for workspace, resource group and subscription ID into the code.  
1. You'll need to copy one value, close the area and paste, then come back for the next one.

![image of workspace credentials](./media/find-credentials.png)

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Workspace coordinates: replace the placeholders with your own values.
SUBSCRIPTION = "<YOUR_SUBSCRIPTION_ID>"
RESOURCE_GROUP = "<YOUR_RESOURCE_GROUP_NAME>"
WS_NAME = "<YOUR_WORKSPACE_NAME>"

# Credential object used to authenticate the client.
credential = DefaultAzureCredential()

# Handle to the workspace; used by every later cell that talks to Azure ML.
ml_client = MLClient(
    workspace_name=WS_NAME,
    resource_group_name=RESOURCE_GROUP,
    subscription_id=SUBSCRIPTION,
    credential=credential,
)

The client initialization is lazy: it waits until the first time it needs to make a call, which happens in the next code cell.

In [None]:
# Verify that the handle works correctly by fetching the workspace it points to.
# If you get an error here, modify your SUBSCRIPTION, RESOURCE_GROUP, and WS_NAME in the previous cell.
ws = ml_client.workspaces.get(WS_NAME)
print(f"{ws.location} : {ws.resource_group}")

## Create training script

In [42]:
import os

# Local folder that holds the training script; created if it does not exist yet.
train_src_dir = "./src"
os.makedirs(train_src_dir, exist_ok=True)

This script handles the preprocessing of the data, splitting it into test and train data. It then consumes this data to train a model and return the output model. 

In [43]:
%%writefile {train_src_dir}/main.py
import argparse
import lightgbm as lgb
import mlflow
import mlflow.sklearn
import numpy as np
import os
import pandas as pd
import polars as pl
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Target column and the feature columns the classifier is trained on.
_TARGET_COLUMN = "PackagingQuality"
_FEATURE_COLUMNS = [
    "SupplierName",
    "GarmentType",
    "Material",
    "Weight",
    "ProposedUnitsPerCarton",
    "ProposedFoldingMethod",
    "ProposedLayout",
    "Size",
    "Collection",
]


def _parse_args():
    """Parse the command-line arguments of the training script.

    The defaults equal the values that used to be hard-coded in the script,
    so running it without any flag trains with the same configuration as
    before; every flag that is passed is now actually honoured.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data", type=str, default="./data/full_join.xlsx", help="path to input data"
    )
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.30)
    parser.add_argument("--n_estimators", required=False, default=1000, type=int)
    parser.add_argument("--learning_rate", required=False, default=0.05, type=float)
    parser.add_argument(
        "--registered_model_name",
        type=str,
        default="packaging_quality_default_model",
        help="model name",
    )
    return parser.parse_args()


def _load_features_and_target(data_path):
    """Read the Excel sheet at *data_path* and return ``(X, y)`` as pandas objects.

    Rows whose packaging quality is neither "Good" nor "Bad" are dropped, the
    remaining rows are sorted by report date, and the target is encoded as
    1 ("Good") / 0 ("Bad").
    """
    df = pl.read_excel(source=data_path, sheet_name="Sheet1")
    # Normalise the cost column to float; it is not among the model features selected below.
    df = df.with_columns([
        pl.col("CostImpact (€)").cast(pl.Float64, strict=False),
    ])

    df_input = (
        df
        .filter(pl.col(_TARGET_COLUMN).is_in(["Bad", "Good"]))
        .sort("DateOfReport")
        .select(_FEATURE_COLUMNS + [_TARGET_COLUMN])
    )

    # Convert Polars to Pandas and encode the target variable.
    df_pd = df_input.to_pandas()
    df_pd[_TARGET_COLUMN] = df_pd[_TARGET_COLUMN].map({"Good": 1, "Bad": 0})

    X = df_pd.drop(columns=[_TARGET_COLUMN])
    y = df_pd[_TARGET_COLUMN]
    return X, y


def main():
    """Train, evaluate and register the packaging-quality classifier.

    Reads the command-line flags, loads and splits the data, fits a LightGBM
    classifier with the "Bad" class weighted four times as much as "Good",
    prints a classification report for the held-out split, then logs,
    registers and saves the model through MLflow.
    """
    args = _parse_args()

    # Used as a context manager so the MLflow run is ended even if training raises.
    with mlflow.start_run():
        # enable autologging
        # NOTE(review): the estimator below is a LightGBM model; confirm that
        # sklearn autologging captures everything that is expected.
        mlflow.sklearn.autolog()

        ###################
        #<prepare the data>
        ###################
        print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
        print("input data:", args.data)

        X, y = _load_features_and_target(args.data)

        # Log the size of the data the model actually sees, not of the raw sheet.
        mlflow.log_metric("num_samples", X.shape[0])
        mlflow.log_metric("num_features", X.shape[1])

        categorical_features = X.select_dtypes(include="object").columns.tolist()

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=args.test_train_ratio, random_state=42, stratify=y
        )

        # astype() returns new frames, so the split results are never modified
        # in place (which can trigger pandas chained-assignment warnings).
        category_dtypes = {col: "category" for col in categorical_features}
        X_train = X_train.astype(category_dtypes)
        X_test = X_test.astype(category_dtypes)
        ####################
        #</prepare the data>
        ####################

        ##################
        #<train the model>
        ##################
        # Per-sample weights: "Bad" (0) rows count four times as much as "Good" (1) rows.
        class_weights = np.where(y_train == 1, 1, 4)

        # Instantiate the LightGBM classifier
        model_lgb = lgb.LGBMClassifier(
            objective='binary',
            metric='auc',
            boosting_type='gbdt',
            n_estimators=args.n_estimators,
            learning_rate=args.learning_rate,
            random_state=42
        )

        # Train the model
        model_lgb.fit(
            X_train,
            y_train,
            sample_weight=class_weights,
            categorical_feature=categorical_features,
        )

        # Evaluate on the held-out split
        y_pred_lgb = model_lgb.predict(X_test)
        print(classification_report(y_test, y_pred_lgb))
        ###################
        #</train the model>
        ###################

        ##########################
        #<save and register model>
        ##########################
        # Registering the model to the workspace
        print("Registering the model via MLFlow")
        mlflow.sklearn.log_model(
            sk_model=model_lgb,
            registered_model_name=args.registered_model_name,
            artifact_path=args.registered_model_name,
        )

        # Saving the model to a file
        mlflow.sklearn.save_model(
            sk_model=model_lgb,
            path=os.path.join(args.registered_model_name, "trained_model"),
        )
        ###########################
        #</save and register model>
        ###########################


if __name__ == "__main__":
    main()

Overwriting ./src/main.py


In [None]:
from azure.ai.ml import command
from azure.ai.ml import Input

# Name under which the trained model is registered.
registered_model_name = "packaging_quality_default_model"

# Command job: runs main.py from ./src in the named environment, forwarding
# each entry of `inputs` to the matching command-line flag.
job = command(
    display_name="packaging_quality_default_prediction",
    environment="capstone-v2@latest",
    code="./src/",  # location of source code
    inputs={
        "test_train_ratio": 0.2,
        "learning_rate": 0.25,
        "registered_model_name": registered_model_name,
    },
    command="python main.py --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}",
)

## Submit the job 

In [None]:
# Submit the command job to the workspace.
ml_client.create_or_update(job)

## Create a new online endpoint

In [None]:
# Name of the online endpoint used by the cells below.
# NOTE(review): this is a fixed string, not a generated unique name; change it if the name is already in use.
online_endpoint_name = "pckg-quality-endpoint"

Create the endpoint:

In [None]:
# Expect the endpoint creation to take a few minutes
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
)

# Describe the online endpoint: key-based auth, tagged with the data set and model type.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    auth_mode="key",
    description="sample online endpoint for ML model deployment",
    tags=dict(
        training_dataset="packaging_quality_full_join",
        model_type="lightgbm.LGBMClassifier",
    ),
)

# Create (or update) the endpoint and block until the operation finishes.
endpoint = ml_client.online_endpoints.begin_create_or_update(endpoint).result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

Endpoint pckg-quality-endpoint provisioning state: Succeeded


In [63]:
# Fetch the endpoint by name and report its provisioning state.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

print(
    f'Endpoint "{endpoint.name}" with provisioning state "{endpoint.provisioning_state}" is retrieved'
)

Endpoint "pckg-quality-endpoint" with provisioning state "Succeeded" is retrieved


## Deploy the model to the endpoint

In [72]:
# Name of the registered model to deploy; used below to look up its versions.
registered_model_name = "packaging_quality_default_model"

In [73]:
# Let's pick the latest version of the model.
# Versions are converted with int(), so this assumes integer-like version labels.
model_versions = [
    int(m.version) for m in ml_client.models.list(name=registered_model_name)
]
if not model_versions:
    # max() on an empty list would only report "empty sequence"; say what is actually wrong.
    raise ValueError(
        f'No registered versions of model "{registered_model_name}" were found; '
        "make sure the training job has completed and registered the model."
    )
latest_model_version = max(model_versions)
print(f'Latest model is version "{latest_model_version}" ')

Latest model is version "1" 


Deploy the latest version of the model.  

In [74]:
from azure.ai.ml.entities import ManagedOnlineDeployment, CodeConfiguration, Environment

In [75]:
# Look up version "4" of the environment named "packaging_quality_default_model";
# it is passed to the blue deployment below.
env = ml_client.environments.get(name="packaging_quality_default_model", version="4")

In [None]:
# Resolve the registered model entry for the version selected above.
model = ml_client.models.get(
    version=latest_model_version,
    name=registered_model_name,
)

# "blue" deployment: serves the model with the custom scoring script
# score.py from ./src, running inside the environment fetched earlier.
blue = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    instance_type="Standard_DS3_v2",
    instance_count=1,
    environment=env,
    code_configuration=CodeConfiguration(scoring_script="score.py", code="./src"),
    model=model,
)

# Create the deployment and block until the operation completes.
blue_deployment = ml_client.begin_create_or_update(blue).result()

Check: endpoint pckg-quality-endpoint exists
Uploading src (53.57 MBs): 100%|██████████| 53572876/53572876 [00:00<00:00, 88021084.27it/s]




..............................................................

In [31]:
# picking the model to deploy. Here we use the latest version of our registered model
model = ml_client.models.get(name=registered_model_name, version=latest_model_version)

# Expect this deployment to take approximately 6 to 8 minutes.
# create an online deployment.
# if you run into an out of quota error, change the instance_type to a comparable VM that is available.
# Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.
green_deployment = ManagedOnlineDeployment(
    name="green",
    endpoint_name=online_endpoint_name,
    model=model,
    instance_type="Standard_DS3_v2",
    instance_count=1,
)

blue_deployment = ml_client.begin_create_or_update(green_deployment).result()

Check: endpoint pckg-quality-endpoint-v4 exists


..................................................................................

### Test with a sample query

In [None]:
# Local folder for deployment test artifacts; created if it does not exist yet.
# Relies on `os` having been imported in an earlier cell.
deploy_dir = "./deploy"
os.makedirs(deploy_dir, exist_ok=True)

In [None]:
%%writefile {deploy_dir}/sample-request.json
{
  "input_data": {
    "index": [0, 1, 2, 3, 4],
    "columns": [
      "SupplierName",
      "GarmentType",
      "Material",
      "Weight",
      "ProposedUnitsPerCarton",
      "ProposedFoldingMethod",
      "ProposedLayout",
      "Size",
      "Collection"
    ],
    "data": [
      ["SupplierC", "Shorts", "Polyester", 0.22, 31, "Method1", "LayoutA", "XS", "Spring"],
      ["SupplierH", "Shirt", "Polyester", 0.16, 45, "Method2", "LayoutA", "L", "Summer"],
      ["SupplierC", "Skirt", "Cotton", 0.30, 30, "Method2", "LayoutC", "M", "Summer"],
      ["SupplierA", "Coat", "Cotton", 1.29, 6, "Method1", "LayoutD", "M", "Spring"],
      ["SupplierA", "Jacket", "Cotton", 0.91, 14, "Method1", "LayoutC", "L", "Spring"]
    ]
  }
}

In [None]:
# test the blue deployment with some sample data
# request_file is the JSON payload written to ./deploy by the previous cell;
# deployment_name sends the request to the "blue" deployment explicitly.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file="./deploy/sample-request.json",
    deployment_name="blue",
)