# AAI540 - Module 5 Assignment

Victor Hugo Germano

## ML System Observability 

In [12]:
from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
from time import sleep
from threading import Thread

import pandas as pd

from sagemaker import get_execution_role,  Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer

from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig

# Defining model and quality monitoring

From m5

In [13]:

role = sagemaker.get_execution_role()
session = sagemaker.Session()
region = sess.boto_region_name

bucket = sess.default_bucket()
prefix = "model-bias-monitoring"

print(f"Bucket: {bucket}")
print(f"Role: {role}")
print(f"Region: {region}")

Bucket: sagemaker-us-east-1-440542329720
Role: arn:aws:iam::440542329720:role/LabRole
Region: us-east-1


In [14]:
# Setup S3 bucket
# You can use a different bucket, but make sure the role you chose for this notebook
# has the s3:PutObject permissions. This is the bucket into which the data is captured
bucket = session.default_bucket()
print("Demo Bucket:", bucket)
prefix = "sagemaker/Churn-ModelQualityMonitor-20201201"

##S3 prefixes
data_capture_prefix = f"{prefix}/datacapture"
s3_capture_upload_path = f"s3://{bucket}/{data_capture_prefix}"

ground_truth_upload_path = (
    f"s3://{bucket}/{prefix}/ground_truth_data/{datetime.now():%Y-%m-%d-%H-%M-%S}"
)

reports_prefix = f"{prefix}/reports"
s3_report_path = f"s3://{bucket}/{reports_prefix}"

##Get the model monitor image
monitor_image_uri = image_uris.retrieve(framework="model-monitor", region=region)

print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")
print(f"Report path: {s3_report_path}")

Demo Bucket: sagemaker-us-east-1-440542329720
Image URI: 156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer
Capture path: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/datacapture
Ground truth path: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/ground_truth_data/2026-02-09-12-30-51
Report path: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/reports


In [15]:
# Upload some test files
S3Uploader.upload("test_data/upload-test-file.txt", f"s3://{bucket}/test_upload")
print("Success! You are all set to proceed.")

Success! You are all set to proceed.


In [16]:
##Upload the pretrained model to S3
s3_key = f"s3://{bucket}/{prefix}"
model_url = S3Uploader.upload("model/xgb-churn-prediction-model.tar.gz", s3_key)
model_url

's3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/xgb-churn-prediction-model.tar.gz'

In [17]:
model_name = f"DEMO-xgb-churn-pred-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"

image_uri = image_uris.retrieve(framework="xgboost", version="0.90-1", region=region)

model = Model(image_uri=image_uri, model_data=model_url, role=role, sagemaker_session=session)

  model_name = f"DEMO-xgb-churn-pred-model-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"


In [18]:
endpoint_name = f"DEMO-xgb-churn-model-quality-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"
print("EndpointName =", endpoint_name)

data_capture_config = DataCaptureConfig(
    enable_capture=True, sampling_percentage=100, destination_s3_uri=s3_capture_upload_path
)

model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
    endpoint_name=endpoint_name,
    data_capture_config=data_capture_config,
)


  endpoint_name = f"DEMO-xgb-churn-model-quality-monitor-{datetime.utcnow():%Y-%m-%d-%H%M}"


EndpointName = DEMO-xgb-churn-model-quality-monitor-2026-02-09-1231
-----!

In [21]:
from sagemaker.predictor import Predictor

predictor = Predictor(
    endpoint_name=endpoint_name, sagemaker_session=session, serializer=CSVSerializer()
)

model monitor data

In [19]:
churn_cutoff = 0.8
validate_dataset = "validation_with_predictions.csv"

In [22]:
limit = 200  # Need at least 200 samples to compute standard deviations
i = 0
with open(f"test_data/{validate_dataset}", "w") as baseline_file:
    baseline_file.write("probability,prediction,label\n")  # our header
    with open("test_data/validation.csv", "r") as f:
        for row in f:
            (label, input_cols) = row.split(",", 1)
            probability = float(predictor.predict(input_cols))
            prediction = "1" if probability > churn_cutoff else "0"
            baseline_file.write(f"{probability},{prediction},{label}\n")
            i += 1
            if i > limit:
                break
            print(".", end="", flush=True)
            sleep(0.5)
print()
print("Done!")

........................................................................................................................................................................................................
Done!


In [23]:
!head test_data/validation_with_predictions.csv

probability,prediction,label
0.01516005303710699,0,0
0.1684480607509613,0,0
0.21427156031131744,0,0
0.06330718100070953,0,0
0.02791607193648815,0,0
0.014169521629810333,0,0
0.00571369007229805,0,0
0.10534518957138062,0,0
0.025899196043610573,0,0


In [24]:
baseline_prefix = prefix + "/baselining"
baseline_data_prefix = baseline_prefix + "/data"
baseline_results_prefix = baseline_prefix + "/results"

baseline_data_uri = f"s3://{bucket}/{baseline_data_prefix}"
baseline_results_uri = f"s3://{bucket}/{baseline_results_prefix}"
print(f"Baseline data uri: {baseline_data_uri}")
print(f"Baseline results uri: {baseline_results_uri}")

Baseline data uri: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/data
Baseline results uri: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/results


In [25]:
baseline_dataset_uri = S3Uploader.upload(f"test_data/{validate_dataset}", baseline_data_uri)
baseline_dataset_uri

's3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/data/validation_with_predictions.csv'

quality

In [26]:
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor import EndpointInput
from sagemaker.model_monitor.dataset_format import DatasetFormat

In [27]:
# Create the model quality monitoring object
churn_model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    volume_size_in_gb=20,
    max_runtime_in_seconds=1800,
    sagemaker_session=session,
)

In [28]:
# Name of the model quality baseline job
baseline_job_name = f"DEMO-xgb-churn-model-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"

  baseline_job_name = f"DEMO-xgb-churn-model-baseline-job-{datetime.utcnow():%Y-%m-%d-%H%M}"


In [29]:
# Execute the baseline suggestion job.
# You will specify problem type, in this case Binary Classification, and provide other required attributes.
job = churn_model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset=baseline_dataset_uri,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_uri,
    problem_type="BinaryClassification",
    inference_attribute="prediction",
    probability_attribute="probability",
    ground_truth_attribute="label",
)
job.wait(logs=False)

INFO:sagemaker:Creating processing-job with name DEMO-xgb-churn-model-baseline-job-2026-02-09-1238


.........................................................................................!

In [30]:
baseline_job = churn_model_quality_monitor.latest_baselining_job

In [31]:
binary_metrics = baseline_job.baseline_statistics().body_dict["binary_classification_metrics"]
pd.json_normalize(binary_metrics).T

Unnamed: 0,0
confusion_matrix.0.0,173
confusion_matrix.0.1,0
confusion_matrix.1.0,12
confusion_matrix.1.1,16
recall.value,0.571429
recall.standard_deviation,0.027518
precision.value,1.0
precision.standard_deviation,0.0
accuracy.value,0.940299
accuracy.standard_deviation,0.006612


In [32]:
pd.DataFrame(baseline_job.suggested_constraints().body_dict["binary_classification_constraints"]).T

Unnamed: 0,threshold,comparison_operator
recall,0.571429,LessThanThreshold
precision,1.0,LessThanThreshold
accuracy,0.940299,LessThanThreshold
true_positive_rate,0.571429,LessThanThreshold
true_negative_rate,1.0,LessThanThreshold
false_positive_rate,0.0,GreaterThanThreshold
false_negative_rate,0.428571,GreaterThanThreshold
auc,0.939513,LessThanThreshold
f0_5,0.869565,LessThanThreshold
f1,0.727273,LessThanThreshold


## Model Bias Monitor
In this section, we will set up a Model Bias Monitor to check for bias in our model's predictions. We need to prepare a dataset that includes both the model predictions and the features (converted to a format with headers) so that the monitor can identify the sensitive attributes (facets).

In [None]:
from sagemaker.model_monitor import ModelBiasMonitor, BiasAnalysisConfig
from sagemaker.clarify import BiasConfig

# Initialize the Model Bias Monitor
churn_model_bias_monitor = ModelBiasMonitor(
    role=role,
    sagemaker_session=session,
    max_runtime_in_seconds=1800,
    instance_count=1,
    instance_type="ml.m5.large",
    base_job_name="churn-bias-monitor"
)

facet_name = "Feature_1"

bias_config_object = BiasConfig(
    label_values_or_threshold=[1], 
    facet_name=facet_name,
    facet_values_or_threshold=[1], 
    group_name=None
)

# 1. Prepare a CLEAN dataset (Label + Features ONLY)
# We exclude 'probability' and 'prediction' so they aren't sent to the model
val_df = pd.read_csv("test_data/validation.csv", header=None)
feature_names = [f"Feature_{i}" for i in range(1, val_df.shape[1])]
val_df.columns = ["label"] + feature_names

# Save this clean version to CSV
bias_dataset_file = "test_data/validation_bias_clean.csv"
val_df.to_csv(bias_dataset_file, index=False)

print(f"Created clean dataset shape: {val_df.shape} (Should be 70 cols: 1 label + 69 features)")

Reorganized bias dataset shape: (666, 72)
New column order: ['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4', 'Feature_5']... + label, probability, prediction


In [None]:
# 2. Upload the clean dataset
bias_data_prefix = prefix + "/bias-baselining/data-clean"
bias_data_uri = f"s3://{bucket}/{bias_data_prefix}"

bias_dataset_s3_uri = S3Uploader.upload(bias_dataset_file, bias_data_uri)
print(f"Uploaded to: {bias_dataset_s3_uri}")

bias_results_prefix = prefix + "/bias-baselining/results"
bias_results_uri = f"s3://{bucket}/{bias_results_prefix}"

Uploaded bias dataset to: s3://sagemaker-us-east-1-440542329720/sagemaker/Churn-ModelQualityMonitor-20201201/bias-baselining/data/validation_for_bias.csv


In [None]:
from sagemaker.clarify import DataConfig, ModelConfig, ModelPredictedLabelConfig

# 3. Configure Data Input
# Clarify will read this file, strip 'label', and send the remaining 69 cols to the model
data_config = DataConfig(
    s3_data_input_path=bias_dataset_s3_uri,
    s3_output_path=bias_results_uri,
    label='label',
    headers=list(val_df.columns),
    dataset_type='text/csv'
)

# multiple runs generated many error to find thed appropriate endpoint name using the datatime object
endpoint_desc = session.sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
endpoint_config_name = endpoint_desc['EndpointConfigName']
endpoint_config_desc = session.sagemaker_client.describe_endpoint_config(
    EndpointConfigName=endpoint_config_name
)
actual_model_name = endpoint_config_desc['ProductionVariants'][0]['ModelName']

print(f"Using model from endpoint: {actual_model_name}")

# Now use this in your ModelConfig
model_config = ModelConfig(
    model_name=actual_model_name,  # Use the actual model name
    instance_count=1,
    instance_type='ml.m5.large',
    accept_type='text/csv',
    content_type='text/csv'
)

# 4. Configure Model Output
# The model returns a single probability column (index 0). 
# We apply the 0.8 threshold to convert it to a label for bias analysis.
model_predicted_label_config = ModelPredictedLabelConfig(
    probability=0,
    probability_threshold=0.8
)

Using model from endpoint: sagemaker-xgboost-2026-02-09-12-31-08-902


In [None]:
# 5. Run the Job
job_name_bias = f"bias-baseline-{datetime.utcnow():%Y-%m-%d-%H%M}"
print(job_name_bias)

bias_job = churn_model_bias_monitor.suggest_baseline(
    data_config=data_config,
    bias_config=bias_config_object, 
    model_config=model_config,
    model_predicted_label_config=model_predicted_label_config,
    job_name=job_name_bias
)

print("Started Clarify Bias Baseline Job.")
bias_job.wait(logs=False)

  job_name_bias = f"bias-baseline-{datetime.utcnow():%Y-%m-%d-%H%M}"
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'features': 'Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,Feature_11,Feature_12,Feature_13,Feature_14,Feature_15,Feature_16,Feature_17,Feature_18,Feature_19,Feature_20,Feature_21,Feature_22,Feature_23,Feature_24,Feature_25,Feature_26,Feature_27,Feature_28,Feature_29,Feature_30,Feature_31,Feature_32,Feature_33,Feature_34,Feature_35,Feature_36,Feature_37,Feature_38,Feature_39,Feature_40,Feature_41,Feature_42,Feature_43,Feature_44,Feature_45,Feature_46,Feature_47,Feature_48,Feature_49,Feature_50,Feature_51,Feature_52,Feature_53,Feature_54,Feature_55,Feature_56,Feature_57,Feature_58,Feature_59,Feature_60,Feature_61,Feature_62,Feature_63,Feature_64,Feature_65,Feature_66,Feature_67,Feature_68,Feature_69', 'headers': ['Feature_1', 'Fea

bias-baseline-2026-02-09-1421
Started Clarify Bias Baseline Job.
.......................................................................................................................*

In [None]:
# View the results
latest_bias_job = churn_model_bias_monitor.latest_baselining_job
bias_metrics = latest_bias_job.baseline_statistics().body_dict
print("Bias Metrics:")
pd.json_normalize(bias_metrics)

In [None]:

latest_bias_job = churn_model_bias_monitor.latest_baselining_job
bias_metrics = latest_bias_job.baseline_statistics().body_dict
print("Bias Metrics:")
pd.json_normalize(bias_metrics)

# Cleanup Resources
print("Starting resource cleanup...")

# 1. Delete Endpoint
try:
    session.delete_endpoint(endpoint_name)
    print(f"Deleted endpoint: {endpoint_name}")
except Exception as e:
    print(f"Could not delete endpoint (it may not exist): {e}")

# 2. Delete Endpoint Configuration
try:
    session.delete_endpoint_config(endpoint_name)
    print(f"Deleted endpoint config: {endpoint_name}")
except Exception as e:
    print(f"Could not delete endpoint config: {e}")

# 3. Delete Model
try:
    session.delete_model(model_name)
    print(f"Deleted model: {model_name}")
except Exception as e:
    print(f"Could not delete model: {e}")

# 4. Delete S3 Objects
try:
    s3 = boto3.resource('s3')
    bucket_resource = s3.Bucket(bucket)
    
    # Delete the main project prefix (contains model, data capture, baselines, etc.)
    print(f"Deleting objects with prefix: {prefix}")
    bucket_resource.objects.filter(Prefix=prefix).delete()
    
    # Delete the test upload prefix
    print(f"Deleting objects with prefix: test_upload")
    bucket_resource.objects.filter(Prefix="test_upload").delete()
    
    print("S3 cleanup complete.")
except Exception as e:
    print(f"Error cleaning up S3: {e}")