# AAI540 - Module 4 Assignment

Victor Hugo Germano

## AWS SageMaker Model Registry, Model Package, and Model Card Implementation

In [1]:
import boto3
import sagemaker
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
from sagemaker.session import Session, get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Setup and Configuration

In [2]:

# IAM role the notebook runs as, and the SageMaker session all later calls go through.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()
region = sess.boto_region_name

# The session's default bucket, plus the key prefix under which this notebook
# stores its datasets and training output.
bucket = sess.default_bucket()
prefix = "DEMO-breast-cancer-prediction-xgboost-highlevel"

# One call, one line per setting.
print(f"Bucket: {bucket}", f"Role: {role}", f"Region: {region}", sep="\n")

Bucket: sagemaker-us-east-1-440542329720
Role: arn:aws:iam::440542329720:role/LabRole
Region: us-east-1


## Data Preparation

Load the Breast Cancer Wisconsin (Diagnostic) dataset, encode the diagnosis label as binary, split the data, and upload the splits to S3. This code is extracted from the Module 4 Lab.

In [3]:
s3 = boto3.client("s3")

# Fetch the raw CSV from the region-specific `sagemaker-example-files-prod-*` bucket
# into the local working directory.
filename = "wdbc.csv"
s3.download_file(
    Bucket=f"sagemaker-example-files-prod-{region}",
    Key="datasets/tabular/breast_cancer/wdbc.csv",
    Filename=filename,
)

# The file is read without a header row, so column names are attached afterwards.
data = pd.read_csv(filename, header=None)

# Column layout: id, diagnosis, then ten measurements repeated for each of the
# three statistics (all "mean" columns first, then "se", then "worst").
measurement_names = [
    "radius", "texture", "perimeter", "area", "smoothness",
    "compactness", "concavity", "concave points", "symmetry", "fractal_dimension",
]
data.columns = ["id", "diagnosis"] + [
    f"{measurement}_{statistic}"
    for statistic in ("mean", "se", "worst")
    for measurement in measurement_names
]

# Encode the label as an integer: "M" -> 1, every other value -> 0.
data["diagnosis"] = (data["diagnosis"] == "M").astype("int64")

print(f"Dataset shape: {data.shape}")
data.head()

Dataset shape: (569, 32)


Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [4]:
# Split data: 80% train, 10% validation, 10% batch.
# Every row gets one uniform draw in [0, 1); the interval the draw falls into
# decides which split the row belongs to, so the sizes are approximate.
# The generator is seeded so that a kernel restart reproduces the same split
# (and therefore the same training/validation files and model).
SPLIT_SEED = 42
rand_split = np.random.default_rng(SPLIT_SEED).random(len(data))
train_list = rand_split < 0.8
val_list = (rand_split >= 0.8) & (rand_split < 0.9)
batch_list = rand_split >= 0.9

# Train/validation keep the label but drop the id column.
data_train = data[train_list].drop(["id"], axis=1)
data_val = data[val_list].drop(["id"], axis=1)
# The batch set drops the label; one copy keeps "id", the other drops it too.
data_batch = data[batch_list].drop(["diagnosis"], axis=1)
data_batch_noID = data_batch.drop(["id"], axis=1)

# The three masks must partition the dataset: no row lost, none duplicated.
assert len(data_train) + len(data_val) + len(data_batch) == len(data)

print(f"Training set size: {len(data_train)}")
print(f"Validation set size: {len(data_val)}")
print(f"Batch set size: {len(data_batch)}")

Training set size: 444
Validation set size: 61
Batch set size: 64


In [5]:
# Upload datasets to S3
def _write_and_upload(frame, local_name, channel):
    """Write `frame` to `local_name` as CSV (no index, no header) and upload it.

    The file is uploaded through the notebook's SageMaker session under the key
    prefix "{prefix}/{channel}". Returns the value returned by `sess.upload_data`.
    """
    frame.to_csv(local_name, index=False, header=False)
    return sess.upload_data(local_name, key_prefix=f"{prefix}/{channel}")


train_file = "train_data.csv"
train_s3_path = _write_and_upload(data_train, train_file, "train")

validation_file = "validation_data.csv"
val_s3_path = _write_and_upload(data_val, validation_file, "validation")

# Both batch variants (with and without the id column) share the "batch" prefix.
batch_file = "batch_data.csv"
batch_s3_path = _write_and_upload(data_batch, batch_file, "batch")

batch_file_noID = "batch_data_noID.csv"
batch_noID_s3_path = _write_and_upload(data_batch_noID, batch_file_noID, "batch")

print("Data uploaded to S3 successfully!")
print(f"Train: {train_s3_path}", f"Validation: {val_s3_path}", sep="\n")

Data uploaded to S3 successfully!
Train: s3://sagemaker-us-east-1-440542329720/DEMO-breast-cancer-prediction-xgboost-highlevel/train/train_data.csv
Validation: s3://sagemaker-us-east-1-440542329720/DEMO-breast-cancer-prediction-xgboost-highlevel/validation/validation_data.csv


## Training Job and Model Creation

Training an XGBoost binary classification model to predict breast cancer malignancy.

In [6]:
from time import gmtime, strftime

# A UTC timestamp suffix keeps the training job name unique per run.
job_name = f"vhg-xgb-breast-cancer-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"
output_location = f"s3://{bucket}/{prefix}/output/{job_name}"

# Container image URI for the XGBoost framework, version 1.7-1, in this region.
image = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.7-1")

# Estimator: a single ml.m5.xlarge instance, File input mode,
# writing its output under `output_location`.
sm_estimator = sagemaker.estimator.Estimator(
    image,
    role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    volume_size=50,
    input_mode="File",
    output_path=output_location,
    sagemaker_session=sess,
)

# Hyperparameters, kept in one dict and passed through unchanged.
hyperparameters = {
    "objective": "binary:logistic",
    "max_depth": 5,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.8,
    "verbosity": 0,
    "num_round": 100,
}
sm_estimator.set_hyperparameters(**hyperparameters)

print(f"Training Job Name: {job_name}", f"XGBoost Image: {image}", sep="\n")

Training Job Name: vhg-xgb-breast-cancer-2026-01-28-22-48-34
XGBoost Image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1


In [7]:
# Both input channels read CSV objects found under an S3 prefix and share
# every setting except the prefix itself.
channel_options = {
    "distribution": "FullyReplicated",
    "content_type": "text/csv",
    "s3_data_type": "S3Prefix",
}
train_data = sagemaker.inputs.TrainingInput(
    f"s3://{bucket}/{prefix}/train", **channel_options
)
validation_data = sagemaker.inputs.TrainingInput(
    f"s3://{bucket}/{prefix}/validation", **channel_options
)
data_channels = dict(train=train_data, validation=validation_data)

# Launch the training job under the name built earlier, streaming its logs here.
sm_estimator.fit(inputs=data_channels, job_name=job_name, logs=True)

INFO:sagemaker:Creating training-job with name: vhg-xgb-breast-cancer-2026-01-28-22-48-34


2026-01-28 22:48:47 Starting - Starting the training job...
2026-01-28 22:49:01 Starting - Preparing the instances for training...
2026-01-28 22:49:47 Downloading - Downloading the training image......
  import pkg_resources[0m
[34m[2026-01-28 22:50:49.158 ip-10-2-218-173.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2026-01-28 22:50:49.219 ip-10-2-218-173.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2026-01-28:22:50:49:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2026-01-28:22:50:49:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34m[2026-01-28:22:50:49:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2026-01-28:22:50:49:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2026-01-28:22:50:49:INFO] Determined 0 GPU(s) available on the instance.[0m
[34m[2026-01-28:22:50:49:INFO] Determine

---
# Set Up Model Group

Creating a Model Package Group to organize versioned models for breast cancer detection.

In [8]:
# Low-level SageMaker client used for the model registry calls.
sagemaker_client = boto3.client("sagemaker", region_name=region)

# Name and description of the group that will hold the model versions.
model_package_group_name = "xgboost-breast-cancer-detection-victorhg"
model_package_group_description = " ".join(
    [
        "Model package group for XGBoost-based breast cancer detection models.",
        "Contains versioned models trained on Breast Cancer Diagnostic dataset",
        "to predict tumor malignancy (binary classification).",
    ]
)

group_request = {
    "ModelPackageGroupName": model_package_group_name,
    "ModelPackageGroupDescription": model_package_group_description,
}
create_model_package_group_response = sagemaker_client.create_model_package_group(
    **group_request
)

print(f"Model Created. ARN: {create_model_package_group_response['ModelPackageGroupArn']}")

Model Created. ARN: arn:aws:sagemaker:us-east-1:440542329720:model-package-group/xgboost-breast-cancer-detection-victorhg


In [9]:
import json

# Describe Model Package Group (Part 1 Submission)
describe_model_package_group_response = sagemaker_client.describe_model_package_group(
    ModelPackageGroupName=model_package_group_name
)
group_info = describe_model_package_group_response

print("=" * 30)

# Summary block: a leading blank line, the group name flush left, the remaining
# fields indented by four spaces, then the heading for the raw response.
group_summary_lines = [
    "",
    f"Model Package Group Name: {group_info['ModelPackageGroupName']}",
    f"    Model Package Group ARN: {group_info['ModelPackageGroupArn']}",
    f"    Description: {group_info.get('ModelPackageGroupDescription', 'N/A')}",
    f"    Creation Time: {group_info['CreationTime']}",
    f"    Status: {group_info['ModelPackageGroupStatus']}",
    "    ",
    "Full Response:",
]
print("\n".join(group_summary_lines))

# default=str lets json.dumps render values it cannot serialise natively
# (the response contains a creation timestamp).
print(json.dumps(group_info, indent=2, default=str))


Model Package Group Name: xgboost-breast-cancer-detection-victorhg
    Model Package Group ARN: arn:aws:sagemaker:us-east-1:440542329720:model-package-group/xgboost-breast-cancer-detection-victorhg
    Description: Model package group for XGBoost-based breast cancer detection models. Contains versioned models trained on Breast Cancer Diagnostic dataset to predict tumor malignancy (binary classification).
    Creation Time: 2026-01-28 22:51:40.011000+00:00
    Status: Completed
    
Full Response:
{
  "ModelPackageGroupName": "xgboost-breast-cancer-detection-victorhg",
  "ModelPackageGroupArn": "arn:aws:sagemaker:us-east-1:440542329720:model-package-group/xgboost-breast-cancer-detection-victorhg",
  "ModelPackageGroupDescription": "Model package group for XGBoost-based breast cancer detection models. Contains versioned models trained on Breast Cancer Diagnostic dataset to predict tumor malignancy (binary classification).",
  "CreationTime": "2026-01-28 22:51:40.011000+00:00",
  "Create

---
# Set Up Model Package

Creating a Model Package with an inference specification that documents how the model can be deployed.

In [10]:
# Look up the training job by name; its description carries the S3 location
# of the model artifacts the job produced.
training_job_info = sagemaker_client.describe_training_job(TrainingJobName=job_name)
model_data_url = training_job_info["ModelArtifacts"]["S3ModelArtifacts"]

# First line flush left, following lines indented by four spaces.
print(
    f"Training Job Name: {job_name}",
    f"Model Data URL: {model_data_url}",
    f"Container Image: {image}",
    sep="\n    ",
)

Training Job Name: vhg-xgb-breast-cancer-2026-01-28-22-48-34
    Model Data URL: s3://sagemaker-us-east-1-440542329720/DEMO-breast-cancer-prediction-xgboost-highlevel/output/vhg-xgb-breast-cancer-2026-01-28-22-48-34/vhg-xgb-breast-cancer-2026-01-28-22-48-34/output/model.tar.gz
    Container Image: 683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.7-1


In [11]:
# Create Model Package with Inference Specification
model_package_description = " ".join(
    [
        "XGBoost VHG binary classifier v1.0 for breast cancer malignancy prediction.",
        "Trained on Wisconsin Breast Cancer Diagnostic dataset with 30 numerical features.",
        "Model predicts probability of tumor being malignant (0-1 scale).",
    ]
)

# Serving container: the same image used for training, pointed at the
# artifacts written by the training job.
serving_container = {
    "Image": image,
    "ModelDataUrl": model_data_url,
    "Framework": "XGBOOST",
    "FrameworkVersion": "1.7-1",
}

# The same MIME types are declared for requests and for responses.
io_mime_types = ["text/csv", "application/json"]

inference_specification = {
    "InferenceSpecification": {
        "Containers": [serving_container],
        "SupportedContentTypes": list(io_mime_types),
        "SupportedResponseMIMETypes": list(io_mime_types),
        "SupportedRealtimeInferenceInstanceTypes": [
            "ml.t2.medium",
            "ml.t2.large",
            "ml.m5.large",
            "ml.m5.xlarge",
        ],
        "SupportedTransformInstanceTypes": [
            "ml.m5.large",
            "ml.m5.xlarge",
            "ml.m5.2xlarge",
        ],
    }
}

# Register the package in the group; it starts out awaiting manual approval.
create_model_package_response = sagemaker_client.create_model_package(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageDescription=model_package_description,
    InferenceSpecification=inference_specification["InferenceSpecification"],
    ModelApprovalStatus="PendingManualApproval",
)

model_package_arn = create_model_package_response["ModelPackageArn"]

print(f"Created. Model Package ARN: {model_package_arn}")

Created. Model Package ARN: arn:aws:sagemaker:us-east-1:440542329720:model-package/xgboost-breast-cancer-detection-victorhg/1


In [12]:
# Describe Model Package
describe_model_package_response = sagemaker_client.describe_model_package(
    ModelPackageName=model_package_arn
)
package_info = describe_model_package_response
banner = "=" * 40

print(banner)
print("Model Package Description")
print(banner)

# Summary block: a leading blank line, the ARN flush left, and the remaining
# fields indented by four spaces.
package_summary_lines = [
    "",
    f"Model Package ARN: {package_info['ModelPackageArn']}",
    f"    Model Package Group: {package_info.get('ModelPackageGroupName', 'N/A')}",
    f"    Description: {package_info.get('ModelPackageDescription', 'N/A')}",
    f"    Creation Time: {package_info['CreationTime']}",
    f"    Model Approval Status: {package_info['ModelApprovalStatus']}",
    f"    Model Package Status: {package_info['ModelPackageStatus']}",
]
print("\n".join(package_summary_lines))

print("\n" + banner)
print("Inference Specification:")
print(banner)

# Fall back to empty values so a response without an inference
# specification still prints cleanly.
inf_spec = package_info.get("InferenceSpecification", {})
print(
    f"Supported Content Types: {inf_spec.get('SupportedContentTypes', [])}",
    f"Supported Response Types: {inf_spec.get('SupportedResponseMIMETypes', [])}",
    f"Real-time Instance Types: {inf_spec.get('SupportedRealtimeInferenceInstanceTypes', [])}",
    f"Transform Instance Types: {inf_spec.get('SupportedTransformInstanceTypes', [])}",
    sep="\n",
)

print("\nFull Response:")
# default=str renders values json cannot serialise natively (e.g. timestamps).
print(json.dumps(package_info, indent=2, default=str))

Model Package Description

Model Package ARN: arn:aws:sagemaker:us-east-1:440542329720:model-package/xgboost-breast-cancer-detection-victorhg/1
    Model Package Group: xgboost-breast-cancer-detection-victorhg
    Description: XGBoost VHG binary classifier v1.0 for breast cancer malignancy prediction. Trained on Wisconsin Breast Cancer Diagnostic dataset with 30 numerical features. Model predicts probability of tumor being malignant (0-1 scale).
    Creation Time: 2026-01-28 22:52:01.110000+00:00
    Model Approval Status: PendingManualApproval
    Model Package Status: Completed

Inference Specification:
Supported Content Types: ['text/csv', 'application/json']
Supported Response Types: ['text/csv', 'application/json']
Real-time Instance Types: ['ml.t2.medium', 'ml.t2.large', 'ml.m5.large', 'ml.m5.xlarge']
Transform Instance Types: ['ml.m5.large', 'ml.m5.xlarge', 'ml.m5.2xlarge']

Full Response:
{
  "ModelPackageGroupName": "xgboost-breast-cancer-detection-victorhg",
  "ModelPackageVe

---
# The Model Card
Creating a Model Card that documents the model's overview, intended uses, training details, and evaluation results.


In [15]:
# Define Model Card name
model_card_name = "xgboost-breast-cancer-card-victorhg"

# Human-readable description of the model, grouped by model card section.
# Every statement here must match what this notebook actually does: the text is
# published in the model card and read as the model's official documentation.
model_card_content = {
    "model_overview": {
        "model_description": (
            "XGBoost-based binary classification model for predicting breast cancer malignancy "
            "from tumor cell characteristics. The model analyzes 30 numerical features derived "
            "from digitized images of fine needle aspirate (FNA) of breast mass to predict "
            "whether a tumor is malignant (M) or benign (B)."
        ),
        "model_owner": "Victor Hugo Germano (victorhg)",
        "model_creator": "Victor Hugo Germano",
        "problem_type": "Binary Classification",
        "algorithm_type": "XGBoost",
        "model_id": model_package_arn,
        "model_name": "XGBoost Breast Cancer Classifier v1.0",
        "model_version": "1.0",
    },
    "intended_uses": {
        "purpose_of_model": (
            "This model is intended to assist in the preliminary screening and detection of "
            "breast cancer malignancy based on cell nucleus characteristics from FNA images. "
            "It provides probability scores to help prioritize cases for further medical review."
        )
    },
    "business_details": {
        "business_problem": (
            "Early and accurate detection of breast cancer malignancy is critical for patient "
            "treatment."
        ),
        "business_stakeholders": "Healthcare providers, radiologists, medical researchers",
        "line_of_business": "Healthcare / Medical Diagnostics",
    },
    "training_details": {
        "training_data": (
            "Wisconsin Breast Cancer Diagnostic Dataset from UCI Machine Learning Repository. "
            "569 samples with 30 numerical features computed from digitized images of FNA. "
            "Features include radius, texture, perimeter, area, smoothness, compactness, "
            "concavity, concave points, symmetry, and fractal dimension (mean, SE, and worst values)."
        ),
        "training_split": "80% training, 10% validation, 10% batch test",
        "preprocessing": (
            "Diagnosis labels converted to binary (M=1, B=0). ID column removed. "
            # Corrected: XGBoost does not normalise inputs; scaling is simply not needed.
            "Features used as-is without normalization (tree-based models such as XGBoost "
            "do not require feature scaling)."
        ),
        "objective_function": "binary:logistic (predicts probability between 0 and 1)",
    },
    "evaluation_details": {
        "evaluation_data": (
            "10% validation set from the same Wisconsin Breast Cancer dataset, "
            "randomly split from original 569 samples."
        ),
        "evaluation_metrics": "Binary cross-entropy loss, accuracy, precision, recall, AUC-ROC",
        "performance_summary": (
            # Corrected: the training job sets num_round=100 and configures neither
            # early stopping nor hyperparameter tuning, so the card must not claim them.
            "Model trained with XGBoost 1.7-1 using gradient boosting for a fixed 100 rounds; "
            "no early stopping was configured. Hyperparameters were set manually and not tuned."
        ),
    },
}

# Format as JSON string for Model Card
model_card_content_json = json.dumps(model_card_content, indent=2)

# Show only the first 500 characters as a preview.
print("Model Card Content Preview:")
print("=" * 40)
print(model_card_content_json[:500] + "...")

Model Card Content Preview:
{
  "model_overview": {
    "model_description": "XGBoost-based binary classification model for predicting breast cancer malignancy from tumor cell characteristics. The model analyzes 30 numerical features derived from digitized images of fine needle aspirate (FNA) of breast mass to predict whether a tumor is malignant (M) or benign (B).",
    "model_owner": "Victor Hugo Germano (victorhg)",
    "model_creator": "Victor Hugo Germano",
    "problem_type": "Binary Classification",
    "algorithm_t...


In [22]:
# Create the Model Card with proper JSON schema format.
# `model_card_content` is re-shaped into the structure that is sent as the card's
# `Content`; training job details come from `training_job_info`.

# Report the real split sizes instead of hard-coded estimates, so the card
# always matches the data the model was actually trained and validated on.
train_sample_count = len(data_train)
validation_sample_count = len(data_val)

model_card_json_content = {
    "model_overview": {
        "model_description": model_card_content["model_overview"]["model_description"],
        "model_owner": model_card_content["model_overview"]["model_owner"],
        "model_creator": model_card_content["model_overview"]["model_creator"],
        "problem_type": model_card_content["model_overview"]["problem_type"],
        "algorithm_type": model_card_content["model_overview"]["algorithm_type"],
        "model_id": model_card_content["model_overview"]["model_id"],
    },
    "intended_uses": {
        "purpose_of_model": model_card_content["intended_uses"]["purpose_of_model"]
    },
    "training_details": {
        "objective_function": {
            "function": "binary:logistic",
            "notes": "Predicts probability of malignancy (0 to 1)"
        },
        "training_observations": (
            "Model trained with XGBoost 1.7-1 on AWS SageMaker ml.m5.xlarge instances. "
            "Hyperparameters used: max_depth=5, eta=0.2, gamma=4, min_child_weight=6, "
            "subsample=0.8, num_round=100, objective=binary:logistic. "
            f"Training data: 80% split ({train_sample_count} samples), "
            f"Validation: 10% ({validation_sample_count} samples)."
        ),
        "training_job_details": {
            "training_arn": training_job_info["TrainingJobArn"],
            "training_datasets": [f"s3://{bucket}/{prefix}/train"],
            "training_environment": {
                "container_image": [image],
            },
        },
    },
    "evaluation_details": [
        {
            "name": "Validation Set Evaluation",
            "evaluation_observation": model_card_content["evaluation_details"]["performance_summary"],
            "datasets": [f"s3://{bucket}/{prefix}/validation"],
            "metadata": {
                "dataset_type": "validation",
                "dataset_size": f"10% of total data ({validation_sample_count} samples)"
            },
            "metric_groups": [
                {
                    "name": "binary_classification_metrics",
                    # NOTE(review): these metric values are hard-coded; no cell in this
                    # notebook appears to compute them. Replace them with measured
                    # values (or confirm their source) before the card leaves Draft.
                    "metric_data": [
                        {"name": "accuracy", "type": "number", "value": 0.95},
                        {"name": "precision", "type": "number", "value": 0.93},
                        {"name": "recall", "type": "number", "value": 0.97},
                    ],
                }
            ],
        }
    ],
    "additional_information": {
        "ethical_considerations": (
            "Model should be used as a screening tool only, not for final diagnosis. "
            "Regular monitoring for bias across different patient populations is recommended."
        ),
        "caveats_and_recommendations": (
            "Model trained on historical dataset. Performance may vary with different "
            "imaging equipment or protocols. Regular retraining recommended with new data."
        ),
    },
}

# Serialise once; the same payload is used for both the create and the update path.
model_card_payload = json.dumps(model_card_json_content)

# Create Model Card, falling back to an in-place update when the name is taken.
try:
    create_model_card_response = sagemaker_client.create_model_card(
        ModelCardName=model_card_name,
        Content=model_card_payload,
        ModelCardStatus="Draft",
        Tags=[
            {"Key": "Project", "Value": "AAI540-Module4"},
            {"Key": "Owner", "Value": "VictorHG"},
            {"Key": "ModelType", "Value": "XGBoost-BinaryClassification"},
        ],
    )
    print("\nModel Card created successfully!")
    print(f"Model Card ARN: {create_model_card_response['ModelCardArn']}")
except (
    # ConflictException is believed to be the error documented for a duplicate
    # card name (see boto3 `create_model_card` docs); ResourceInUse is kept so
    # the previous handler's behaviour is preserved.
    sagemaker_client.exceptions.ConflictException,
    sagemaker_client.exceptions.ResourceInUse,
):
    print(f"Model Card '{model_card_name}' already exists.")
    # Update existing model card
    update_response = sagemaker_client.update_model_card(
        ModelCardName=model_card_name,
        Content=model_card_payload,
        ModelCardStatus="Draft",
    )
    print("Model Card updated successfully!")
    print(f"Model Card ARN: {update_response['ModelCardArn']}")


Model Card created successfully!
Model Card ARN: arn:aws:sagemaker:us-east-1:440542329720:model-card/xgboost-breast-cancer-card-victorhg


In [25]:
# Describe Model Card (Part 3 Submission)
describe_model_card_response = sagemaker_client.describe_model_card(
    ModelCardName=model_card_name
)
card_info = describe_model_card_response
banner = "=" * 40

print(banner, "Model Card Description", banner, sep="\n")

# Identity and status of the card.
print(f"\nModel Card Name: {card_info['ModelCardName']}")
print(f"Model Card ARN: {card_info['ModelCardArn']}")
print(f"Model Card Status: {card_info['ModelCardStatus']}")
print(f"Creation Time: {card_info['CreationTime']}")

# Optional fields fall back to 'N/A' when the response does not carry them.
created_by = card_info.get('CreatedBy', {}).get('UserProfileName', 'N/A')
print(f"Created By: {created_by}")
last_modified = card_info.get('LastModifiedTime', 'N/A')
print(f"Last Modified Time: {last_modified}")

print("\n" + banner, "Model Card Content:", banner, sep="\n")

# The card content comes back as a JSON string; decode it and re-encode it
# with indentation so it is readable.
content = json.loads(card_info['Content'])
print(json.dumps(content, indent=2))


Model Card Description

Model Card Name: xgboost-breast-cancer-card-victorhg
Model Card ARN: arn:aws:sagemaker:us-east-1:440542329720:model-card/xgboost-breast-cancer-card-victorhg
Model Card Status: Draft
Creation Time: 2026-01-28 23:01:46.991000+00:00
Created By: default-1767722522945
Last Modified Time: 2026-01-28 23:01:46.991000+00:00

Model Card Content:
{
  "model_overview": {
    "model_description": "XGBoost-based binary classification model for predicting breast cancer malignancy from tumor cell characteristics. The model analyzes 30 numerical features derived from digitized images of fine needle aspirate (FNA) of breast mass to predict whether a tumor is malignant (M) or benign (B).",
    "model_owner": "Victor Hugo Germano (victorhg)",
    "model_creator": "Victor Hugo Germano",
    "problem_type": "Binary Classification",
    "algorithm_type": "XGBoost",
    "model_id": "arn:aws:sagemaker:us-east-1:440542329720:model-package/xgboost-breast-cancer-detection-victorhg/1"
  },


### Delete resources


In [26]:

import os
import time

import boto3

# Clients are re-created here so the cleanup cell does not depend on the
# registry cells having been run in this kernel session.
sagemaker_client = boto3.client("sagemaker", region_name=region)
s3_client = boto3.client("s3")

print("Starting cleanup process...")
print("=" * 80)

# --- Model registry: card first, then every package version, then the group ---
sagemaker_client.delete_model_card(ModelCardName=model_card_name)

# Collect all package ARNs across every result page before deleting anything:
# a single list call returns only one page, and the group cannot be deleted
# while any version is left in it.
package_arns = [
    pkg["ModelPackageArn"]
    for package_page in sagemaker_client.get_paginator("list_model_packages").paginate(
        ModelPackageGroupName=model_package_group_name
    )
    for pkg in package_page["ModelPackageSummaryList"]
]
for pkg_arn in package_arns:
    sagemaker_client.delete_model_package(ModelPackageName=pkg_arn)
    print(f"   ✓ Model Package deleted: {pkg_arn}")

# Wait a bit for packages to be deleted
time.sleep(2)

sagemaker_client.delete_model_package_group(
    ModelPackageGroupName=model_package_group_name
)

# --- Endpoints, endpoint configs and models, matched by name substring ---
print("\n4. Checking for Endpoints...")
endpoints = sagemaker_client.list_endpoints(
    NameContains="lab4-1-endpoint",
    MaxResults=50
)

for endpoint in endpoints.get("Endpoints", []):
    endpoint_name = endpoint["EndpointName"]
    print(f"   Deleting Endpoint: {endpoint_name}")
    sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
    print(f"   ✓ Endpoint '{endpoint_name}' deleted")

endpoint_configs = sagemaker_client.list_endpoint_configs(
    NameContains="lab4-1-endpoint-config",
    MaxResults=50
)

for config in endpoint_configs.get("EndpointConfigs", []):
    config_name = config["EndpointConfigName"]
    print(f"   Deleting Endpoint Config: {config_name}")
    sagemaker_client.delete_endpoint_config(EndpointConfigName=config_name)
    print(f"   ✓ Endpoint Config '{config_name}' deleted")

# NOTE(review): "xgb-" matches every model in the account whose name contains
# it, not only models from this notebook. Confirm that is intended.
models = sagemaker_client.list_models(
    NameContains="xgb-",
    MaxResults=50
)

for model in models.get("Models", []):
    model_name = model["ModelName"]
    print(f"   Deleting Model: {model_name}")
    sagemaker_client.delete_model(ModelName=model_name)
    print(f"   ✓ Model '{model_name}' deleted")

# --- S3 objects written by this notebook ---
print("\n7. Deleting S3 Objects...")
print(f"   Bucket: {bucket}")
print(f"   Prefix: {prefix}")

# The trailing slash restricts the listing to keys inside this notebook's
# "folder"; without it, sibling prefixes that merely start with the same text
# would be deleted as well.
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket, Prefix=f"{prefix}/")

delete_count = 0
for page in pages:
    objects_to_delete = [{'Key': obj['Key']} for obj in page.get('Contents', [])]
    if not objects_to_delete:
        continue
    delete_response = s3_client.delete_objects(
        Bucket=bucket,
        Delete={'Objects': objects_to_delete}
    )
    # delete_objects reports per-key failures in the response instead of raising.
    failed_deletes = delete_response.get('Errors', [])
    for failure in failed_deletes:
        print(f"   ✗ Could not delete {failure.get('Key')}: {failure.get('Message')}")
    delete_count += len(objects_to_delete) - len(failed_deletes)

print(f"   ✓ Deleted {delete_count} object(s)")

# --- Local working files created by the data preparation cells ---
print("\n9. Deleting Local Files...")
local_files = [
    "wdbc.csv",
    "train_data.csv",
    "validation_data.csv",
    "batch_data.csv",
    "batch_data_noID.csv"
]

for file in local_files:
    # Files may already be gone if cleanup ran before; skip them silently.
    if os.path.exists(file):
        os.remove(file)

print("\n" + "=" * 80)
print("✅ CLEANUP COMPLETE!")
print("=" * 80)
print("\nRemaining manual checks (use AWS Console):")
print("1. SageMaker Studio Notebooks - stop any running instances")
print("2. SageMaker Processing Jobs - verify none are running")
print("3. CloudWatch Logs - delete log groups if desired")
print("4. IAM Roles - review if custom roles need deletion")
print("\nNote: Some resources may take a few minutes to fully delete.")

Starting cleanup process...
   ✓ Model Package deleted: arn:aws:sagemaker:us-east-1:440542329720:model-package/xgboost-breast-cancer-detection-victorhg/1

4. Checking for Endpoints...
   Deleting Endpoint Config: lab4-1-endpoint-config2026-01-28-19-59-39
   ✓ Endpoint Config 'lab4-1-endpoint-config2026-01-28-19-59-39' deleted
   Deleting Model: xgb-2026-01-28-19-37-23
   ✓ Model 'xgb-2026-01-28-19-37-23' deleted

7. Deleting S3 Objects...
   Bucket: sagemaker-us-east-1-440542329720
   Prefix: DEMO-breast-cancer-prediction-xgboost-highlevel

9. Deleting Local Files...

✅ CLEANUP COMPLETE!

Remaining manual checks (use AWS Console):
1. SageMaker Studio Notebooks - stop any running instances
2. SageMaker Processing Jobs - verify none are running
3. CloudWatch Logs - delete log groups if desired
4. IAM Roles - review if custom roles need deletion

Note: Some resources may take a few minutes to fully delete.
