In [None]:
import os, json, boto3, botocore
from datetime import datetime

# --- Environment configuration (from mlops-env.sh) ---
# Required settings are read with os.environ[...] so that a missing export
# fails right here with a KeyError naming the variable, instead of surfacing
# later as a NameError in the middle of the registration cell.
REGION   = os.environ.get("AWS_REGION", "ap-northeast-2")
BUCKET   = os.environ["BUCKET"]
LABP     = os.environ.get("LAB_PREFIX", "student")
SM_ROLE  = os.environ["SM_ROLE_ARN"]
S3_ARTIFACTS  = os.environ["S3_ARTIFACTS"]

# Model Package Group name used by the registration and lookup cells below.
# NOTE(review): assumes the group name is exported as MPG_NAME — confirm
# against mlops-env.sh and adjust the variable name if it differs.
MPG_NAME = os.environ["MPG_NAME"]

# One session pinned to REGION; both service clients are created from it.
boto_sess = boto3.Session(region_name=REGION)
sm = boto_sess.client("sagemaker")
s3 = boto_sess.client("s3")

print("Region:", REGION)
print("Role:", SM_ROLE)
print("Model package group:", MPG_NAME)

In [None]:
# Locate the most recently written artifacts/evaluation/<job_name>/evaluation.json.
# Both prefixes below are listed; every key ending in /evaluation.json is a candidate.
candidate_prefixes = [
    "artifacts/evaluation/",           # canonical layout from Lab 6
    f"{LABP}/artifacts/evaluation/",   # tolerate older/alternate layout
]

paginator = s3.get_paginator("list_objects_v2")
candidates = [
    (entry["LastModified"], entry["Key"])
    for prefix in candidate_prefixes
    for listing in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
    for entry in listing.get("Contents", [])
    if entry["Key"].endswith("/evaluation.json")
]

if not candidates:
    raise SystemExit("No evaluation.json found under artifacts/evaluation/. Run Lab 6 first.")

# The newest object is the one with the greatest LastModified timestamp.
last_modified, latest_key = max(candidates, key=lambda pair: pair[0])
print("Latest evaluation.json:", latest_key, "| LastModified:", last_modified)

# The path segment just before the file name is taken as the job name
# (artifacts/evaluation/<job_name>/evaluation.json).
job_name = latest_key.rstrip("/").split("/")[-2]
print("Detected job_name:", job_name)

# Download and parse the file, then preview the first 500 characters of it.
eval_response = s3.get_object(Bucket=BUCKET, Key=latest_key)
evaluation = json.loads(eval_response["Body"].read())
evaluation_preview = json.dumps(evaluation, indent=2)
print(evaluation_preview[:500], "...")


In [None]:
# --- Step 4.a — Pick the serving image (framework container) for inference ---
from sagemaker import image_uris

sklearn_image = image_uris.retrieve(
    framework="sklearn",
    region=REGION,
    version="1.2-1",
    image_scope="inference",
    py_version="py3",
)
print("Serving image:", sklearn_image)

# --- Step 4.b — Gather model + metrics ---
# The training job name recorded inside evaluation.json (produced in Lab 6)
# identifies which training job's model.tar.gz gets registered.
job_name = evaluation["job_name"]

# Find model.tar.gz S3 location from the training job
desc = sm.describe_training_job(TrainingJobName=job_name)
model_data_url = desc["ModelArtifacts"]["S3ModelArtifacts"]
print("Model artifact S3:", model_data_url)

# Pull test metrics from evaluation.json.
# threshold_star is not used in this cell; the approval cell quotes it.
test_metrics = evaluation["test"]
pr_auc  = float(test_metrics["pr_auc"])
roc_auc = float(test_metrics["roc_auc"])
threshold_star = float(test_metrics["threshold_star"])

# --- Step 4.c — Create a new Model Package version in the Group (no explicit name) ---
# Required inputs:
#   MPG_NAME            -> your model package group name (string)
#   sklearn_image       -> ECR image URI for sklearn serving
#   model_data_url      -> S3 URI to model.tar.gz (from describe_training_job)
#   BUCKET, latest_key  -> location of the evaluation.json loaded earlier
#   job_name            -> evaluation["job_name"]
#   pr_auc, roc_auc     -> floats parsed from evaluation.json

# Point the metrics at the exact evaluation.json object that was loaded above.
# Rebuilding the path from S3_ARTIFACTS and job_name would reference a
# different object whenever the file was found under the alternate
# "<LAB_PREFIX>/artifacts/evaluation/" layout.
eval_json_s3 = f"s3://{BUCKET}/{latest_key}"

model_description = (
    f"Telco churn (LogReg). "
    f"Test ROC AUC={roc_auc:.3f}, PR AUC={pr_auc:.3f}. "
    f"Artifacts from job {job_name}."
)

create_resp = sm.create_model_package(
    ModelPackageGroupName=MPG_NAME,            # only the group (let SM version)
    ModelPackageDescription=model_description,
    InferenceSpecification={
        "Containers": [
            {
                "Image": sklearn_image,
                "ModelDataUrl": model_data_url,
            }
        ],
        "SupportedContentTypes": ["text/csv", "application/json"],
        "SupportedResponseMIMETypes": ["application/json"],
    },
    # Created as pending; the status is changed by a separate
    # update_model_package call in a later cell.
    ModelApprovalStatus="PendingManualApproval",
    ModelMetrics={
        "ModelQuality": {
            "Statistics": {
                "ContentType": "application/json",
                "S3Uri": eval_json_s3,
            }
        }
    },
    # IMPORTANT: Do NOT put Tags here — tags are not supported on versions.
)

model_package_arn = create_resp["ModelPackageArn"]
print("Created Model Package version:", model_package_arn)


In [None]:
# Read the new version back from the registry and show its approval status
# and the evaluation file its metrics point to.
created_package = sm.describe_model_package(ModelPackageName=model_package_arn)
print("Approval:", created_package["ModelApprovalStatus"])
quality_statistics = created_package["ModelMetrics"]["ModelQuality"]["Statistics"]
print("Eval metrics S3:", quality_statistics["S3Uri"])



In [None]:
# Reviewer note that is stored on the package version with the status change.
review_comment = "".join([
    f"Approved based on Lab 6 metrics: ROC AUC={roc_auc:.3f}, ",
    f"PR AUC={pr_auc:.3f}, threshold*={threshold_star:.2f}. ",
    "Meets classroom acceptance criteria.",
])

# Assemble the request first so the decision and its justification read together.
approval_request = {
    "ModelPackageArn": model_package_arn,
    "ModelApprovalStatus": "Approved",  # or "Rejected"
    "ApprovalDescription": review_comment,
}
_ = sm.update_model_package(**approval_request)

print("✓ Set approval to Approved.")

In [None]:
# Describe the package again after the approval call and print the same two
# fields, so the status change can be checked.
package_details = sm.describe_model_package(ModelPackageName=model_package_arn)
print("Approval:", package_details["ModelApprovalStatus"])
metrics_pointer = package_details["ModelMetrics"]["ModelQuality"]["Statistics"]
print("Eval metrics S3:", metrics_pointer["S3Uri"])

In [None]:
## Step 6 — Verify & fetch the latest Approved package

def latest_model_package_arn(group_name: str, status: str = "Approved", sm_client=None):
    """Return (arn, created_time) of the newest model package with given status.

    Args:
        group_name: Name of the Model Package Group to search.
        status: Approval status to filter on:
            "Approved" | "Rejected" | "PendingManualApproval".
        sm_client: Optional object exposing ``list_model_packages``. When
            omitted, the notebook-level ``sm`` client is used.

    Returns:
        A ``(ModelPackageArn, CreationTime)`` tuple taken from the first
        summary in the response, or ``(None, None)`` when the response
        contains no summaries.
    """
    client = sm if sm_client is None else sm_client
    resp = client.list_model_packages(
        ModelPackageGroupName=group_name,
        ModelApprovalStatus=status,
        SortBy="CreationTime",
        SortOrder="Descending",
        # Results are requested newest-first and only the first is used,
        # so a single summary is all that needs to be fetched.
        MaxResults=1,
    )
    pkgs = resp.get("ModelPackageSummaryList") or []
    if not pkgs:
        return None, None
    newest = pkgs[0]
    return newest["ModelPackageArn"], newest["CreationTime"]

# Look up the newest Approved version in the group and print its key details.
latest_approved_arn, created = latest_model_package_arn(MPG_NAME, "Approved")
print("Latest Approved ARN:", latest_approved_arn, "| Created:", created)

if not latest_approved_arn:
    print("No Approved packages yet. Approve one in Step 5 and re-run this cell.")
else:
    # NOTE: the ARN is passed through the ModelPackageName argument.
    info = sm.describe_model_package(ModelPackageName=latest_approved_arn)
    print("Approval:", info["ModelApprovalStatus"])
    serving_container = info["InferenceSpecification"]["Containers"][0]
    print("Container Image:", serving_container["Image"])
    print("ModelDataUrl:", serving_container["ModelDataUrl"])
    print("Eval JSON:", info["ModelMetrics"]["ModelQuality"]["Statistics"]["S3Uri"])
