In [None]:
# Self-healing env loader for notebooks
import os, shlex, subprocess, json
from pathlib import Path

def load_env_from(path="~/mlops-env.sh", required=("AWS_REGION","BUCKET","S3_DATA","S3_ARTIFACTS")):
    """Source a shell env file in bash and copy the resulting variables into os.environ.

    Args:
        path: Shell script to source; a leading ``~`` is expanded.
        required: Variable names that must be non-empty in os.environ afterwards.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        subprocess.CalledProcessError: If the bash invocation exits non-zero.
        RuntimeError: If any name in ``required`` is missing or empty after loading.

    Prints the required variables as indented JSON on success.
    """
    env_file = Path(path).expanduser()
    if not env_file.exists():
        raise FileNotFoundError(f"{env_file} not found")

    # Pass the script to bash as its own argv entry. Wrapping it in single quotes
    # for an outer shell breaks as soon as shlex.quote() has to quote the path
    # (e.g. a path containing a space), because the quotes nest.
    script = f"set -a; source {shlex.quote(str(env_file))} >/dev/null 2>&1; env"
    out = subprocess.check_output(["bash", "-lc", script], text=True)

    got = {}
    for line in out.splitlines():
        name, sep, value = line.partition("=")
        # `env` prints multi-line values across several lines; skip fragments
        # whose left-hand side is not a plausible variable name.
        if sep and name.isidentifier():
            got[name] = value
    os.environ.update(got)

    missing = [k for k in required if not os.environ.get(k)]
    if missing:
        raise RuntimeError("Missing after load_env: " + ", ".join(missing))
    print(json.dumps({k: os.environ[k] for k in required}, indent=2))

# Load the variables now, with the default file and required names, so later
# cells can read them from os.environ.
load_env_from()


In [None]:
# --- imports
import os, io, json, tarfile, tempfile
from pathlib import Path
from datetime import datetime, timezone

import boto3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (
    roc_curve, auc, precision_recall_curve,
    classification_report, confusion_matrix,
    roc_auc_score, average_precision_score
)
import joblib

# Show up to 120 columns when displaying wide frames.
pd.set_option("display.max_columns", 120)

# --- environment (loaded from ~/mlops-env.sh)
# Check every mandatory variable up front so a gap is reported once, by name,
# instead of surfacing as a bare KeyError on whichever lookup happens to run first.
_required_env = ("BUCKET", "S3_DATA_PROCESSED", "S3_ARTIFACTS")
_missing_env = [name for name in _required_env if not os.environ.get(name)]
if _missing_env:
    raise RuntimeError("Missing environment variables: " + ", ".join(_missing_env))

REGION  = os.environ.get("AWS_REGION", "ap-northeast-2")   # default used when AWS_REGION is unset
BUCKET  = os.environ["BUCKET"]
S3_DATA_PROCESSED = os.environ["S3_DATA_PROCESSED"]   # e.g., s3://.../data/processed
S3_ARTIFACTS      = os.environ["S3_ARTIFACTS"]        # e.g., s3://.../artifacts
LABP   = os.environ.get("LAB_PREFIX", "student")      # prefix used in job names and S3 keys below

# --- AWS clients (both bound to the same session/region)
boto_sess = boto3.Session(region_name=REGION)
s3 = boto_sess.client("s3")
sm = boto_sess.client("sagemaker")

def parse_s3(uri: str):
    """Split s3://bucket/key into (bucket, key).

    A bucket-only URI ("s3://bucket" or "s3://bucket/") yields an empty key.

    Raises:
        ValueError: If ``uri`` does not start with "s3://" or names no bucket.
    """
    scheme = "s3://"
    # Explicit raise instead of assert: asserts are removed under `python -O`.
    if not uri.startswith(scheme):
        raise ValueError(f"Not an s3 uri: {uri}")
    # partition() never fails to unpack, unlike split("/", 1) on a URI without a key.
    bucket, _, key = uri[len(scheme):].partition("/")
    if not bucket:
        raise ValueError(f"Not an s3 uri: {uri}")
    return bucket, key

In [None]:
# --- list & pick the latest completed training job with our prefix
job_prefix = f"{LABP}-train-"
# Filter server-side: looking only at the 20 newest jobs of any name can miss ours,
# and a job that has not completed has no model artifact to load.
resp = sm.list_training_jobs(
    NameContains=job_prefix,
    StatusEquals="Completed",
    SortBy="CreationTime",
    SortOrder="Descending",
    MaxResults=20,
)
# NameContains matches anywhere in the name, so still require a true prefix match.
jobs = [j for j in resp["TrainingJobSummaries"] if j["TrainingJobName"].startswith(job_prefix)]
if not jobs:
    raise SystemExit("No completed training jobs found for this LAB_PREFIX. Run Lab 5 first.")
job_name = jobs[0]["TrainingJobName"]
print("Latest training job:", job_name)

# --- describe to get S3 locations
desc = sm.describe_training_job(TrainingJobName=job_name)
model_art  = desc["ModelArtifacts"]["S3ModelArtifacts"]      # s3://.../model.tar.gz
output_pre = desc["OutputDataConfig"]["S3OutputPath"]        # s3://.../artifacts/training/
output_tar = f"{output_pre.rstrip('/')}/{job_name}/output/output.tar.gz"

# --- try to fetch metrics.json from output.tar.gz (nice to have; metrics are recomputed later)
metrics = None
try:
    b_out, k_out = parse_s3(output_tar)
    with tempfile.TemporaryDirectory() as td:
        local_tar = Path(td) / "output.tar.gz"
        s3.download_file(b_out, k_out, str(local_tar))
        with tarfile.open(local_tar) as t:
            # Match on the file name so "./metrics.json" is found as well.
            metrics_member = next(
                (m for m in t.getmembers() if m.isfile() and Path(m.name).name == "metrics.json"),
                None,
            )
            if metrics_member is not None:
                with t.extractfile(metrics_member) as fh:
                    metrics = json.load(fh)
except Exception as e:
    # Best effort only: any failure here is reported and the notebook carries on.
    print("[WARN] Could not read metrics.json from output tar:", e)

if metrics is None:
    print("metrics.json (if any): None")
else:
    metrics_text = json.dumps(metrics, indent=2)
    # Show at most 400 characters and mark the cut only when something was cut.
    print("metrics.json (if any):", metrics_text[:400] + ("..." if len(metrics_text) > 400 else ""))


In [None]:
# --- load model.joblib from model.tar.gz
b_mod, k_mod = parse_s3(model_art)
with tempfile.TemporaryDirectory() as tmp_dir:
    model_tar = Path(tmp_dir) / "model.tar.gz"
    s3.download_file(b_mod, k_mod, str(model_tar))
    with tarfile.open(model_tar) as archive:
        # Take the first archive member whose name ends with "model.joblib".
        for member in archive.getmembers():
            if member.name.endswith("model.joblib"):
                break
        else:
            raise FileNotFoundError("model.joblib not found in model artifact")
        # NOTE: joblib.load unpickles the file, which can run code embedded in it;
        # only load artifacts produced by training jobs you trust.
        model_bundle = joblib.load(archive.extractfile(member))

# The bundle is read as a dict: "model" is mandatory, "preprocess" is optional.
model = model_bundle["model"]              # scikit-learn LogisticRegression
preprocess = model_bundle.get("preprocess")  # may be None; splits are already numeric

# --- read processed test split
def s3_read_csv(uri: str) -> pd.DataFrame:
    """Fetch the object at an s3:// URI and parse its bytes as CSV."""
    bucket, key = parse_s3(uri)
    payload = s3.get_object(Bucket=bucket, Key=key)["Body"].read()
    return pd.read_csv(io.BytesIO(payload))

# The test split is read from <processed>/test/test.csv.
test_uri = S3_DATA_PROCESSED.rstrip("/") + "/test/test.csv"
df_test = s3_read_csv(test_uri)

# Every column other than "label" is treated as a feature.
feature_cols = [col for col in df_test.columns if col != "label"]
X_test = df_test.loc[:, feature_cols].astype("float64").to_numpy()
y_test = df_test.loc[:, "label"].astype("int64").to_numpy()

# Last expression: display both shapes as the cell output.
X_test.shape, y_test.shape



In [None]:
# --- probabilities & default 0.5 decision
proba = model.predict_proba(X_test)[:, 1]   # second predict_proba column
pred  = (proba >= 0.5).astype(int)

# Threshold-free scores, then the confusion matrix and report at the 0.5 cut.
roc = roc_auc_score(y_test, proba)
pr  = average_precision_score(y_test, proba)
cm  = confusion_matrix(y_test, pred)
rep = classification_report(y_test, pred, output_dict=True)

print(f"Test ROC AUC: {roc:.3f} | Test PR AUC: {pr:.3f}")
print("Confusion matrix @0.5:\n", cm)

# --- ROC curve
fpr, tpr, _ = roc_curve(y_test, proba)
fig_roc, ax_roc = plt.subplots(figsize=(5, 4))
ax_roc.plot(fpr, tpr, label=f"AUC={auc(fpr,tpr):.3f}")
ax_roc.plot([0, 1], [0, 1], "--")   # diagonal reference line
ax_roc.set_title("ROC Curve (Test)")
ax_roc.set_xlabel("FPR")
ax_roc.set_ylabel("TPR")
ax_roc.legend()
plt.show()

# --- PR curve
prec, rec, thr = precision_recall_curve(y_test, proba)
fig_pr, ax_pr = plt.subplots(figsize=(5, 4))
ax_pr.plot(rec, prec)
ax_pr.set_title("Precision–Recall (Test)")
ax_pr.set_xlabel("Recall")
ax_pr.set_ylabel("Precision")
plt.show()



In [None]:
# --- Evaluate at the default threshold (0.5) and plot side-by-side
from sklearn.metrics import (
    roc_auc_score, average_precision_score, confusion_matrix, classification_report,
    roc_curve, precision_recall_curve, auc
)
import matplotlib.pyplot as plt
import numpy as np

# 1) Probabilities and default 0.5 decision
proba = model.predict_proba(X_test)[:, 1]
pred  = (proba >= 0.5).astype(int)

# 2) Scalar scores + confusion matrix
roc = roc_auc_score(y_test, proba)                 # ranking quality (threshold-free)
pr  = average_precision_score(y_test, proba)       # area under PR curve (a.k.a. AP)
cm  = confusion_matrix(y_test, pred)
rep = classification_report(y_test, pred, output_dict=True)

print(f"Test ROC AUC: {roc:.3f} | Test PR AUC: {pr:.3f}")
print("Confusion matrix @0.5 [TN FP; FN TP]:\n", cm)

# 3) Curves (side-by-side)
fpr, tpr, _ = roc_curve(y_test, proba)
prec, rec, _ = precision_recall_curve(y_test, proba)
roc_auc = auc(fpr, tpr)                            # equals roc above
ap = pr                                            # alias for clarity
base_prec = y_test.mean()                          # prevalence baseline for PR

fig, axes = plt.subplots(1, 2, figsize=(11, 4))
roc_ax, pr_ax = axes

# Left panel: ROC with the diagonal as reference
roc_ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
roc_ax.plot([0, 1], [0, 1], "--", linewidth=1)
roc_ax.set(
    title="ROC Curve (Test)",
    xlabel="False Positive Rate (1 - Specificity)",
    ylabel="True Positive Rate (Recall)",
)
roc_ax.legend(loc="lower right")

# Right panel: Precision–Recall with the prevalence baseline
pr_ax.plot(rec, prec, label=f"AP = {ap:.3f}")
pr_ax.axhline(base_prec, linestyle="--", linewidth=1, label=f"Baseline = {base_prec:.3f}")
pr_ax.set(
    title="Precision–Recall Curve (Test)",
    xlabel="Recall",
    ylabel="Precision",
)
pr_ax.legend(loc="lower left")

fig.tight_layout()
plt.show()


In [None]:
# --- compute P/R/F1 across thresholds
def sweep_thresholds(y_true, p):
    """Tabulate precision, recall and F1 for each threshold of the PR curve.

    Args:
        y_true: Ground-truth binary labels.
        p: Predicted scores/probabilities for the positive class.

    Returns:
        DataFrame with columns "threshold", "precision", "recall", "f1",
        one row per threshold returned by precision_recall_curve.
    """
    prec, rec, thr = precision_recall_curve(y_true, p)
    # precision_recall_curve returns one more precision/recall value than
    # thresholds: element i belongs to thr[i], and the final (precision=1,
    # recall=0) point has no threshold. Drop that point instead of prepending a
    # dummy threshold, which would shift every row by one.
    prec, rec = prec[:-1], rec[:-1]
    f1  = 2 * (prec * rec) / (prec + rec + 1e-12)   # epsilon avoids 0/0
    return pd.DataFrame({"threshold": thr, "precision": prec, "recall": rec, "f1": f1})

sweep = sweep_thresholds(y_test, proba)

# --- pick strategy: best F1 (simple balanced choice)
# NOTE: the threshold is chosen on the same test split the metrics are reported on.
f1_scores = sweep["f1"].to_numpy()
best_idx = int(np.nanargmax(f1_scores))
t_star   = float(sweep["threshold"].iloc[best_idx])

# --- visualize the trade-off
fig_sweep, ax_sweep = plt.subplots(figsize=(6, 4))
for column, curve_label in (("precision", "Precision"), ("recall", "Recall"), ("f1", "F1")):
    ax_sweep.plot(sweep["threshold"], sweep[column], label=curve_label)
ax_sweep.axvline(t_star, linestyle="--", label=f"Best F1 @ {t_star:.2f}")
ax_sweep.set_xlabel("Threshold")
ax_sweep.set_ylabel("Score")
ax_sweep.set_title("Threshold Sweep (Test)")
ax_sweep.legend()
plt.show()

# --- confusion & report at chosen threshold
pred_star = (proba >= t_star).astype(int)
cm_star   = confusion_matrix(y_test, pred_star)
rep_star  = classification_report(y_test, pred_star, output_dict=True)

print("Chosen threshold:", round(t_star, 3))
print("Confusion matrix @t*:\n", cm_star)
# Last expression: first rows of the transposed report, shown as the cell output.
pd.DataFrame(rep_star).T.head()



In [None]:
# Collect the test metrics computed above into one JSON document and upload it.
evaluation = {
    "job_name": job_name,
    # Timezone-aware replacement for the deprecated datetime.utcnow(); same "...Z" format.
    "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "test": {
        "roc_auc": float(roc),
        "pr_auc": float(pr),
        "threshold_default": 0.5,
        "threshold_star": t_star,
        "confusion_matrix@0.5": cm.tolist(),
        "confusion_matrix@t*": cm_star.tolist(),
        "report@0.5": rep,          # per-class precision/recall/f1/support
        "report@t*":  rep_star,
    },
}

# Serialize once; the same text is previewed and uploaded.
evaluation_json = json.dumps(evaluation, indent=2)
print(evaluation_json[:800], "...\n")

eval_prefix = f"{S3_ARTIFACTS.rstrip('/')}/evaluation/{job_name}/"
b_eval, k_eval = parse_s3(eval_prefix + "evaluation.json")
s3.put_object(
    Bucket=b_eval,
    Key=k_eval,
    Body=evaluation_json.encode("utf-8"),
    ContentType="application/json",   # store the object with a JSON content type
)
print("Wrote:", f"{eval_prefix}evaluation.json")



In [None]:
# Try the SDK Model Card first; if your region lacks support, use the Markdown fallback below.
# NOTE(review): confirm that the names imported below exist in the installed
# sagemaker SDK. The except clause catches ImportError too, so a wrong name
# routes every run to the Markdown fallback with only an [INFO] line as a hint.
try:
    from sagemaker.model_card import ModelCard, ModelCardStatus
    from sagemaker.model_card import (
        ModelOverview, BusinessDetails, IntendedUses, TrainingDetails,
        EvaluationResult, Metric, ObjectiveFunction, Function
    )

    model_card_name = f"{LABP}-telco-churn-{job_name}"

    overview = ModelOverview(
        model_id=model_card_name,
        model_name="Telco Churn - Logistic Regression",
        problem_type="BinaryClassification",
        algorithm_type="LogisticRegression",
    )
    business = BusinessDetails(
        business_problem="Predict churn risk to prioritize retention outreach.",
        business_goals="Improve retention by acting on high-risk customers.",
        stakeholders=["Data Science", "Marketing", "Care Ops"],
    )
    uses = IntendedUses(
        intended_uses=["Batch and real-time scoring of churn probability"],
        factors_affecting_model_efficiency=["Pricing changes", "Plan changes"],
        risk_rating="Low",
    )
    train = TrainingDetails(
        objective_function=ObjectiveFunction(function=Function("binary_classification"), notes="ROC AUC / PR AUC"),
        training_job_details={"training_job_arn": desc["TrainingJobArn"]},
    )
    eval_results = [
        EvaluationResult(name="Test ROC AUC", metric=Metric(name="roc_auc", type="number", value=float(roc))),
        EvaluationResult(name="Test PR AUC",  metric=Metric(name="pr_auc",  type="number", value=float(pr))),
    ]

    mc = ModelCard(
        name=model_card_name,
        status=ModelCardStatus.DRAFT,
        model_overview=overview,
        business_details=business,
        intended_uses=uses,
        training_details=train,
        evaluation_results=eval_results,
    )
    mc.create()
    print("Created/updated Model Card:", model_card_name)

except Exception as e:
    # Any failure above (import, construction, or create) lands here on purpose:
    # the Markdown card is the fallback deliverable.
    print("[INFO] Falling back to Markdown model card:", e)
    # Timezone-aware replacement for the deprecated datetime.utcnow(); same "...Z" format.
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # The confusion matrix is fenced so Markdown keeps its rows on separate lines.
    md = f"""# Model Card — Telco Churn (LogReg)

**Training job:** {job_name}
**Generated:** {generated_at}

## Overview
Binary classification to predict churn. Algorithm: Logistic Regression.

## Data & Processing
- Inputs: {S3_DATA_PROCESSED} (train/val/test)
- Preprocess: median impute numerics, one-hot encode categoricals, standard-scale numerics

## Key Metrics (Test)
- ROC AUC: **{roc:.3f}**
- PR AUC: **{pr:.3f}**
- Threshold (F1*): **{t_star:.2f}**

## Confusion Matrix @ t*
```
{cm_star}
```

## Intended Use & Risks
Retention targeting; monitor drift after pricing/plan changes; avoid using sensitive attributes.
"""
    key = f"{LABP}/artifacts/model-cards/{job_name}/model_card.md"
    s3.put_object(Bucket=BUCKET, Key=key, Body=md.encode("utf-8"))
    print("Wrote s3://{}/{}".format(BUCKET, key))



In [None]:
# Compact recap of the headline test numbers, rounded to three decimals.
test_summary = {
    "roc_auc": round(roc, 3),
    "pr_auc": round(pr, 3),
    "best_threshold_f1": round(t_star, 3),
    "cm@t*": cm_star.tolist(),
}
final_summary = {"job": job_name, "test": test_summary}
print(json.dumps(final_summary, indent=2))

