In [0]:
# COMMAND ----------
import pandas as pd
import numpy as np
import requests
from databricks.sdk import WorkspaceClient

# Create a WorkspaceClient so the workspace host and token are picked up
# automatically instead of being typed into the notebook.
w = WorkspaceClient()
host, token = w.config.host, w.config.token

# Name of your serving endpoint (CHANGE this!)
ENDPOINT_NAME = "Lapse_Score_Insurance1"  # <-- must match the name shown in the Serving UI


In [0]:
# COMMAND ----------
# Load the feature table as a Spark DataFrame.
spark_df = spark.table("cross_sell_insurance.01_feature_staging.stage2_clean_feature_table")

# Print the available columns so the ID column below can be verified.
print(spark_df.columns)

# ID column (change this if your identifier column has a different name).
id_col = "client_id"

# Pull the whole table into pandas (may need optimizing for very large data).
df = spark_df.toPandas()
print("Total rows:", df.shape[0])

# Keep a copy of the IDs, then build the feature frame in a single chain:
# drop the target column if it is present and replace +/-inf and NaN with 0
# so the values are JSON compliant.
# Note: only "is_target_customer" is dropped, so id_col remains in X_all.
ids = df[id_col].copy()
X_all = (
    df.drop(columns=["is_target_customer"], errors="ignore")
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

X_all.head()


In [0]:
# COMMAND ----------
def score_batch_via_endpoint(df_batch, endpoint_name=ENDPOINT_NAME, timeout=300):
    """
    Send a pandas DataFrame batch to a Databricks serving endpoint and
    return the scores it produces.

    Parameters
    ----------
    df_batch : pandas.DataFrame
        Rows to score. +/-inf and NaN values are replaced with 0 before the
        batch is serialized, so the payload is valid JSON.
    endpoint_name : str
        Name of the serving endpoint to invoke.
    timeout : float or tuple, optional
        Forwarded to ``requests.post``. Without it a stalled endpoint would
        block the notebook indefinitely.

    Returns
    -------
    numpy.ndarray
        Scores converted to float, in the order returned by the endpoint.
        An empty batch yields an empty array without any network call.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a non-2xx status.
    requests.Timeout
        If the endpoint does not answer within ``timeout``.
    ValueError
        If the JSON response is a dict without a "predictions" key.
    """
    # Nothing to score: skip the request entirely.
    if len(df_batch) == 0:
        return np.array([], dtype=float)

    w = WorkspaceClient()

    # Strip a trailing slash so the URL never contains "//serving-endpoints".
    base_url = w.config.host.rstrip("/")
    url = f"{base_url}/serving-endpoints/{endpoint_name}/invocations"

    # Make sure the batch contains no NaN/inf (not representable in JSON).
    clean_batch = (
        df_batch
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
    )

    payload = {
        "dataframe_records": clean_batch.to_dict(orient="records")
    }

    # Let the SDK build the auth header: config.token is only set for
    # token-based auth, while authenticate() covers every credential type
    # the client was able to resolve.
    headers = dict(w.config.authenticate())
    headers["Content-Type"] = "application/json"

    resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()

    out = resp.json()

    # The scores normally live under "predictions"; a bare JSON list is
    # accepted as-is. Any other dict is reported explicitly instead of
    # failing later with an opaque KeyError.
    if isinstance(out, dict):
        if "predictions" not in out:
            raise ValueError(
                f"Unexpected response from endpoint '{endpoint_name}': "
                f"no 'predictions' key (got keys: {sorted(out)})"
            )
        preds = out["predictions"]
    else:
        preds = out

    if isinstance(preds, list) and preds and isinstance(preds[0], dict):
        # List of dicts: prefer the "prediction" key, otherwise fall back to
        # the first value of each record.
        scores = [
            p["prediction"] if "prediction" in p else list(p.values())[0]
            for p in preds
        ]
    else:
        scores = preds

    return np.array(scores, dtype=float)


In [0]:
# COMMAND ----------
batch_size = 500   # rows per request; tune up or down as needed
n = len(X_all)

# Collect the per-batch arrays under their own name so `all_scores` is only
# ever the final concatenated array.
score_chunks = []

for start in range(0, n, batch_size):
    end = min(start + batch_size, n)
    batch = X_all.iloc[start:end]
    # The original message contained a mis-encoded en dash; restored here.
    print(f"Scoring rows {start}–{end-1} ...")

    scores = score_batch_via_endpoint(batch, endpoint_name=ENDPOINT_NAME)
    score_chunks.append(scores)

# np.concatenate raises on an empty list, which happens when the table has
# no rows; fall back to an empty float array in that case.
all_scores = np.concatenate(score_chunks) if score_chunks else np.array([], dtype=float)

# Scores are matched back to rows by position, so a count mismatch would
# silently misalign them; fail loudly instead.
assert len(all_scores) == n, f"expected {n} scores, got {len(all_scores)}"
print("Total scores:", len(all_scores))
