In [0]:
# Configuration and ingestion loop: load each CSV file in the job list into a Bronze table

# === GENERAL CONFIG ===
from pyspark.sql import functions as F

# Unity Catalog coordinates: source files live in a volume of the landing
# schema, tables are written to the Bronze schema of the same catalog.
CATALOG = "lapse_scoring_dev"
SCHEMA_LANDING = "00_landing"
SCHEMA_BRONZE = "01_bronze"
VOLUME_NAME = "pak_tani"
# Delta write mode; switch to "overwrite" for an initial refresh.
WRITE_MODE = "append"

# Root folder of the landing volume.
BASE_DIR = "dbfs:/Volumes/" + "/".join([CATALOG, SCHEMA_LANDING, VOLUME_NAME])

# Job list: one row per file, as
# (SRC_PATTERN relative to BASE_DIR, TABLE_NAME in Bronze).
# Uncomment a row to include that file in the run.
_JOB_FIELDS = ("SRC_PATTERN", "TABLE_NAME")
_JOB_ROWS = [
    # ("agent_data.csv", "pak_tani_agent_data"),
    # ("claim_data.csv", "pak_tani_claim_data"),
    # ("client_data_true.csv", "pak_tani_client_data_true"),
    ("golden_customers_360.csv", "pak_tani_golden_customers_360"),
    # ("golden_leads_360.csv", "pak_tani_golden_leads_360"),
    # ("payment_data.csv", "pak_tani_payment_data"),
    # ("policy_data.csv", "pak_tani_policy_data"),
    # ("rider_data.csv", "pak_tani_rider_data"),
    # Add more rows here...
]
JOBS = [dict(zip(_JOB_FIELDS, row)) for row in _JOB_ROWS]

# === PREPARE BRONZE CATALOG/SCHEMA ===
# Create the catalog and the Bronze schema when missing and make them the
# session defaults. The statements run in order: each USE relies on the
# CREATE issued just before it.
for _stmt in (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG}",
    f"USE CATALOG {CATALOG}",
    f"CREATE SCHEMA IF NOT EXISTS `{SCHEMA_BRONZE}`",
    f"USE SCHEMA `{SCHEMA_BRONZE}`",
):
    spark.sql(_stmt)

def detect_sep(sample_path: str, read_head=None) -> str:
    """Detect the CSV delimiter (`,` vs `;`) from the file's header line.

    Only the first line of the sample is inspected. Counting over the whole
    sample lets commas inside data values (quoted text, decimal commas)
    outnumber the real `;` delimiter and produce the wrong answer.

    Args:
        sample_path: Path of the CSV file to inspect.
        read_head: Optional callable ``(path, max_bytes) -> str`` returning
            the beginning of the file. Defaults to ``dbutils.fs.head``.

    Returns:
        ``","`` or ``";"``. A tie (including an empty file) yields ``","``.
    """
    if read_head is None:
        # Resolved at call time so the function can be defined outside a
        # notebook session where `dbutils` does not exist.
        read_head = dbutils.fs.head
    head = read_head(sample_path, 4096)
    # splitlines() returns [] for an empty sample, hence the "" fallback.
    header = next(iter(head.splitlines()), "")
    return "," if header.count(",") >= header.count(";") else ";"

# Substring replacements applied, in order, to every source column name.
_COLUMN_NAME_REPLACEMENTS = (
    (" ", "_"),
    ("<=", "le"),
    (">=", "ge"),
    ("<", "l"),
    (">", "g"),
    ("=", "e"),
)


def _normalize_column_name(name: str) -> str:
    """Return `name` trimmed, lower-cased, with spaces and comparison operators replaced."""
    cleaned = name.strip().lower()
    for old, new in _COLUMN_NAME_REPLACEMENTS:
        cleaned = cleaned.replace(old, new)
    return cleaned


def ingest_one(src_pattern: str, table_name: str):
    """Load one CSV file from the landing volume into a Bronze Delta table.

    Steps: detect the delimiter, read the CSV with schema inference, attach
    the `_ingest_ts` and `_source_path` metadata columns, normalize the data
    column names, cast known ID columns to string, write to the target table
    using WRITE_MODE, and print the resulting row count.

    Args:
        src_pattern: File name/pattern relative to BASE_DIR.
        table_name: Name of the target table inside the Bronze schema.

    Returns:
        None. Progress is reported with `print`.

    Raises:
        Nothing is caught here; any error from `detect_sep` or Spark
        propagates to the caller.
    """
    src_path = f"{BASE_DIR}/{src_pattern}"
    target_table = f'{CATALOG}.`{SCHEMA_BRONZE}`.{table_name}'
    print(f"\n==> Ingest: {src_path} -> {target_table}")

    # Detect the delimiter
    sep = detect_sep(src_path)
    print(f"Detected delimiter: '{sep}'")

    # Read the CSV. The metadata columns are added in the same projection as
    # the read: `_metadata` is a hidden column of the file source, so it is
    # referenced here, before any renaming projection sits between the
    # reference and the file scan.
    meta_cols = ["_ingest_ts", "_source_path"]
    raw = (spark.read
           .option("header", True)
           .option("sep", sep)
           .option("inferSchema", True)
           .option("quote", '"')
           .option("escape", '"')
           .option("multiLine", True)
           .option("mode", "PERMISSIVE")
           .csv(src_path)
           .select("*",
                   F.current_timestamp().alias("_ingest_ts"),
                   F.col("_metadata.file_path").alias("_source_path")))

    # Light normalization of the data column names, done in a single rename;
    # the two metadata columns appended above keep their names.
    data_cols = raw.columns[:-len(meta_cols)]
    df = raw.toDF(*[_normalize_column_name(c) for c in data_cols], *meta_cols)

    # ID columns -> string (when present).
    # NOTE(review): this cast runs after inferSchema, so an ID that was
    # inferred as a number has presumably already lost any leading zeros —
    # confirm against the source data.
    for id_col in ("chdrnum", "clntnum", "lifenum"):
        if id_col in df.columns:
            df = df.withColumn(id_col, F.col(id_col).cast("string"))

    # Write to Bronze
    (df.write
        .mode(WRITE_MODE)
        .option("mergeSchema", "true")
        .format("delta")
        .saveAsTable(target_table))

    # Light verification: row count of the whole target table after the write
    cnt = spark.table(target_table).count()
    print(f"Write OK → {target_table} | rows={cnt}")

# === RUN ALL JOBS ===
# A failing job is logged and recorded, and the loop continues with the next
# job; every recorded failure is listed again in the summary below.
errors = []
for job in JOBS:
    try:
        ingest_one(src_pattern=job["SRC_PATTERN"], table_name=job["TABLE_NAME"])
    except Exception as exc:
        print(f"[ERROR] {job['SRC_PATTERN']} -> {job['TABLE_NAME']} :: {exc}")
        errors.append((job, str(exc)))

if not errors:
    print("\n=== ALL JOBS COMPLETED SUCCESSFULLY ===")
else:
    print("\n=== SUMMARY: SOME JOBS FAILED ===")
    for failed_job, reason in errors:
        print(f"- {failed_job['SRC_PATTERN']} -> {failed_job['TABLE_NAME']} :: {reason}")
