In [0]:
from pyspark.sql import functions as F

# Banner marking the start of the validation output: a 55-character rule
# above and below the title, one item per line.
print("=" * 55, "PIPELINE VALIDATION", "=" * 55, sep="\n")

# ── Check all layers have data ───────────────────────────────────
# Display label -> fully qualified table name. Every table listed here
# must contain at least one row for validation to pass.
checks = {
    "Bronze Orders": "ecommerce.bronze.raw_orders",
    "Bronze Customers": "ecommerce.bronze.raw_customers",
    "Silver Orders": "ecommerce.silver.orders",
    "Silver Customers": "ecommerce.silver.customers",
    "Gold Daily Revenue": "ecommerce.gold.daily_revenue",
    "Gold Customer LTV": "ecommerce.gold.customer_ltv",
}

# Flipped to False by any failing check; read by the final status step.
all_passed = True

for name, table_name in checks.items():
    count = spark.table(table_name).count()
    if count <= 0:
        # An empty table fails validation, but keep going so every
        # table is still reported.
        print(f"{name:<25} EMPTY — PIPELINE FAILED!")
        all_passed = False
        continue
    print(f"{name:<25} {count:>10,} records")

# ── Check Silver orders have no NULL status ──────────────────────
# Number of Silver order rows whose order_status is missing; any such
# row fails validation.
bad_status = (
    spark.table("ecommerce.silver.orders")
    .where("order_status IS NULL")
    .count()
)

if bad_status != 0:
    print(f"{bad_status} NULL order statuses found")
    all_passed = False
else:
    print("No NULL order statuses in Silver")

# ── Check Gold revenue is positive ──────────────────────────────
# Count daily-revenue rows whose total_revenue is not strictly positive.
# NULL is tested explicitly: in Spark SQL `NULL <= 0` evaluates to NULL, so
# a WHERE clause with only the comparison would silently skip rows with a
# missing revenue value and let them pass validation.
negative_revenue = spark.sql("""
    SELECT COUNT(*) as count
    FROM ecommerce.gold.daily_revenue
    WHERE total_revenue IS NULL OR total_revenue <= 0
""").collect()[0]["count"]

if negative_revenue == 0:
    print("All revenue values are positive")
else:
    # The predicate matches zero and NULL as well as negative values, so
    # the message describes what was actually counted.
    print(f"{negative_revenue} non-positive or NULL revenue records found")
    all_passed = False

# ── Final status ─────────────────────────────────────────────────
# Blank line, then a closing 55-character rule.
print("", "=" * 55, sep="\n")

# Raise on any failed check so the run itself is marked as failed;
# otherwise report success.
if not all_passed:
    raise Exception("PIPELINE VALIDATION FAILED — Check logs!")
print("ALL CHECKS PASSED — Pipeline completed successfully!")