# In [0]:
# 03_Gold.py
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, countDistinct

# Reuse the Spark session that is already active (for example the one a
# notebook provides), or build a new one when none exists.
spark = SparkSession.builder.getOrCreate()

# ─── 0) Point at your Hive Metastore ─────────────────────────────────────────
# Make `spark_catalog` the current catalog and `default` the current schema,
# so unqualified table names used in later SQL statements resolve there.
spark.sql("USE CATALOG spark_catalog")
spark.sql("USE default")

# ─── 1) Paths ─────────────────────────────────────────────────────────────────
# Storage locations used by this script: the Silver Delta table that is read
# as input, and the two Gold Delta tables that are written as output.
silver_path         = "/tmp/silver/cc_events_enterprise"
feature_usage_path  = "/tmp/gold/feature_usage_fact"
user_activity_path  = "/tmp/gold/user_activity_fact"

# ─── 2) Read the Silver “enterprise view” ────────────────────────────────────
# Load the Silver Delta table once; both Gold aggregations below start from
# this DataFrame. The five-row preview is only a quick visual sanity check.
silver_df = spark.read.load(silver_path, format="delta")
display(silver_df.limit(5))

# ─── 3) Feature Usage Fact ───────────────────────────────────────────────────
# One row per (event_date, app_name, feature_category) holding the number of
# Silver rows in that group. Rows without a feature category are excluded
# before counting.
feature_usage = (
    silver_df
    .where(col("feature_category").isNotNull())
    .groupBy("event_date", "app_name", "feature_category")
    .agg(count("*").alias("usage_count"))
)

# Replace whatever is currently stored at the Gold path with the fresh
# aggregate, laid out in one partition directory per event_date.
feature_usage.write.save(
    feature_usage_path,
    format="delta",
    mode="overwrite",
    partitionBy="event_date",
)

# Register the Delta location as a table so it can be queried by name.
# IF NOT EXISTS makes this a no-op on re-runs.
spark.sql(f"""
  CREATE TABLE IF NOT EXISTS feature_usage_fact
  USING DELTA
  LOCATION '{feature_usage_path}'
""")

# Read the table back from storage (rather than reusing the in-memory
# DataFrame) so the preview shows what was actually persisted.
display(spark.read.load(feature_usage_path, format="delta"))

# ─── 4) User Activity Fact ──────────────────────────────────────────────────
# One row per (event_date, app_name, region) holding the number of distinct
# user_id values seen in that group.
user_activity = silver_df.groupBy("event_date", "app_name", "region").agg(
    countDistinct("user_id").alias("active_users")
)

# Replace whatever is currently stored at the Gold path with the fresh
# aggregate, laid out in one partition directory per event_date.
user_activity.write.save(
    user_activity_path,
    format="delta",
    mode="overwrite",
    partitionBy="event_date",
)

# Register the Delta location as a table so it can be queried by name.
# IF NOT EXISTS makes this a no-op on re-runs.
spark.sql(f"""
  CREATE TABLE IF NOT EXISTS user_activity_fact
  USING DELTA
  LOCATION '{user_activity_path}'
""")

# Read the table back from storage (rather than reusing the in-memory
# DataFrame) so the preview shows what was actually persisted.
display(spark.read.load(user_activity_path, format="delta"))

# ─── 5) Optimize Gold tables for speed ───────────────────────────────────────
# Run OPTIMIZE on each Gold table, addressed by path, with Z-ordering on the
# column named in each statement: feature_category for the usage fact and
# region for the activity fact.
spark.sql(
    f"OPTIMIZE delta.`{feature_usage_path}` ZORDER BY (feature_category)"
)
spark.sql(
    f"OPTIMIZE delta.`{user_activity_path}`   ZORDER BY (region)"
)