In [0]:
from pyspark.sql.functions import col, sum, avg, count, round, date_format

# Paths
silver_base = "/Volumes/workspace/default/ifood-files/silver/delivery_center_enriched"
gold_base = "/Volumes/workspace/default/ifood-files/gold/kpis"


def _publish_kpi(kpi_df, name: str) -> None:
    """Show a KPI DataFrame in the notebook and persist it as a Gold Delta table.

    Args:
        kpi_df: Aggregated Spark DataFrame to publish.
        name: Sub-directory under ``gold_base`` that receives the table.

    The target is fully overwritten on every run. ``mergeSchema`` lets new
    columns be added to an existing table; per the Delta Lake docs it does not
    drop or retype existing columns (that would need ``overwriteSchema``).
    """
    display(kpi_df)  # notebook visualization
    (
        kpi_df.write.format("delta")
        .option("mergeSchema", "true")
        .mode("overwrite")
        .save(f"{gold_base}/{name}")
    )


# NOTE: `sum` and `round` used below are the pyspark.sql.functions versions
# imported at the top of the file, not the Python builtins of the same name.

# Read the Silver layer and derive the purchase month ("yyyy-MM") for grouping.
df_silver = spark.read.format("delta").load(silver_base)
df_silver = df_silver.withColumn(
    "order_month",
    date_format("order_purchase_timestamp", "yyyy-MM"),
)

# 1. Total revenue per month
revenue_by_month = (
    df_silver.groupBy("order_month")
    .agg(round(sum("payment_amount"), 2).alias("total_revenue"))
    .orderBy("order_month")
)
_publish_kpi(revenue_by_month, "revenue_by_month")

# 2. Number of orders per channel
# count("order_id") counts rows with a non-null order_id.
# NOTE(review): if the Silver table can hold several rows per order, this
# over-counts -- confirm the table grain (applies to KPI 5 as well).
orders_by_channel = (
    df_silver.groupBy("channel_type")
    .agg(count("order_id").alias("total_orders"))
    .orderBy("total_orders", ascending=False)
)
_publish_kpi(orders_by_channel, "orders_by_channel")

# 3. Average ticket per store
avg_ticket_per_store = (
    df_silver.groupBy("store_id", "store_name")
    .agg(round(avg("payment_amount"), 2).alias("avg_ticket"))
    .orderBy("avg_ticket", ascending=False)
)
_publish_kpi(avg_ticket_per_store, "avg_ticket_per_store")

# 4. Average delivery distance (single-row, global aggregate)
avg_delivery_distance = (
    df_silver.select("delivery_distance_meters")
    .agg(round(avg("delivery_distance_meters"), 2).alias("avg_delivery_distance"))
)
_publish_kpi(avg_delivery_distance, "avg_delivery_distance")

# 5. Total orders per city (hub_city)
orders_by_city = (
    df_silver.groupBy("hub_city")
    .agg(count("order_id").alias("total_orders"))
    .orderBy("total_orders", ascending=False)
)
_publish_kpi(orders_by_city, "orders_by_city")

# 6. Deliveries per driver (driver_id and driver_type)
deliveries_by_driver = (
    df_silver.groupBy("driver_id", "driver_type")
    .agg(count("delivery_id").alias("total_deliveries"))
    .orderBy("total_deliveries", ascending=False)
)
_publish_kpi(deliveries_by_driver, "deliveries_by_driver")
