In [0]:
from pyspark.sql.functions import *

In [0]:
# ---------------------------------------------------------------------------
# Silver sales: parse order dates, derive revenue and calendar parts, drop
# rows missing required fields, then overwrite the Delta table.
# ---------------------------------------------------------------------------
bronze_sales = spark.table("retail_analytics.pizza.bronze_sales")

# order_date is parsed against several day-first layouts ("/" and "-"
# separators, padded and unpadded). coalesce keeps the first non-null result,
# so the patterns are tried in the order listed.
# NOTE(review): presumably try_to_date yields NULL (rather than raising) when
# the text does not match the pattern — confirm for the runtime in use.
order_date_formats = ["dd/MM/yyyy", "d/M/yyyy", "dd-MM-yyyy", "d-M-yyyy"]
parsed_order_date = coalesce(
    *[expr(f"try_to_date(order_date, '{fmt}')") for fmt in order_date_formats]
)

# A row is kept only when all three of these are populated. order_date_dt is
# not part of this predicate, so rows whose date could not be parsed are kept.
has_required_fields = (
    col("pizza_name_id").isNotNull()
    & col("revenue").isNotNull()
    & col("pizza_category").isNotNull()
)

sales_silver = (
    bronze_sales
    .withColumn("order_date_dt", parsed_order_date)
    # Line revenue = quantity * unit_price.
    .withColumn("revenue", col("quantity") * col("unit_price"))
    # Calendar parts are derived from the parsed date, not the raw text.
    .withColumn("day", dayofmonth(col("order_date_dt")))
    .withColumn("month", month(col("order_date_dt")))
    .withColumn("year", year(col("order_date_dt")))
    .filter(has_required_fields)
)

# Full rewrite of the silver table; overwriteSchema is set so the stored
# schema is replaced along with the data.
sales_writer = (
    sales_silver.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
sales_writer.saveAsTable("retail_analytics.pizza.silver_sales")


In [0]:
# %sql
# with cte as (
# select pizza_name_id,total_pizza_day,day,month,year,revenue_perday from (
# SELECT pizza_name_id,day,count(day)as total_pizza_day,month,year,revenue,revenue*count(day) as revenue_perday
 
# FROM silver_sales
# where 1=1
# --month=1
# group by pizza_name_id,day,month,year,revenue))
# select sum(total_pizza_day) as total_pizza_month,month,year from cte where 1=1 group by month,year

In [0]:
# ---------------------------------------------------------------------------
# Silver ingredients: one output row per element of pizza_ingredients, with
# Items_Qty_In_Grams back-filled where it is missing, written as a Delta table.
# ---------------------------------------------------------------------------

# Fallback quantity applied when Items_Qty_In_Grams is NULL in bronze.
# NOTE(review): the rationale for 60 is not recorded in this notebook —
# confirm the value with the data owner.
DEFAULT_ITEMS_QTY_IN_GRAMS = 60

ingredients_silver = (
    spark.table("retail_analytics.pizza.bronze_ingredients")
    # pizza_ingredients is split on ", " and exploded so every element gets
    # its own row in the new "ingredient" column; the source column is kept.
    .withColumn("ingredient", explode(split(col("pizza_ingredients"), ", ")))
    # coalesce is the direct form of "keep the value unless it is NULL".
    .withColumn(
        "Items_Qty_In_Grams",
        coalesce(col("Items_Qty_In_Grams"), lit(DEFAULT_ITEMS_QTY_IN_GRAMS)),
    )
)

(
    ingredients_silver.write
    .mode("overwrite")
    # Same setting as the silver_sales write: this cell adds the "ingredient"
    # column, so the stored schema must be replaceable on overwrite.
    .option("overwriteSchema", "true")
    .format("delta")
    .saveAsTable("retail_analytics.pizza.silver_ingredients")
)
