In [0]:
from pyspark.sql.functions import *

In [0]:
# Monthly pizza metrics.
# The CTE groups silver_sales by pizza, day, month, year and revenue, counting
# rows per group and multiplying that count by the group's revenue value; the
# outer query then sums those per-day figures for each pizza/month/year.
# NOTE(review): `revenue` is presumably the per-row sale amount - confirm
# against the silver_sales schema.
pizza_metrics = spark.sql(
    """with cte as (
select pizza_name_id,total_pizza_day,day,month,year,revenue_perday from (
SELECT pizza_name_id,day,count(day)as total_pizza_day,month,year,revenue,revenue*count(day) as revenue_perday
FROM retail_analytics.pizza.silver_sales
where 1=1
group by pizza_name_id,day,month,year,revenue))

SELECT pizza_name_id,month,year,SUM(total_pizza_day) AS total_pizzas_sold,SUM(revenue_perday) AS total_revenue
FROM cte
GROUP BY pizza_name_id, month, year
order by month,year"""
)

# Replace the gold table on every run; overwriteSchema lets the stored schema
# be replaced together with the data.
pizza_metrics.write \
    .format("delta") \
    .mode("overwrite") \
    .option("overwriteSchema", "true") \
    .saveAsTable("retail_analytics.pizza.gold_pizza_metrics_monthly")

In [0]:
# Daily pizza metrics.
# One row per (pizza, day, month, year, revenue) group of silver_sales with the
# number of rows in the group (total_pizzas_sold) and that count multiplied by
# the group's revenue value (revenue_perday). The revenue column itself is used
# for grouping but is not part of the output.
pizza_daily_metric = spark.sql(
    """with cte as (
select pizza_name_id,total_pizzas_sold,day,month,year,revenue_perday from (
SELECT pizza_name_id,day,count(day)as total_pizzas_sold,month,year,revenue,revenue*count(day) as revenue_perday
 
FROM retail_analytics.pizza.silver_sales
where 1=1
--month=1
group by pizza_name_id,day,month,year,revenue))
select * from cte
                             """
)

# Replace the daily gold table (data and schema) on every run.
(
    pizza_daily_metric.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("retail_analytics.pizza.gold_pizza_metrics_daily")
)

In [0]:
# Monthly ingredient usage.
# `cte` counts rows per pizza/day/month/year/revenue group, `pizza_sales` sums
# those counts per pizza/month/year, and the final SELECT joins each monthly
# total to silver_ingredients on pizza_name_id, multiplying items_qty_in_grams
# by the monthly pizza count. The ingredient name is exposed as `ingredient`.
ingredient_usage = spark.sql(
    """with cte as (
select pizza_name_id,total_pizza_day,day,month,year,revenue_perday from (
SELECT pizza_name_id,day,count(day)as total_pizza_day,month,year,revenue,revenue*count(day) as revenue_perday
 
FROM retail_analytics.pizza.silver_sales
where 1=1
--month=1
group by pizza_name_id,day,month,year,revenue)),
pizza_sales(
select pizza_name_id,sum(total_pizza_day) as total_pizza_month,month,year from cte where 1=1 group by pizza_name_id,month,year)

SELECT sl.pizza_name_id,ing.pizza_ingredients as ingredient,sl.month,sl.year,sl.total_pizza_month,ing.items_qty_in_grams,(ing.items_qty_in_grams * sl.total_pizza_month) AS total_ingredient_grams
FROM pizza_sales sl
JOIN retail_analytics.pizza.silver_ingredients ing
  ON ing.pizza_name_id = sl.pizza_name_id"""
)

# Replace the ingredient-usage gold table (data and schema) on every run.
(
    ingredient_usage.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("retail_analytics.pizza.gold_ingredient_usage")
)


In [0]:
# Ingredient summary: one comma-separated string per pizza/month/year listing
# every ingredient with its total grams, e.g. "<ingredient> (<grams>g), ...".
#
# gold_ingredient_usage stores the ingredient name in the `ingredient` column
# (the usage query aliases pizza_ingredients to that name), so that is the
# column read here; selecting `pizza_ingredients` cannot be resolved against
# that table.
#
# collect_list gives no ordering guarantee, so the collected entries are passed
# through sort_array to make the summary string stable between runs.
ing_summary = spark.sql("""
SELECT
    pizza_name_id,
    month,
    year,
    concat_ws(
        ', ',
        sort_array(
            collect_list(concat(ingredient, ' (', total_ingredient_grams, 'g)'))
        )
    ) AS ingredient_summary
FROM retail_analytics.pizza.gold_ingredient_usage
GROUP BY pizza_name_id, month, year
""")

# Same write settings as the other gold tables: Delta format, full overwrite,
# and overwriteSchema so a changed column set does not fail the write.
(
    ing_summary.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .format("delta")
    .saveAsTable("retail_analytics.pizza.gold_ingredient_summary")
)


In [0]:
%sql
-- Ad-hoc view of monthly pizza metrics straight from silver_sales: the same
-- aggregation that is written to gold_pizza_metrics_monthly, without the
-- ORDER BY.
--
-- daily_sales counts rows per (pizza, day, month, year, revenue) group and
-- multiplies that count by the group's revenue value; the outer query sums
-- both figures per pizza/month/year.
--
-- The ingredient-usage CTEs and the commented-out alternative queries that
-- used to sit in this cell were never read by the final SELECT; those results
-- are produced by the gold_ingredient_usage and gold_ingredient_summary cells.
WITH daily_sales AS (
    SELECT
        pizza_name_id,
        day,
        month,
        year,
        count(day) AS total_pizza_day,
        revenue * count(day) AS revenue_perday
    FROM retail_analytics.pizza.silver_sales
    GROUP BY pizza_name_id, day, month, year, revenue
)
SELECT
    pizza_name_id,
    month,
    year,
    SUM(total_pizza_day) AS total_pizzas_sold,
    SUM(revenue_perday) AS total_revenue
FROM daily_sales
GROUP BY pizza_name_id, month, year;



In [0]:
%sql
-- Monthly pizza counts per pizza_name_id.
-- daily_sales counts silver_sales rows per (pizza, day, month, year, revenue)
-- group; the outer query adds those counts up per pizza/month/year.
WITH daily_sales AS (
    SELECT
        pizza_name_id,
        count(day) AS total_pizza_day,
        day,
        month,
        year,
        revenue * count(day) AS revenue_perday
    FROM retail_analytics.pizza.silver_sales
    WHERE 1 = 1
    -- month = 1
    GROUP BY pizza_name_id, day, month, year, revenue
)
SELECT
    pizza_name_id,
    sum(total_pizza_day) AS total_pizza_month,
    month,
    year
FROM daily_sales
WHERE 1 = 1
GROUP BY pizza_name_id, month, year

