In [0]:
# AZURE DATABRICKS NOTEBOOK - CELL 1 (PYTHON)
# Completes: Upload, Transform, Save Final Metrics (Delta Table)

from pyspark.sql.functions import sum, avg

# 1. Load Data
df = spark.read.csv(
    "/FileStore/tables/cleaned_sales_with_metrics.csv", 
    header=True, 
    inferSchema=True
)

# 2. Transform: Calculate Final Metric (Profit Margin by Category) 
category_metrics = df.groupBy("category").agg(
    sum("revenue").alias("Total_Category_Revenue"),
    avg("profit_margin").alias("Average_Profit_Margin")
)

# 3. Register Temporary View (Crucial FIX for SQL Query in Cell 2)
# This makes the full transaction data accessible to the SQL cell.
df.createOrReplaceTempView("v_sales_data") 


# 4. Save Final Metrics (Week 4 Deliverable) 
# Saving to Delta Lake table (category_performance_metrics) is best practice.
category_metrics.write.format("delta").mode("overwrite").saveAsTable("category_performance_metrics")

print("Saved final metrics to Delta table: category_performance_metrics")
print("Registered temporary view for SQL query: v_sales_data")

Saved final metrics to Delta table: category_performance_metrics
Registered temporary view for SQL query: v_sales_data


In [0]:
%sql
-- AZURE DATABRICKS NOTEBOOK - CELL 2 (SQL)
-- Completes: Find top 3 best-selling products 
SELECT 
  product_name,
  SUM(quantity) AS Total_Quantity_Sold
FROM v_sales_data -- Queries the temporary view created in the Python cell
GROUP BY product_name
ORDER BY Total_Quantity_Sold DESC
LIMIT 3;

product_name,Total_Quantity_Sold
Eco Mug,15
Wireless Speaker,10
Laptop Pro,9


In [0]:
%sql
SELECT * FROM category_performance_metrics;

category,Total_Category_Revenue,Total_Category_Profit,Average_Profit_Margin
Home Goods,221.25,,0.3157894736842105
Electronics,3120.0,,0.3151851851851851


In [0]:
# Load the saved Delta Table
final_metrics_df = spark.read.table("category_performance_metrics")

# Display the data to activate the download option
final_metrics_df.display()

category,Total_Category_Revenue,Total_Category_Profit,Average_Profit_Margin
Home Goods,221.25,,0.3157894736842105
Electronics,3120.0,,0.3151851851851851
