In [0]:
from pyspark.sql.functions import col

# --- Notebook parameters ------------------------------------------------------
# Each entry is widget name -> (default value, label shown in the widget bar).
# "category_filter" is the value a calling job is expected to fill in per
# iteration; the remaining widgets locate the input table and the output root.
_WIDGET_SPECS = {
    "category_filter": ("", "Category to filter and process (populated by {{item}})"),
    "enriched_sales_table": ("silver_sales_enriched", "Enriched Sales Table"),
    "output_path_base": ("dbfs:/FileStore/lab_data/category_sales/", "Base output path for category sales"),
    "database_name": ("module2_db", "Database Name"),
}
for _widget_name, (_widget_default, _widget_label) in _WIDGET_SPECS.items():
    dbutils.widgets.text(_widget_name, _widget_default, _widget_label)

# Read the current widget values back.
current_category = current_category_widget = dbutils.widgets.get("category_filter")
enriched_sales_table_name = dbutils.widgets.get("enriched_sales_table")
output_path_base = dbutils.widgets.get("output_path_base")
db_name = dbutils.widgets.get("database_name")

# Qualified "<database>.<table>" name of the input table.
full_enriched_sales_table_path = ".".join((db_name, enriched_sales_table_name))

# Directory-safe form of the category: every non-alphanumeric character is
# replaced with "_" so the value can be used as a path segment.
safe_category_name = "".join(
    map(lambda ch: ch if ch.isalnum() else "_", current_category)
)

# "<base without trailing slashes>/<safe category>/" — the trailing empty
# element produces the final "/".
category_output_path = "/".join(
    [output_path_base.rstrip("/"), safe_category_name, ""]
)

print(f"Processing sales for category: {current_category} (Safe name: {safe_category_name})")
print(f"Input table: {full_enriched_sales_table_path}")
print(f"Output path: {category_output_path}")

# --- Validate, load, filter, and persist one category -------------------------
# Flow: require a category -> load the enriched sales table -> require a
# "Category" column -> filter to the requested category -> overwrite the
# category's Delta output -> publish the saved row count as a job task value.
#
# NOTE(review): every early-out below uses dbutils.notebook.exit(), which ends
# the notebook with a return value instead of an error. If an unreadable table
# or a missing column should mark the job task as failed, raise an exception
# instead — confirm with the job owner first, since callers may be reading
# these exit strings.

if not current_category:
    dbutils.notebook.exit("Category not provided. Exiting.")

try:
    df_enriched_sales = spark.table(full_enriched_sales_table_path)
except Exception as e:
    # Broad catch is intentional at this top-level boundary: any failure to
    # resolve the table is logged and turned into an exit message.
    print(f"Error reading table {full_enriched_sales_table_path}: {e}")
    dbutils.notebook.exit(f"Failed to read table: {full_enriched_sales_table_path}")

# The membership test is an exact, case-sensitive match on the column name.
if "Category" not in df_enriched_sales.columns:
    dbutils.notebook.exit("Enriched sales table missing 'Category' column for filtering.")

df_category_sales = df_enriched_sales.filter(col("Category") == current_category)

if df_category_sales.isEmpty():
    # Nothing to write: leave any existing output for this category untouched.
    print(f"No sales found for category: {current_category}")
    dbutils.notebook.exit(f"No sales for category {current_category}")
else:
    df_category_sales.write.format("delta").mode("overwrite").save(category_output_path)

    # Count the rows that were actually saved rather than calling count() on
    # df_category_sales again: that would re-run the filter against the source
    # table as another full Spark job, and could report a number that no longer
    # matches the written data if the source changed in between.
    count = spark.read.format("delta").load(category_output_path).count()

    print(f"Successfully processed and saved {count} sales records for category '{current_category}' to {category_output_path}")
    # Expose the count to downstream job tasks, keyed by the path-safe category.
    dbutils.jobs.taskValues.set(key=f"processed_count_{safe_category_name}", value=count)
    dbutils.notebook.exit(f"Processed category {current_category}")