In [0]:
from pyspark.sql.functions import col, sum, round

# Notebook parameters, declared once as (widget name, default value, UI label).
# The order of this tuple matters: the values are unpacked positionally below.
_widget_specs = (
    ("sales_table_name", "bronze_sales", "Source Sales Table"),
    ("customers_table_name", "bronze_customers", "Source Customers Table"),
    ("products_table_name", "bronze_products", "Source Products Table"),
    ("output_table_name", "silver_sales_enriched", "Output Silver Table"),
    ("database_name", "module2_db", "Database Name"),
)

# Register every text widget before reading any of them.
for _widget_name, _widget_default, _widget_label in _widget_specs:
    dbutils.widgets.text(_widget_name, _widget_default, _widget_label)

# Read the current widget values back, in the same order as declared above.
sales_table, customers_table, products_table, output_table, db_name = [
    dbutils.widgets.get(_spec[0]) for _spec in _widget_specs
]

# Make the configured database the session default so that unqualified table
# names used later resolve inside it.
# NOTE(review): db_name is interpolated into the SQL text unescaped — confirm
# the widget value is always a trusted identifier.
_use_statement = f"USE {db_name}"
spark.sql(_use_statement)

print(f"Processing sales data for: {db_name}.{sales_table}")

# Load the three source tables as DataFrames.
#
# A read failure must FAIL the run. dbutils.notebook.exit() would end the
# notebook with a successful status, so a missing source table would be
# reported as a passing task and downstream tasks would start without the
# silver table or its task values. Raising keeps the same message but marks
# the run as failed, and chaining preserves the original Spark error.
try:
    df_sales = spark.table(f"{db_name}.{sales_table}")
    df_customers = spark.table(f"{db_name}.{customers_table}")
    df_products = spark.table(f"{db_name}.{products_table}")
except Exception as e:
    raise RuntimeError(f"Failed to read one of the source tables: {e}") from e

# Calculate revenue and join data
#
# Revenue per sales line is Quantity * UnitPrice, rounded to 2 decimal places.
# `round` here is pyspark.sql.functions.round (imported at the top of the
# file), not the Python builtin.
df_sales_with_revenue = df_sales.withColumn(
    "Revenue",
    round(col("Quantity") * col("UnitPrice"), 2),
)

# Enrich each sales line with customer and product attributes. Both joins are
# LEFT joins, so every sales row is kept and unmatched rows get NULLs in the
# customer/product columns.
df_enriched = (
    df_sales_with_revenue.alias("sales")
    # Join customers on the customer key. The previous condition compared
    # sales.CustomerID with cust.CustomerName (an ID against a name), which
    # would leave the customer columns NULL for nearly every row.
    # NOTE(review): assumes the customers table has a CustomerID column —
    # confirm against the bronze schema.
    .join(
        df_customers.alias("cust"),
        col("sales.CustomerID") == col("cust.CustomerID"),
        "left",
    )
    .join(
        df_products.alias("prod"),
        col("sales.Item") == col("prod.Item"),
        "left",
    )
    .select(
        col("sales.SalesOrderNumber"),
        col("sales.OrderDate"),
        col("sales.Item"),
        col("sales.Quantity"),
        col("sales.UnitPrice"),
        col("sales.Revenue"),
        col("cust.CustomerName"),
        col("cust.Region"),
        col("cust.CustomerType"),
        col("prod.Category"),
    )
)

# Persist the enriched data as a Delta table, replacing any previous contents.
# The table name is unqualified, so it resolves against the session's current
# database.
df_enriched.write.format("delta").mode("overwrite").saveAsTable(output_table)

# Count the rows from the table that was just written. Calling
# df_enriched.count() here would re-execute the full read + two-join plan a
# second time, and could disagree with what was persisted if a source table
# changed in between.
silver_count = spark.table(output_table).count()
print(f"Successfully processed and saved {silver_count} records to {output_table}.")

# Set task values for downstream tasks (like the For Each loop)
dbutils.jobs.taskValues.set(key="silver_table_name", value=output_table)
dbutils.jobs.taskValues.set(key="silver_table_count", value=silver_count)

# End the notebook and hand this summary string back as its exit value.
dbutils.notebook.exit(f"Processing finished. Saved {silver_count} records to {output_table}.")