In [0]:
import dlt
from pyspark.sql.functions import col

@dlt.table(
    name="bronze_sales",
    comment="Raw sales data ingested to Bronze layer",
    table_properties={"quality": "bronze"},
)
def bronze_sales():
    """Load the raw sales CSV file as the ``bronze_sales`` table.

    The file is read with the ambient ``spark`` session. The first line is
    treated as the header row and column types are inferred from the data
    rather than declared explicitly.

    Returns:
        The DataFrame produced by the CSV read, with no further
        transformation applied.
    """
    source_path = "/Volumes/labdaysix/default/data/sales.csv"

    # Configure the reader first, then point it at the file.
    csv_reader = spark.read.option("header", "true")
    csv_reader = csv_reader.option("inferSchema", "true")
    return csv_reader.csv(source_path)

@dlt.table(
    name="silver_sales",
    comment="Cleaned and deduplicated Silver sales data",
)
@dlt.expect("valid_quantity", "Quantity > 0")
@dlt.expect_or_drop("no_nulls", "CustomerId IS NOT NULL")
def silver_sales():
    """Build the ``silver_sales`` table from ``bronze_sales``.

    Keeps only rows whose ``Quantity`` is greater than zero, then removes
    duplicate rows that share the same ``SalesOrderNumber`` and
    ``SalesOrderLineNumber``.

    Returns:
        The filtered and de-duplicated DataFrame.
    """
    # NOTE(review): rows with Quantity <= 0 are already removed below, so the
    # "valid_quantity" expectation presumably never sees a violating row if
    # expectations are evaluated on this function's output -- confirm intent.
    order_line_key = ["SalesOrderNumber", "SalesOrderLineNumber"]

    raw_sales = dlt.read("bronze_sales")
    positive_quantity = raw_sales.filter(col("Quantity") > 0)
    return positive_quantity.dropDuplicates(order_line_key)
