#### Silver Layer Use Cases (Curation Layer)
These focus on cleaning, joining, and enriching your raw data:

1. Customer Profile Enrichment
    - Join customer and address datasets to create a unified customer profile.
2. Order Flattening
    - Explode the items array in the orders dataset to create one row per item.
3. Customer Order History
    - Join orders with customers to track each customer's order history.
4. Order Status Normalization
    - Standardize order_status values (e.g., lowercase, remove whitespace).
5. Date Dimension Extraction
    - Extract date components from order_timestamp and created_date.

In [None]:
import dlt
from pyspark.sql.functions import col, current_date, trim, lower, year, month, day

#### 1. Customer Profile Enrichment

In [None]:
@dlt.table(
    name="cur_customer_profiles",
    partition_cols=["load_date"],
)
def customer_profiles():
    # Builds the "cur_customer_profiles" table: every customer column plus the
    # selected address fields, for customers that have a matching address row.
    customer_stream = dlt.read_stream("raw_customers")
    address_stream = dlt.read_stream("raw_addresses")

    # Only these address attributes are carried into the profile.
    address_fields = [
        address_stream[field_name]
        for field_name in ("address_line_1", "city", "state", "postcode")
    ]

    # Inner join: rows without a match on customer_id on either side are dropped.
    joined = customer_stream.join(address_stream, on="customer_id", how="inner")
    enriched = joined.select(customer_stream["*"], *address_fields)

    # load_date is the table's partition column (see partition_cols above).
    return enriched.withColumn("load_date", current_date())

#### 2. Customer Order History

In [None]:
@dlt.table(
    name="cur_customer_order_history",
    partition_cols=["load_date"],
)
def customer_order_history():
    # Builds the "cur_customer_order_history" table: every order column plus
    # the name and email of the customer who placed the order.
    order_stream = dlt.read_stream("raw_orders")
    customer_stream = dlt.read_stream("raw_customers")

    # Inner join: orders with no matching customer_id are dropped.
    joined = order_stream.join(customer_stream, on="customer_id", how="inner")
    history = joined.select(
        order_stream["*"],
        customer_stream["customer_name"],
        customer_stream["email"],
    )

    # load_date is the table's partition column (see partition_cols above).
    return history.withColumn("load_date", current_date())

#### 3. Order Status Normalization

In [None]:
@dlt.table(
    name="cur_order_status_normalized",
    partition_cols=["load_date"],
)
def order_status_normalized():
    # Builds the "cur_order_status_normalized" table: the raw orders with
    # order_status overwritten by its lower-cased, trimmed form.
    raw_orders = dlt.read_stream("raw_orders")

    # Lower-case first, then strip surrounding whitespace.
    cleaned_status = trim(lower(col("order_status")))

    return (
        raw_orders
        .withColumn("order_status", cleaned_status)
        # load_date is the table's partition column (see partition_cols above).
        .withColumn("load_date", current_date())
    )

#### 4. Date Dimension Extraction

In [None]:
@dlt.table(
    name="cur_date_dimension_extract",
    partition_cols=["load_date"],
)
def date_dimension_extract():
    # Builds the "cur_date_dimension_extract" table: the raw orders plus the
    # year, month and day components derived from order_timestamp.
    raw_orders = dlt.read_stream("raw_orders")

    # (output column, extractor) pairs, applied in this order.
    date_parts = (
        ("order_year", year),
        ("order_month", month),
        ("order_day", day),
    )

    with_parts = raw_orders
    for column_name, extractor in date_parts:
        with_parts = with_parts.withColumn(
            column_name, extractor(col("order_timestamp"))
        )

    # load_date is the table's partition column (see partition_cols above).
    return with_parts.withColumn("load_date", current_date())