In [0]:
%sql
-- Customer dimension maintained as a Type-2 slowly changing dimension:
-- each change to a customer's attributes is stored as a new row, and the
-- three bookkeeping columns at the bottom describe the validity of each row.
create table if not exists main.ecommerce.dim_customer
(
customer_id STRING,
customer_unique_id STRING,
customer_zip_code_prefix STRING,
customer_city STRING,
customer_state STRING,

-- SCD-2 bookkeeping columns
effective_start_date DATE,  -- first day this version of the row is valid
effective_end_date DATE,    -- day this version was superseded; 9999-12-31 while still current
is_active STRING            -- 'Y' for the current version of a customer, 'N' for expired versions
)
USING DELTA;

In [0]:

from delta.tables import DeltaTable
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date, expr, lit

# Latest customer snapshot, decorated with the SCD-2 bookkeeping columns that a
# brand-new (current) version of a row should carry.
customer_table = spark.table("main.ecommerce.customer")
customer_scd = (
    customer_table
    .withColumn("is_active", lit("Y"))
    # Stamp the load date instead of a hard-coded literal so that re-runs on
    # later days record the correct start of validity.
    .withColumn("effective_start_date", current_date())
    # Open-ended sentinel; cast explicitly because the target column is DATE.
    .withColumn("effective_end_date", lit("9999-12-31").cast("date"))
)

display(customer_scd)

# Existing customer dimension (Delta table)
dim_customer = DeltaTable.forName(spark, "main.ecommerce.dim_customer")

# True when any tracked attribute differs between the active target row (t)
# and the incoming row (s). The null-safe operator <=> is used because a plain
# <> evaluates to NULL (i.e. "no change") whenever either side is NULL.
attribute_changed = """
    NOT (t.customer_unique_id <=> s.customer_unique_id) OR
    NOT (t.customer_zip_code_prefix <=> s.customer_zip_code_prefix) OR
    NOT (t.customer_city <=> s.customer_city) OR
    NOT (t.customer_state <=> s.customer_state)
"""

# Incoming rows whose currently active version in the dimension has changed.
# These need a *new* row inserted in addition to expiring the old one.
changed_customers = (
    customer_scd.alias("s")
    .join(
        dim_customer.toDF().alias("t"),
        expr(
            "t.customer_id = s.customer_id AND t.is_active = 'Y' "
            f"AND ({attribute_changed})"
        ),
    )
    .select("s.*")
)

# Stage the source for a single MERGE:
#   * merge_key = NULL        -> can never match, so the row is inserted as the
#                                new current version of a changed customer;
#   * merge_key = customer_id -> matches the active row (expiring it if it
#                                changed) or is inserted if the customer is new.
staged_updates = changed_customers.selectExpr(
    "CAST(NULL AS STRING) AS merge_key", "*"
).unionByName(
    customer_scd.selectExpr("customer_id AS merge_key", "*")
)

merge_condition = """
    t.customer_id = s.merge_key
    AND t.is_active = 'Y'
"""

# SCD-2 merge: expire changed active rows and insert new/changed versions.
(
    dim_customer.alias("t")
    .merge(staged_updates.alias("s"), merge_condition)
    .whenMatchedUpdate(
        condition=attribute_changed,
        set={
            "effective_end_date": "current_date()",
            "is_active": "'N'",
        },
    )
    .whenNotMatchedInsert(
        values={
            "customer_id": "s.customer_id",
            "customer_unique_id": "s.customer_unique_id",
            "customer_zip_code_prefix": "s.customer_zip_code_prefix",
            "customer_city": "s.customer_city",
            "customer_state": "s.customer_state",
            "effective_start_date": "s.effective_start_date",
            "effective_end_date": "s.effective_end_date",
            "is_active": "s.is_active",
        }
    )
    .execute()
)

