In [0]:
#07 Schema Merge feature — demonstrates Delta Lake MERGE (conditional update, delete and insert in one operation)
from pyspark.sql import SparkSession
from delta.tables import DeltaTable
from pyspark.sql.functions import lit

# Storage location of the demo Delta table; every later step reads or
# writes this path.
path = "dbfs:/FileStore/delta_merge_demo"

# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

# CLEAN UP: recursively remove anything already stored at `path`.
# NOTE(review): `dbutils` is not imported in this file — presumably the
# Databricks notebook global; confirm before running outside a notebook.
dbutils.fs.rm(
    path,
    recurse=True,
)

# ── Step A: Create base table ──
# Four seed rows: ids 1–4 paired with vals A–D.
base_rows = [(1, "A"), (2, "B"), (3, "C"), (4, "D")]
base_df = spark.createDataFrame(base_rows, ["id", "val"])

# Write the seed rows as a Delta table at `path`, replacing existing data.
base_df.write.format("delta").mode("overwrite").save(path)

# Read the table back, sorted by id, and show it.
print("Initial (version 0):")
initial_snapshot = spark.read.format("delta").load(path).orderBy("id")
display(initial_snapshot)

# ── Step B: Prepare updates ──
# Change set consumed by the MERGE step:
#   id=1 -> new val "A'"       (update)
#   id=2 -> val None           (marked for delete)
#   id=5 -> val "E"            (new row)
update_rows = [
    (1, "A'"),
    (2, None),
    (5, "E"),
]
updates = spark.createDataFrame(update_rows, ["id", "val"])

# ── Step C: MERGE ──
# Apply `updates` (alias s) to the Delta table at `path` (alias t),
# matching rows on id:
#   * matched and s.val IS NOT NULL     -> set t.val to s.val
#   * matched and s.val IS NULL         -> delete the target row
#                                          (a NULL val is the delete marker)
#   * not matched and s.val IS NOT NULL -> insert the source row
# The insert clause is guarded so that a delete marker whose id does not
# exist in the target is ignored rather than inserted as an (id, NULL) row.
delta_tbl = DeltaTable.forPath(spark, path)
(
    delta_tbl.alias("t")
    .merge(
        source=updates.alias("s"),
        condition="t.id = s.id",
    )
    .whenMatchedUpdate(condition="s.val IS NOT NULL", set={"val": "s.val"})
    .whenMatchedDelete(condition="s.val IS NULL")
    .whenNotMatchedInsert(
        condition="s.val IS NOT NULL",
        values={"id": "s.id", "val": "s.val"},
    )
    .execute()
)

# Read the table back, sorted by id, to show the result of the merge.
print("After MERGE (version 1):")
display(spark.read.format("delta").load(path).orderBy("id"))

# ── Inspect history ──
# Run DESCRIBE HISTORY against the path-addressed Delta table and print
# the result without truncating column values.
print("DESCRIBE HISTORY:")
history_df = spark.sql(f"DESCRIBE HISTORY delta.`{path}`")
history_df.show(truncate=False)