In [0]:
# Aspect 4 - Schema Evolution 
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit, col
from pyspark.sql.utils import AnalysisException

# Location of the demo Delta table; every step below reads from or writes to it.
path = "dbfs:/FileStore/delta_schema_demo"

# Reuse the already-running Spark session if there is one, otherwise build it.
spark = SparkSession.builder.getOrCreate()

# CLEAN UP
# Recursively remove whatever a previous run left at the path so the demo
# starts from scratch. NOTE(review): `dbutils` is not defined in this file —
# presumably the Databricks notebook global; confirm before running elsewhere.
dbutils.fs.rm(path, recurse=True)

# ── Step A: INITIAL WRITE ──
# Seed the table with three rows: the `id` column produced by spark.range
# plus a constant string column `val`.
seed_df = spark.range(0, 3).withColumn("val", lit("alpha"))
seed_df.write.format("delta").mode("overwrite").save(path)

# Read the table back to show what the first commit contains.
print("Version 0 snapshot:")
snapshot_df = spark.read.format("delta").load(path)
display(snapshot_df)

# ── Step B: TRY A MISMATCHED WRITE ──
# This DataFrame has an extra column 'new_col' (a float literal) that the
# rows written in Step A do not have.
bad_df = spark.range(3, 6) \
              .withColumn("val", lit("beta")) \
              .withColumn("new_col", lit(99.9))

# Appending without mergeSchema is expected to be rejected by Delta's schema
# enforcement. Only AnalysisException is caught: any other failure (storage,
# permissions, bad path, ...) is a real error and must not be reported as a
# schema mismatch.
try:
    bad_df.write.format("delta").mode("append").save(path)
except AnalysisException as e:
    print("🛑 Write rejected due to schema mismatch:\n", e)
else:
    # Reaching this branch means the rows were committed (e.g. automatic
    # schema merging is enabled for the session), so the append in Step C
    # will write the same rows a second time.
    print("Write was unexpectedly accepted; the table now already contains these rows.")

# ── Step C: ALLOW EVOLUTION & APPEND ──
# Now enable schema merging so the append is allowed to add 'new_col'
# to the table schema.
(
    bad_df.write
    .format("delta")
    .mode("append")
    .option("mergeSchema", "true")
    .save(path)
)

# Ask the table history for the newest version instead of hard-coding it:
# a rejected write commits nothing, so the merged append is not "version 2"
# when the mismatched write above failed.
latest_version = (
    spark.sql(f"DESCRIBE HISTORY delta.`{path}` LIMIT 1").first()["version"]
)
print(f"Version {latest_version} snapshot (merged schema):")

# mergeSchema is only needed on the write; a plain read already returns the
# table's current (merged) schema.
spark.read.format("delta").load(path).show()

# ── Inspect history & schema ──
# Show the operations recorded for the table, without truncating wide columns.
print("DESCRIBE HISTORY:")
history_df = spark.sql(f"DESCRIBE HISTORY delta.`{path}`")
history_df.show(truncate=False)

# Re-read the table and print the schema as it stands after the writes above.
print("Current schema:")
final_df = spark.read.format("delta").load(path)
final_df.printSchema()