In [0]:
#04_Time travel and versioning
from pyspark.sql import SparkSession
from delta.tables import DeltaTable
from pyspark.sql.functions import lit

# Reuse the active Spark session if there is one; otherwise build a new one.
spark = (
    SparkSession.builder
    .getOrCreate()
)

# Storage location of the demo Delta table used by every step below.
path = "/local_disk0/tmp/delta_time_travel"

# Start from a clean slate: recursively remove whatever a previous run left
# at `path`, so the commits created below are numbered from version 0.
# NOTE: `dbutils` is supplied by the Databricks notebook runtime, not imported.
dbutils.fs.rm(path, recurse=True)

# ── Version 0: create the table with ids 0..2, all tagged "alpha" ──
(
    spark.range(0, 3)
    .withColumn("val", lit("alpha"))
    .write
    .format("delta")
    .mode("overwrite")
    .save(path)
)

# ── Version 1: append ids 3..4, tagged "beta" ──
(
    spark.range(3, 5)
    .withColumn("val", lit("beta"))
    .write
    .format("delta")
    .mode("append")
    .save(path)
)

# ── Version 2: rewrite the row with id 1 in place ──
# Both `set` values are SQL expression strings: "100" is a numeric literal
# and "'gamma'" (note the inner quotes) is a string literal.
(
    DeltaTable.forPath(spark, path)
    .update(
        condition="id == 1",
        set={"id": "100", "val": "'gamma'"},
    )
)

# 1) Commit log of the table (one row per version)
print("=== DESCRIBE HISTORY ===")
(
    spark.sql(f"DESCRIBE HISTORY delta.`{path}`")
    .show(truncate=False)
)

# 2) Time travel by version number: the table as first written
print("→ VERSION 0 SNAPSHOT:")
(
    spark.read.format("delta")
    .option("versionAsOf", 0)
    .load(path)
    .orderBy("id")
    .show()
)

# 3) Time travel by version number: after the append
print("→ VERSION 1 SNAPSHOT:")
(
    spark.read.format("delta")
    .option("versionAsOf", 1)
    .load(path)
    .orderBy("id")
    .show()
)

# 4) No time-travel option: reads the current state (version 2, post-update)
print("→ VERSION 2 (LATEST):")
(
    spark.read.format("delta")
    .load(path)
    .orderBy("id")
    .show()
)

# 5) Read by timestamp (uses the exact commit timestamp of version 1, so the
#    snapshot shown is the table as of version 1)
import datetime

# Look the commit up through the DeltaTable history API instead of wrapping
# DESCRIBE HISTORY in a SQL subquery, which not every Spark/Delta SQL parser
# accepts. Two renderings of the same timestamp are fetched:
#   * `timestamp`      – collected as a Python datetime (kept in `ts`);
#   * `timestamp_text` – rendered to a string by Spark itself, i.e. in the
#     Spark session time zone, which is also the zone `timestampAsOf` is
#     parsed in. Passing this string avoids a mismatch when the driver's
#     Python local time zone differs from the session time zone.
v1_commit = (
    DeltaTable.forPath(spark, path)
    .history()
    .where("version = 1")
    .selectExpr("timestamp", "CAST(timestamp AS STRING) AS timestamp_text")
    .first()
)
ts = v1_commit["timestamp"]
print(f"→ SNAPSHOT AS OF {ts}:")
(
    spark.read.format("delta")
    .option("timestampAsOf", v1_commit["timestamp_text"])
    .load(path)
    .orderBy("id")
    .show()
)