In [0]:
# 02_ACID Properties
import os
import shutil

from delta.tables import DeltaTable
from pyspark.sql import SparkSession

def list_parquet_files(directory):
    """Print the path of every ``.parquet`` file found under ``directory``.

    The tree is walked recursively with ``os.walk``. Sub-directories and file
    names are both visited in sorted order, so two listings of the same tree
    can be compared line by line.

    Args:
        directory: Root directory to scan (a path ``os.walk`` can read).

    Returns:
        None. A header line and the matching paths are written to stdout.
    """
    print(f"\n=== Parquet files in: {directory} ===")
    if not os.path.isdir(directory):
        # os.walk yields nothing for a missing path, which would look exactly
        # like "no parquet files here"; report the real situation instead.
        print(f"(directory not found: {directory})")
        return
    for root, dirs, files in os.walk(directory):
        # In top-down mode os.walk descends into `dirs` in list order, so an
        # in-place sort makes the traversal order deterministic.
        dirs.sort()
        for file in sorted(files):
            if file.endswith(".parquet"):
                print(os.path.join(root, file))

spark = SparkSession.builder.getOrCreate()
path = "/local_disk0/tmp/delta_acid_demo"

# Start from an empty location so the version numbers below hold on every run.
# mode("overwrite") on an existing Delta table adds a new commit rather than
# recreating the table, so without this a re-run would not produce version 0.
# NOTE(review): assumes `path` lives on the same local filesystem that os.walk
# reads in list_parquet_files — confirm on clusters whose default filesystem
# is remote (then this call is a harmless no-op and cleanup must go elsewhere).
shutil.rmtree(path, ignore_errors=True)

# ── Step A: VERSION 0 ──
# Create a brand-new Delta table with 5 rows (ids 0-4)
(
    spark.range(0, 5)
    .write
    .format("delta")
    .mode("overwrite")
    .save(path)
)

list_parquet_files(path)

# ── Step B: VERSION 1 ──
# Append 5 more rows (ids 5-9)
(
    spark.range(5, 10)
    .write
    .format("delta")
    .mode("append")
    .save(path)
)

list_parquet_files(path)

# ── Step C: VERSION 2 ──
# Update in place: the row whose id is 2 gets id 200
deltaTable = DeltaTable.forPath(spark, path)
deltaTable.update(
    condition="id == 2",
    set={"id": "200"},  # values are SQL expression strings
)
# List the files again to compare with the listing taken after Step B
list_parquet_files(path)

# Show the commit log of the table
print("History")
spark.sql(f"DESCRIBE HISTORY delta.`{path}`").show(truncate=False)

# Time travel: read the table as of earlier versions, then the current state
print("-version 0")
spark.read.format("delta").option("versionAsOf", 0).load(path).show()

print("-version 1")
spark.read.format("delta").option("versionAsOf", 1).load(path).show()

print(" Latest version")
spark.read.format("delta").load(path).show()









In [0]:
# Scratch
# Where the demo keeps its Delta table and its streaming checkpoint.
delta_path = "dbfs:/FileStore/delta_unified_demo"
checkpoint = "dbfs:/FileStore/delta_unified_demo_checkpoint"

# Remove whatever a previous run left behind at either location
# (table first, then checkpoint).
for stale_location in (delta_path, checkpoint):
    dbutils.fs.rm(stale_location, recurse=True)

In [0]:
# Scratch
# Batch write: store ids 0-4 at delta_path in a single overwrite,
# then read the table back and display it.
batch_writer = spark.range(0, 5).write.format("delta").mode("overwrite")
batch_writer.save(delta_path)

print("Batch data:")
batch_snapshot = spark.read.format("delta").load(delta_path)
display(batch_snapshot)

In [0]:
# Checkpoint demo
# Can be skipped - another code sample follows below.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
delta_path = "dbfs:/FileStore/delta_stream_demo"
checkpoint = "dbfs:/FileStore/delta_stream_demo_ckpt"

# 1) Start a streaming write.
# Source: the built-in "rate" generator; its `value` column is kept as event_id.
stream_df = (
    spark.readStream.format("rate")
         .option("rowsPerSecond", 10)   # 10 events/sec
         .load()
         .selectExpr("value AS event_id")
)

# NOTE(review): availableNow=True makes the query process what is available
# when it starts and then stop on its own; with a "rate" source that may be
# very few rows. If the intent is to stream for a few seconds, a
# processingTime trigger may be what is wanted — confirm before changing.
stream_query = (
    stream_df
      .writeStream
      .format("delta")
      .option("checkpointLocation", checkpoint)
      .outputMode("append")
      .trigger(availableNow=True)
      .start(delta_path)
)

# Give the query at most 5 seconds. Unlike a plain sleep, awaitTermination
# returns as soon as the query finishes and re-raises a failure of the query,
# so a broken stream is reported here rather than by the listings below.
try:
    stream_query.awaitTermination(5)
finally:
    # Always release the query, whether it finished, timed out, or failed.
    stream_query.stop()

# 2) Inspect the files & log
print("Files on disk:")
display(dbutils.fs.ls(delta_path))
print("\nDelta log entries:")
display(dbutils.fs.ls(delta_path + "/_delta_log"))
print("\nLatest snapshot:")
display(spark.read.format("delta").load(delta_path).orderBy("event_id"))