In [0]:

# Demo overview
# -------------
# Builds a tiny Delta table under /tmp/scratch_demo/mytable and uses it to show:
#   * how the table schema is recorded in the JSON transaction log,
#   * how that schema evolves when the table is ALTERed, and
#   * how a checkpoint Parquet file bundles that metadata for Spark to read.

# Start from a clean slate so the notebook can be re-run top to bottom:
# remove anything left over from a previous run (second argument = recurse),
# then recreate the empty scratch directory.
dbutils.fs.rm("/tmp/scratch_demo", True)
dbutils.fs.mkdirs("/tmp/scratch_demo")


In [0]:
%sql
-- v0: create the initial Delta table. The CREATE itself is commit version 0.

CREATE OR REPLACE TABLE delta.`/tmp/scratch_demo/mytable` (
  id   INT,
  val  STRING
)
USING DELTA;

-- v1: insert a single row so an "add" action is written to the log.
-- This INSERT is a separate commit (version 1). Note there is no second %sql
-- magic here: the magic is only valid as the first line of the cell.
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (1, 'alpha');

-- Inspect the raw JSON log for v0 (the CREATE commit).
-- The statement is terminated with ';' because further statements follow it.

SELECT *
FROM json.`/tmp/scratch_demo/mytable/_delta_log/00000000000000000000.json`;
-- You'll see the first "metaData" action; its schemaString describes id and val.

-- v2: evolve the schema via ALTER TABLE.
-- (v0 was the CREATE and v1 the first INSERT, so this ALTER is commit version 2.)

ALTER TABLE delta.`/tmp/scratch_demo/mytable`
  ADD COLUMNS (
    new_col DOUBLE
  );

-- v3: append a row that uses the new column.

INSERT INTO delta.`/tmp/scratch_demo/mytable`
VALUES (2, 'beta', 2.5);

-- Inspect the JSON log for the ALTER commit (v2).
-- 00000000000000000001.json only holds the "add" written by the first INSERT;
-- the new "metaData" action produced by the ALTER is in 00000000000000000002.json.

SELECT *
FROM json.`/tmp/scratch_demo/mytable/_delta_log/00000000000000000002.json`;
-- Now there is a second "metaData" entry whose schemaString includes the new_col field.

-- v4–v11: generate more commits so that a checkpoint is triggered.
-- Each INSERT below is a separate commit. The statements above already produced
-- v0 (CREATE), v1 (INSERT), v2 (ALTER) and v3 (INSERT), so the (9,'iota') row
-- lands in version 10 and the (10,'kappa') row in version 11.

INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (3,'gamma',3.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (4,'delta',4.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (5,'epsilon',5.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (6,'zeta',6.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (7,'eta',7.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (8,'theta',8.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (9,'iota',9.0);
INSERT INTO delta.`/tmp/scratch_demo/mytable` VALUES (10,'kappa',10.0);
-- Once version 10 has been committed, Delta is expected to write a checkpoint Parquet called
-- /tmp/scratch_demo/mytable/_delta_log/00000000000000000010.checkpoint.parquet
-- (assumes the default checkpoint interval of 10 commits - TODO confirm for your runtime).

-- Lower the checkpoint interval to 1.
-- NOTE(review): this runs after version 10 already exists, so it does not create the
-- v10 checkpoint; presumably it makes this and later commits checkpoint every time - confirm.

ALTER TABLE delta.`/tmp/scratch_demo/mytable`
  SET TBLPROPERTIES ('delta.checkpointInterval' = '1');

-- 7) Examine the checkpoint Parquet

-- List the log folder to see the checkpoint file.
-- `%fs` is a notebook magic, not SQL, so it cannot be executed from inside this
-- %sql cell. Run the following line in a cell of its own:
--   %fs ls /tmp/scratch_demo/mytable/_delta_log

-- In SQL: show only the metadata row inside the checkpoint.
-- A checkpoint file has one column per action type (txn, add, remove, metaData,
-- protocol); there is no top-level `version` column, so only the metaData
-- field is selected.

SELECT
  metaData.schemaString
FROM parquet.`/tmp/scratch_demo/mytable/_delta_log/00000000000000000010.checkpoint.parquet`
WHERE metaData IS NOT NULL;

-- A checkpoint stores the reconciled table state, so you'll see a single
-- schemaString: the latest one (including new_col), which is the live schema
-- Spark uses. The superseded v0 schema remains visible in the v0 JSON commit file.

-- 8) Let Delta read the table through its own abstraction


DESCRIBE DETAIL delta.`/tmp/scratch_demo/mytable`;
DESCRIBE HISTORY delta.`/tmp/scratch_demo/mytable`;
-- * DESCRIBE DETAIL reports table-level details for the current snapshot.
--   NOTE(review): per the Delta docs this is format, location, numFiles, properties, etc.,
--   not the column list - use DESCRIBE TABLE if the schema itself is wanted.
-- * DESCRIBE HISTORY lists one row per commit. Counting the statements in this
--   notebook gives versions 0→12: the CREATE, ten INSERTs (WRITE), the
--   ADD COLUMNS ALTER and the SET TBLPROPERTIES ALTER.

-- Essence
-- 1. The schema lives in the JSON log (metaData.schemaString in v0).
-- 2. ALTER TABLE ... ADD COLUMNS emits a new metaData entry in its own commit
--    (v2 in this notebook, after the CREATE and the first INSERT).
-- 3. The checkpoint at v10 bundles the table state as of that version, including the
--    current metaData, into a Parquet snapshot.
-- 4. NOTE(review): readers presumably start from the latest checkpoint and replay only the
--    JSON commits written after it, rather than the whole log - confirm against the Delta protocol.

In [0]:
%sql
-- Step 1: make the Hive metastore the current catalog for the statements below.
USE CATALOG hive_metastore;

-- Step 2: ensure the `default` schema exists (does nothing if it already does).
CREATE SCHEMA IF NOT EXISTS default;

-- Step 3: register the existing Delta files at the scratch location as an
-- external table, so the table can be addressed by name instead of by path.
CREATE TABLE IF NOT EXISTS default.mytable
  USING DELTA
  LOCATION 'dbfs:/tmp/scratch_demo/mytable';

-- Step 4: with the table registered, it can now be queried by name.
SELECT * FROM default.mytable;
