In [0]:
import dlt
from datetime import datetime
from pyspark.sql.functions import coalesce, col, current_timestamp, date_format, lit
# Directory the bronze table's cloudFiles reader loads CSV files from.
# Presumably a Unity Catalog volume path (it starts with /Volumes) - confirm
# it matches the catalog/schema/volume of the target workspace.
full_path = "/Volumes/dltcatalog/dltvolume/volume"

In [0]:
@dlt.table(name = 'bronze_employee_scd2')
def bronze_employee():
    """Stream employee CSV files from ``full_path`` into ``bronze_employee_scd2``.

    The files are read with the ``cloudFiles`` streaming source using an
    explicit schema (header row, comma separator). The ``ProcessedDate``
    column is then replaced with the current timestamp rendered as a
    ``yyyy-MM-dd HH:mm:ss`` string.

    Returns:
        A streaming DataFrame with the declared employee columns.
    """
    employee_schema = (
        "ID int,Name string,DOB int,Gender string,Country string,"
        "Email string,ProcessedDate string"
    )
    raw_stream = (
        spark.readStream
        .format("cloudFiles")
        .option("cloudFiles.format", "csv")
        .option("header", "true")
        .option("sep", ",")
        .schema(employee_schema)
        .load(full_path)
    )
    # date_format() already yields a Column, so it can be passed to
    # withColumn() directly.
    ingested_at = date_format(current_timestamp(), "yyyy-MM-dd HH:mm:ss")
    return raw_stream.withColumn("ProcessedDate", ingested_at)

In [0]:
# Declare the streaming target table that the apply_changes() flow writes to.
# create_streaming_live_table() is deprecated in the DLT Python API;
# create_streaming_table() is its documented replacement and takes the same
# table name argument.
dlt.create_streaming_table("silver_employee_scd2")

In [0]:
# Maintain silver_employee_scd2 as an SCD Type 2 table fed from the bronze
# stream: rows are matched on ID and ordered by ProcessedDate.
dlt.apply_changes(
    source = "bronze_employee_scd2",
    target = "silver_employee_scd2",
    keys = ["ID"],
    sequence_by = "ProcessedDate",
    stored_as_scd_type = 2,
    # NOTE(review): history is tracked only on ProcessedDate, which the bronze
    # table stamps at ingestion time. Confirm this is intended rather than
    # tracking the business columns (e.g. via track_history_except_column_list).
    track_history_column_list = ["ProcessedDate"],
)

In [0]:
def schema_check():
    """Try to create the ``gold`` schema and print what happened.

    This is best-effort: any exception is caught and reported on stdout
    instead of being raised. An exception whose message contains
    "already exists" is reported as the schema already being present.

    NOTE(review): ``create_schema`` does not appear among the documented
    functions of the ``dlt`` module - confirm it exists. If it does not, the
    resulting error is caught below and only printed.

    Returns:
        None.
    """
    try:
        dlt.create_schema("gold")
    except Exception as err:
        # Anything other than the benign "already exists" case is reported
        # as an error, but still not re-raised.
        if "already exists" not in str(err):
            print("Error creating schema: ", err)
            return
        print("Schema 'gold' already exists in DLT")
    else:
        print("Schema 'gold' created in DLT")

In [0]:
@dlt.table(name = 'gold.gold_employee')
@dlt.expect("checking for current active records","(__END_AT = '9999-12-30')")
def gold_employee():
    """Publish the employee SCD2 history with an open-ended end-date sentinel.

    Reads ``silver_employee_scd2`` and replaces a null ``__END_AT`` (the
    currently active version of a key) with the sentinel ``"9999-12-30"``.
    Closed versions keep their real end value. The expectation is evaluated
    on the returned rows, so it is written against the sentinel: rows that
    are not current are counted as violations but are still kept, because
    ``expect`` only records metrics.

    Returns:
        A batch DataFrame of all silver rows with ``__END_AT`` filled in.
    """
    # Best-effort helper defined earlier in the notebook; it only prints.
    schema_check()
    # The silver table is an apply_changes() target, which rewrites existing
    # rows when a version is closed. Such a table cannot serve as a streaming
    # source, so it is read as a batch here.
    history = dlt.read("silver_employee_scd2")
    # Fill only open rows. Overwriting the column unconditionally would erase
    # the end dates of historical versions. No display() call here: the
    # function must simply return the DataFrame that defines the table.
    return history.withColumn(
        "__END_AT", coalesce(col("__END_AT"), lit("9999-12-30"))
    )