In [1]:
from pyspark.sql.functions import *
from datetime import datetime, timedelta

print("POPULATING DimDate")

# Inclusive bounds of the calendar covered by the dimension.
start_date = datetime(2019, 1, 1)
end_date = datetime(2024, 12, 31)

# One midnight datetime per calendar day; the +1 keeps end_date in the range.
date_range = [
    start_date + timedelta(days=offset)
    for offset in range((end_date - start_date).days + 1)
]


def _thanksgiving(calendar_year):
    """Return the fourth Thursday of November of *calendar_year*."""
    nov_first = datetime(calendar_year, 11, 1)
    # datetime.weekday() numbers Monday as 0, so Thursday is 3.
    days_to_first_thursday = (3 - nov_first.weekday()) % 7
    return nov_first + timedelta(days=days_to_first_thursday + 21)


def _holidays_for_year(calendar_year):
    """Return the four flagged holidays of *calendar_year* as midnight datetimes.

    These are the same four dates per year that the table was originally
    hand-populated with: Jan 1, Jul 4, the fourth Thursday of November and
    Dec 25.
    """
    return {
        datetime(calendar_year, 1, 1),
        datetime(calendar_year, 7, 4),
        _thanksgiving(calendar_year),
        datetime(calendar_year, 12, 25),
    }


# Derived from the date range so that widening start_date/end_date cannot
# silently leave the added years without holidays. The loop variable lives
# inside the comprehension on purpose: a module-level `year` would overwrite
# the `year` function pulled in by the wildcard pyspark import.
holidays = {
    holiday
    for calendar_year in range(start_date.year, end_date.year + 1)
    for holiday in _holidays_for_year(calendar_year)
}


def _dim_date_row(day):
    """Build one DimDate tuple for *day* (a midnight datetime).

    Field order must match the column-name list handed to
    spark.createDataFrame: DateKey, Date, Year, Quarter, Month, MonthName,
    Day, DayOfWeek, DayName, IsWeekend, IsHoliday, WeekOfYear.
    """
    iso_weekday = day.isoweekday()  # Monday=1 ... Sunday=7
    return (
        int(day.strftime("%Y%m%d")),    # DateKey, e.g. 20190101
        # Date stays a datetime, as before; the captured sample output shows
        # it rendered as a timestamp.
        # NOTE(review): day.date() would give a pure date column, but that
        # changes the schema of the saved table - confirm before switching.
        day,
        day.year,                       # Year
        (day.month - 1) // 3 + 1,       # Quarter: months 1-3 -> 1, ..., 10-12 -> 4
        day.month,                      # Month
        # MonthName / DayName come from strftime and follow the process locale.
        day.strftime("%B"),             # MonthName
        day.day,                        # Day
        iso_weekday,                    # DayOfWeek
        day.strftime("%A"),             # DayName
        1 if iso_weekday >= 6 else 0,   # IsWeekend: Saturday or Sunday
        1 if day in holidays else 0,    # IsHoliday
        # WeekOfYear is the ISO-8601 week number while Year is the calendar
        # year, so days at a year boundary can pair a late-December date with
        # week 1 (e.g. 2019-12-30 -> Year 2019, WeekOfYear 1).
        day.isocalendar()[1],
    )


dim_date_data = [_dim_date_row(d) for d in date_range]

# Column names, listed in the same order as the fields of each tuple in
# dim_date_data. Only names are supplied, so Spark infers the column types
# from the Python values.
dim_date_columns = [
    "DateKey", "Date", "Year", "Quarter", "Month", "MonthName",
    "Day", "DayOfWeek", "DayName", "IsWeekend", "IsHoliday", "WeekOfYear",
]
df_dim_date = spark.createDataFrame(dim_date_data, schema=dim_date_columns)

# Report the row count and preview the first ten rows before persisting.
generated_rows = df_dim_date.count()
print(f"\nGenerated {generated_rows} date records")
print("\nSample data:")
df_dim_date.show(10)

# Persist as a Delta table named DimDate; "overwrite" mode replaces the
# table's existing contents.
print("\nWriting to Lakehouse table: DimDate")
(
    df_dim_date.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("DimDate")
)

print("DimDate created successfully!")

StatementMeta(, 49283aab-0186-4670-bba8-6a9e6799bbcc, 3, Finished, Available, Finished)

POPULATING DimDate

Generated 2192 date records

Sample data:
+--------+-------------------+----+-------+-----+---------+---+---------+---------+---------+---------+----------+
| DateKey|               Date|Year|Quarter|Month|MonthName|Day|DayOfWeek|  DayName|IsWeekend|IsHoliday|WeekOfYear|
+--------+-------------------+----+-------+-----+---------+---+---------+---------+---------+---------+----------+
|20190101|2019-01-01 00:00:00|2019|      1|    1|  January|  1|        2|  Tuesday|        0|        1|         1|
|20190102|2019-01-02 00:00:00|2019|      1|    1|  January|  2|        3|Wednesday|        0|        0|         1|
|20190103|2019-01-03 00:00:00|2019|      1|    1|  January|  3|        4| Thursday|        0|        0|         1|
|20190104|2019-01-04 00:00:00|2019|      1|    1|  January|  4|        5|   Friday|        0|        0|         1|
|20190105|2019-01-05 00:00:00|2019|      1|    1|  January|  5|        6| Saturday|        1|        0|         1|
|20190106|2019-01-