In [0]:
from pyspark.sql import functions as F

# Bounds of the date dimension (ISO yyyy-MM-dd strings).
p_start_date = "2020-01-01"
p_end_date = "2029-12-31"

# One row per calendar day: SQL `sequence` yields the whole range as a single
# array column, which is then exploded straight into the DateDT column.
date_df = (
    spark.sql(
        f"SELECT sequence(DATE('{p_start_date}'), DATE('{p_end_date}'), INTERVAL 1 DAY) AS date_seq"
    )
    .select(F.explode("date_seq").alias("DateDT"))
)

# Derive every calendar attribute from DateDT in one projection.
# "*" keeps the existing column(s) first, followed by the attributes in order.
date_df = date_df.select(
    "*",
    # Integer surrogate key in yyyyMMdd form, e.g. 20200101.
    F.date_format("DateDT", "yyyyMMdd").cast("int").alias("DateSK"),
    F.year("DateDT").alias("YearID"),
    F.quarter("DateDT").alias("QuarterID"),
    F.month("DateDT").alias("MonthID"),
    F.dayofmonth("DateDT").alias("DayID"),
    # Spark's dayofweek numbering: 1 = Sunday ... 7 = Saturday.
    F.dayofweek("DateDT").alias("DayOfWeekID"),
    # NOTE(review): weekofyear follows ISO-8601 weeks per the Spark docs, so early
    # January dates may report week 52/53 — confirm this is the intended meaning.
    F.weekofyear("DateDT").alias("WeekOfYearID"),
    F.date_format("DateDT", "EEEE").alias("DayName"),
    F.date_format("DateDT", "MMMM").alias("MonthName"),
    # Weekend = Sunday (1) or Saturday (7).
    F.dayofweek("DateDT").isin(1, 7).alias("WeekendFlag"),
)



In [0]:
# Preview the generated date dimension rows.
display(date_df)

In [0]:
# Persist the date dimension as table silver.dim_date, replacing any existing data.
date_df.write.saveAsTable("silver.dim_date", mode="overwrite")

In [0]:
%sql
-- Query the silver.dim_date table to inspect its contents.
SELECT *
FROM silver.dim_date
-- Optional filter: uncomment the next line to return only weekend rows.
--WHERE WeekendFlag = true
;

In [0]:
%sql
-- Scratch example: sequence() called with two date literals and a one-day step.
SELECT sequence(DATE'2018-01-01', DATE'2018-01-05', INTERVAL 1 DAY)

In [0]:
%sql
-- Four ways to turn the string '2021-03-21' into a DATE value.
SELECT 
  CAST('2021-03-21' AS DATE), -- Explicit cast
  DATE('2021-03-21'), -- Synonym of CAST(expr AS DATE)
  DATE '2021-03-21', -- Typed date literal
  TO_DATE('2021-03-21') -- Function call; in PySpark it has to be imported (e.g. from pyspark.sql.functions)