Databricks notebook 6: Create the Time (date) dimension table

In [None]:
from pyspark.sql.functions import monotonically_increasing_id, year, month, dayofmonth, date_format, to_date

In [None]:
# Read the Silver-layer trip transactions (stored as a Delta table) into a DataFrame.
trip_silver_path = "abfss://trip-data@<your-storage-account>.dfs.core.windows.net/silver/trip_transactions"
trip_df = spark.read.load(trip_silver_path, format="delta")

In [None]:
# Derive a single-column "TripDate" frame from each trip timestamp:
# convert the timestamp to a date, then discard rows where the result is null.
start_dates = (
    trip_df
    .select(to_date(trip_df["trip_start_timestamp"]).alias("TripDate"))
    .na.drop()
)
end_dates = (
    trip_df
    .select(to_date(trip_df["trip_end_timestamp"]).alias("TripDate"))
    .na.drop()
)

In [None]:
# Stack start and end dates, keeping each calendar date only once.
# Both inputs contain just the TripDate column, so distinct() de-duplicates on it.
dim_date = start_dates.union(end_dates).distinct()

In [None]:
# Enrich with date components.
#
# KeydateID is derived from the date itself in yyyyMMdd form
# (e.g. 2024-01-31 -> 20240131) instead of monotonically_increasing_id().
# The generated IDs from monotonically_increasing_id() depend on how the data
# is partitioned and are not stable between runs, so rebuilding this table
# could assign a different key to the same date. A date-derived key is unique
# per TripDate and identical on every rebuild. It is cast to long so the
# column keeps the same type it had before.
dim_date = (
    dim_date
    .withColumn("KeydateID", date_format("TripDate", "yyyyMMdd").cast("long"))
    .withColumn("Year", year("TripDate"))
    .withColumn("Month", month("TripDate"))
    .withColumn("Day", dayofmonth("TripDate"))
    # Pattern "E" formats the day of week as text (short form, e.g. "Mon").
    .withColumn("Weekday", date_format("TripDate", "E"))
)

In [None]:
# Put the key first, followed by the date and its derived components.
dim_date = dim_date.select(
    ["KeydateID", "TripDate", "Year", "Month", "Day", "Weekday"]
)

In [None]:
# Persist the date dimension to the Silver layer as a Delta table,
# replacing whatever was previously stored at this path.
dim_date.write.save(
    "abfss://trip-data@<your-storage-account>.dfs.core.windows.net/silver/dim_date",
    format="delta",
    mode="overwrite",
)