In [None]:
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
from pyspark.sql.types import (StructType, StructField, StringType,
                               IntegerType, BooleanType, TimestampType,
                              ArrayType, MapType)

from google.cloud import bigquery

# Create (or reuse) the Spark session for the DimDate job. The
# spark-bigquery connector artifact is requested via spark.jars.packages.
spark = (
    SparkSession.builder
    .appName("DimDate")
    .config(
        "spark.jars.packages",
        "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.24.0",
    )
    .getOrCreate()
)

# First and last calendar day covered by the date dimension.
start_date = '1962-01-01'
end_date = '2023-02-28'

# One-row DataFrame carrying both bounds as string columns.
date_bounds = spark.createDataFrame(
    [(start_date, end_date)],
    ["start_date", "end_date"],
)

# Expand the bounds into one row per date between them.
calendar_days = date_bounds.select(
    F.explode(
        F.sequence(F.to_date("start_date"), F.to_date("end_date"))
    ).alias("date_value")
)

# Derive the descriptive calendar attributes from each date.
date_col = F.col("date_value")
df = calendar_days.select(
    date_col,
    F.dayofmonth(date_col).alias("day_of_month"),
    F.date_format(date_col, "EEEE").alias("day_of_week"),
    F.month(date_col).alias("month_number"),
    F.date_format(date_col, "MMMM").alias("month_name"),
    F.year(date_col).alias("year"),
    F.quarter(date_col).alias("quarter"),
)

# Integer key of the form YYYYMMDD built from the numeric date parts.
date_key = (
    F.col("year") * 10000
    + F.col("month_number") * 100
    + F.col("day_of_month")
)
result_df = df.withColumn("DateKey", date_key)

# Upload the date dimension to BigQuery, replacing the table's contents.
# The Spark session is stopped in `finally` so it is released even when the
# pandas conversion or the load job raises.
try:
    # BigQuery client bound to the target project.
    client = bigquery.Client(project='noted-span-377814')

    # Client.dataset() is deprecated; construct the dataset reference directly.
    dataset_ref = bigquery.DatasetReference(client.project, 'Stocks_DW')
    table_ref = dataset_ref.table('DimDate')

    # WRITE_TRUNCATE overwrites the destination table instead of appending.
    job_config = bigquery.LoadJobConfig(write_disposition='WRITE_TRUNCATE')

    # toPandas() collects the whole DataFrame onto the driver before upload.
    job = client.load_table_from_dataframe(
        result_df.toPandas(), table_ref, job_config=job_config
    )

    # result() waits for the load job to finish and raises if it failed.
    print(job.result())
finally:
    spark.stop()