In [0]:
%run ./01-config

In [0]:
class SetupHelper:
    """Load reference data from the landing zone into catalog tables.

    Settings come from ``Config()``: ``base_data_path`` is used as the
    landing-zone directory and ``db_name`` as the schema of the target table.
    NOTE(review): ``Config`` is not defined in this cell; presumably it is
    provided by the notebook executed via ``%run ./01-config`` - confirm.
    """

    def __init__(self, env):
        """Bind the helper to one catalog.

        Args:
            env: Catalog name, used as the first part of the three-part
                table name built in ``insert_date_table``.
        """
        conf = Config()
        self.landing_zone = conf.base_data_path
        self.catalog = env
        self.db_name = conf.db_name
        # NOTE(review): nothing in the code visible here ever sets this to
        # True - confirm whether other cells rely on it.
        self.initialized = False

    def insert_date_table(self, csv_filename):
        """Load a date CSV into ``<catalog>.<db_name>.date_lookup_bz``.

        The file is read from ``<landing_zone>/<csv_filename>`` with a header
        row and schema inference disabled, so every column arrives as a
        string. Each column is then converted explicitly and the result is
        written to the table in overwrite mode.

        Args:
            csv_filename: File name relative to the landing zone.

        Returns:
            ``True`` when the write completed, ``False`` when any step
            raised. Failures are printed and not propagated, so callers that
            need to stop on error must check the return value.
        """
        from pyspark.sql.functions import col, to_timestamp
        from pyspark.sql.types import IntegerType, ShortType, ByteType

        timestamp_format = "dd-MM-yy HH:mm"
        cast_types = {
            "int": IntegerType(),
            "short": ShortType(),
            "byte": ByteType(),
        }

        # (column name, conversion) pairs in write order. insertInto resolves
        # columns by position rather than by name, so this order has to match
        # the column order of the target table - do not sort or regroup it.
        column_spec = [
            ("date_key", "int"),
            ("full_date", "timestamp"),
            ("day_of_week", "byte"),
            ("day_num_in_month", "byte"),
            ("day_num_overall", "short"),
            ("day_name", "string"),
            ("day_abbrev", "string"),
            ("weekday_flag", "string"),
            ("week_num_in_year", "byte"),
            ("week_num_overall", "short"),
            ("week_begin_date", "timestamp"),
            ("week_begin_date_key", "int"),
            ("month", "byte"),
            ("month_num_overall", "short"),
            ("month_name", "string"),
            ("month_abbrev", "string"),
            ("quarter", "byte"),
            ("year", "short"),
            ("yearmo", "int"),
            ("fiscal_month", "byte"),
            ("fiscal_quarter", "byte"),
            ("fiscal_year", "short"),
            ("last_day_in_month_flag", "string"),
            ("same_day_year_ago_date", "timestamp"),
        ]

        def convert(name, kind):
            # Build the typed output column for one source column.
            source = col(name)
            if kind == "timestamp":
                return to_timestamp(source, timestamp_format).alias(name)
            if kind == "string":
                return source.alias(name)
            return source.cast(cast_types[kind]).alias(name)

        try:
            csv_path = f"{self.landing_zone}/{csv_filename}"
            raw = (
                spark.read
                .option("header", "true")
                .option("inferSchema", "false")
                .csv(csv_path)
            )
            typed = raw.select(*[convert(name, kind) for name, kind in column_spec])

            table_name = f"{self.catalog}.{self.db_name}.date_lookup_bz"
            # Request the overwrite on insertInto itself instead of relying on
            # a preceding .mode("overwrite"): older PySpark releases replaced
            # that mode with append when insertInto's own flag was left unset.
            typed.write.insertInto(table_name, overwrite=True)
            print("Done")
            return True

        except Exception as e:
            # Kept non-raising so existing callers behave as before; the
            # boolean result is what distinguishes failure from success.
            print(f"Error: {e}")
            return False

# Create the helper for the "dev" catalog and load date.csv from the landing zone.
setup = SetupHelper(env="dev")
setup.insert_date_table(csv_filename="date.csv")

In [0]:
%sql
-- Switch to the dev catalog, then preview up to 100 rows of the date lookup table.
USE CATALOG `dev`;
SELECT *
FROM `ecommerce_db`.`date_lookup_bz`
LIMIT 100;