In [0]:
from pyspark.sql.functions import current_timestamp
from pyspark.sql import functions as F

In [0]:
import datetime as _dt


def _widget_or_default(name, default):
    """Return the value of notebook widget ``name``, or ``default`` if it cannot be read.

    Args:
        name: Widget name passed to ``dbutils.widgets.get``.
        default: Value returned when the lookup raises.

    Returns:
        The widget value, or ``default`` on any lookup failure.
    """
    try:
        return dbutils.widgets.get(name)
    except Exception:
        # Deliberate best-effort fallback: the lookup raises when the widget
        # is not defined, and ``dbutils`` itself is an undefined name
        # (NameError) when this code runs outside a Databricks notebook.
        return default


# Run parameters. Each one can be overridden by a widget of the same name.
arrival_date = _widget_or_default("arrival_date", _dt.date.today().strftime("%Y-%m-%d"))
catalog = _widget_or_default("catalog", "travel_bookings")
schema = _widget_or_default("schema", "default")
# The default volume path is derived from the resolved catalog and schema.
base_volume = _widget_or_default("base_volume", f"/Volumes/{catalog}/{schema}/data")

# Fail fast on a malformed date: arrival_date is interpolated into the input
# file path and converted to the business_date column further down, so a bad
# value would otherwise only surface later as a less obvious error.
try:
    _dt.datetime.strptime(arrival_date, "%Y-%m-%d")
except (TypeError, ValueError) as exc:
    raise ValueError(
        f"arrival_date must be a date in YYYY-MM-DD format, got {arrival_date!r}"
    ) from exc


In [0]:
# Location of the bookings extract for the requested arrival date.
booking_path = f"{base_volume}/booking_data/bookings_{arrival_date}.csv"

# CSV reader settings: first row is the header, column types are inferred,
# fields are quoted with a double quote, and quoted fields may span lines.
bookings_reader = spark.read.format("csv").options(
    header="true",
    inferSchema="true",
    quote="\"",
    multiLine="true",
)

df = bookings_reader.load(booking_path)

In [0]:
# Tag every row with audit columns: the timestamp at which this run ingested
# it, and the business date taken from the arrival_date parameter.
business_date_col = F.to_date(F.lit(arrival_date))
df = (
    df
    .withColumn("ingestion_time", F.current_timestamp())
    .withColumn("business_date", business_date_col)
)


In [0]:
# Make sure the bronze schema exists before writing into it.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.bronze")

# Append this run's rows to the incremental bookings Delta table.
bronze_table = f"{catalog}.bronze.booking_inc"
df.write.saveAsTable(bronze_table, format="delta", mode="append")

# NOTE(review): count() is a separate Spark action issued after the write,
# so the DataFrame is evaluated again to produce this number.
ingested_rows = df.count()
print(f"Ingested rows: {ingested_rows}")
