In [0]:
import datetime as _dt
def _widget_or_default(name: str, default: str) -> str:
    """Return the value of notebook widget ``name``, or ``default`` if it cannot be read.

    Args:
        name: Widget name passed to ``dbutils.widgets.get``.
        default: Value to use when the widget lookup raises.

    Returns:
        The widget value, or ``default`` when the lookup fails for any reason.
    """
    try:
        return dbutils.widgets.get(name)
    except Exception:
        # Deliberately broad: the lookup also fails with NameError when
        # `dbutils` itself is not defined (e.g. running outside a notebook
        # runtime), and that case must fall back to the default as well.
        return default


# Run parameters. Each one falls back to a default when its widget is unavailable.
# The arrival date defaults to today's date as seen by the process running this cell.
arrival_date = _widget_or_default("arrival_date", _dt.date.today().strftime("%Y-%m-%d"))
catalog = _widget_or_default("catalog", "travel_bookings")
schema = _widget_or_default("schema", "default")
# The default volume path is derived from the catalog/schema resolved above.
base_volume = _widget_or_default("base_volume", f"/Volumes/{catalog}/{schema}/data")

from pyspark.sql import functions as F

In [0]:
# Location of the customer extract for the requested arrival date.
customer_path = f"{base_volume}/customer_data/customers_{arrival_date}.csv"

# CSV reader settings: first line is a header, column types are inferred,
# fields are quoted with double quotes, and a record may span several lines.
csv_read_options = {
    "header": "true",
    "inferSchema": "true",
    "quote": "\"",
    "multiLine": "true",
}

df = spark.read.format("csv").options(**csv_read_options).load(customer_path)


In [0]:
# Literal date expressions stamped onto every row of this load.
arrival_day = F.to_date(F.lit(arrival_date))
open_ended_day = F.to_date(F.lit("9999-12-31"))

# NOTE(review): the column names suggest validity-window tracking where every
# freshly loaded row starts out as the current version - confirm with the
# downstream consumer of this table.
out = (
    df.withColumn("valid_from", arrival_day)
    .withColumn("valid_to", open_ended_day)
    .withColumn("is_current", F.lit(True))
    .withColumn("business_date", arrival_day)
)


In [0]:
# Make sure the target schema exists before writing into it.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog}.bronze")

# NOTE(review): the write mode is "append", so re-running this cell for the
# same arrival date adds the same rows to the table again.
target_table = f"{catalog}.bronze.customer_inc"
delta_writer = out.write.format("delta").mode("append")
delta_writer.saveAsTable(target_table)

# NOTE(review): `out` is not cached, so this count is a second action that
# re-evaluates the DataFrame (including the CSV read) after the write.
ingested_rows = out.count()
print(f"Ingested rows: {ingested_rows}")
