In [0]:
from pyspark.sql.functions import date_format, col, count, lit
from pyspark.sql.types import StringType
from datetime import datetime, date

In [0]:
# Session-level Spark/Delta settings, applied in this order before any read or write.
# NOTE(review): the retention-duration check is switched off here although nothing in
# this section runs VACUUM — confirm that a later step still needs it.
for conf_key, conf_value in (
    ("spark.databricks.delta.optimizeWrite.enabled", "true"),
    ("spark.databricks.delta.retentionDurationCheck.enabled", "false"),
    ("spark.databricks.optimizer.dynamicPartitionPruning", "true"),
    ("spark.databricks.delta.properties.defaults.autoOptimize.autoCompact", "false"),
):
    spark.conf.set(conf_key, conf_value)

In [0]:
%run "/Helpers/DataLoader"

In [0]:
# Notebook parameter: the business date to process, supplied as text in YYYY-MM-DD form.
dbutils.widgets.text("DateToProcess","")

# Validate the raw widget text before parsing. The widget defaults to an empty string,
# so an unparameterised run should fail with a message that names the widget and the
# expected format. Surrounding whitespace is tolerated.
_date_to_process_raw = dbutils.widgets.get("DateToProcess").strip()
if not _date_to_process_raw:
    raise ValueError(
        "Widget 'DateToProcess' is empty; expected a date in YYYY-MM-DD format."
    )
try:
    DateToProcess = datetime.strptime(_date_to_process_raw, "%Y-%m-%d").date()
except ValueError as exc:
    raise ValueError(
        f"Widget 'DateToProcess' must be a date in YYYY-MM-DD format, got {_date_to_process_raw!r}."
    ) from exc

# Derived renderings of the processing date:
#   DateToProcess_path -> 'YYYY/MM/DD' (folder hierarchy)
#   DateToProcess_str  -> 'YYYYMMDD'   (file-name suffix)
DateToProcess_path = DateToProcess.strftime('%Y/%m/%d')
DateToProcess_str = DateToProcess.strftime('%Y%m%d')

In [0]:
# Raw CSV extract for the processing date:
#   <Raw/Transaction>/<YYYY/MM/DD>/Transaction_<YYYYMMDD>.csv
raw_transaction = f"/mnt/ADLS/Example/Raw/Transaction/{DateToProcess_path}/Transaction_{DateToProcess_str}.csv"

# Root folder of the prepared Transaction Delta data.
delta_transaction = "/mnt/ADLS/Example/Prepared/Transaction/DELTA/"

In [0]:
# Read the raw CSV through `data_loader` and derive the string year/month/day
# columns from the `date` column, in that column order.
# NOTE(review): `data_loader` is presumably defined by the /Helpers/DataLoader notebook
# pulled in via %run; its schema handling is not visible here — confirm `date` parses.
df_transaction = (
    data_loader(raw_transaction,'csv')
    .withColumn('year', date_format(col('date'),'yyyy').cast(StringType()))
    .withColumn('month', date_format(col('date'),'MM').cast(StringType()))
    .withColumn('day', date_format(col('date'),'dd').cast(StringType()))
)

# Expose the staged rows to SQL under the name used by the MERGE statement.
df_transaction.createOrReplaceTempView('vw_Transaction')

In [0]:
%sql

-- Register the prepared Transaction Delta table over its storage location.
-- The table is created only when it is not already registered: dropping and
-- re-creating it on every run would discard table-level metadata (grants,
-- properties) and leave a window in which concurrent readers or runs see no table.
-- Partitioned by the string year/month/day columns.
CREATE TABLE IF NOT EXISTS Example_Prepared_Transaction_DELTA (
  `date` STRING,
  `transaction_id` STRING,
  `product_id` STRING,
  `product_name` STRING,
  `customer_id` STRING,
  `cost` DOUBLE,
  `currency` STRING,
  `quantity` INTEGER,
  `year` STRING,
  `month` STRING,
  `day` STRING
)
USING delta
PARTITIONED BY (year, month, day)
LOCATION '/mnt/ADLS/Example/Prepared/Transaction/DELTA/'

In [0]:
# Upsert the staged rows (temp view `vw_Transaction`) into the prepared Delta table.
#   - Match key: `date` + `transaction_id`, plus the year/month/day partition columns.
#   - Matched rows: every non-key column is overwritten with the source value.
#   - Unmatched source rows: inserted with all eleven columns.
# NOTE(review): presumably `vw_Transaction` holds at most one row per match key; a MERGE
# where several source rows match the same target row is expected to fail — confirm
# against the raw extract.
# NOTE(review): `InsertPO` holds whatever spark.sql returns for the MERGE; nothing in
# this section reads it.
InsertPO = spark.sql("""
  MERGE INTO Example_Prepared_Transaction_DELTA AS target 
  USING vw_Transaction AS source 
    ON target.`date` = source.`date`
    AND target.`transaction_id` = source.`transaction_id` 
    AND target.`year` = source.`year` 
    AND target.`month` = source.`month` 
    AND target.`day` = source.`day`
  WHEN MATCHED THEN 
    UPDATE SET 
      target.`product_id` = source.`product_id`,
      target.`product_name` = source.`product_name`,
      target.`customer_id` = source.`customer_id`,
      target.`cost` = source.`cost`,
      target.`currency` = source.`currency`,
      target.`quantity` = source.`quantity`
  WHEN NOT MATCHED THEN 
    INSERT (
      `date`,
      `transaction_id`,
      `product_id`, 
      `product_name`, 
      `customer_id`, 
      `cost`, 
      `currency`, 
      `quantity`, 
      `year`, 
      `month`, 
      `day`
    ) 
    VALUES ( 
      source.`date`, 
      source.`transaction_id`, 
      source.`product_id`, 
      source.`product_name`, 
      source.`customer_id`, 
      source.`cost`, 
      source.`currency`, 
      source.`quantity`, 
      source.`year`, 
      source.`month`, 
      source.`day`
    )
""")