## Stream Customers Data From Cloud Files to Delta Lake using AutoLoader File Options
1. Read files from cloud storage using AutoLoader 
1. Transform the DataFrame by adding the following columns:
    -   `file_path`: cloud path of the source file
    -   `ingestion_date`: current timestamp at ingestion
1. Write the transformed data stream to Delta Lake Table

In [0]:
# 1.
from pyspark.sql.types import *
from pyspark.sql import functions as F


In [0]:
%sql
-- Reset the target table before the stream is (re)started.
-- IF EXISTS keeps this cell from failing when the table has not been created yet.
-- NOTE(review): the stream's checkpoint and schema directories are not cleared here;
-- presumably already-processed files will not be re-ingested after the drop - confirm this is intended.
DROP TABLE IF EXISTS gizmobox.bronze.customers_autoloader

In [0]:
# Streaming source: Auto Loader ("cloudFiles") over the customers landing folder.
# All reader options are gathered in one mapping and applied with a single options() call.
customer_df = (
    spark.readStream
    .format("cloudFiles")
    .options(
        **{
            # Source files are JSON documents.
            "cloudFiles.format": "json",
            # Directory where Auto Loader keeps the schema it tracks for this stream.
            "cloudFiles.schemaLocation": "/Volumes/gizmobox/landing/operational_data/customers_autoloader/_schema",
            # Ask Auto Loader to infer column types.
            "cloudFiles.inferColumnTypes": "true",
            # Explicit types for the date and timestamp columns.
            "cloudFiles.schemaHints": "date_of_birth DATE, member_since DATE, created_timestamp TIMESTAMP",
            # Only pick up files whose name matches this pattern.
            "pathGlobFilter": "customers_2024_*.json",
        }
    )
    .load("/Volumes/gizmobox/landing/operational_data/customers_autoloader/")
)

In [0]:
# 2. Add audit columns to the incoming stream.
customers_transormed_df = customer_df.withColumns(
    {
        # Path of the file each row was read from, taken from the _metadata column.
        "file_path": F.col("_metadata.file_path"),
        # Timestamp at which the row is processed.
        "ingestion_date": F.current_timestamp(),
    }
)

In [0]:
# 3. Write the transformed stream to the bronze Delta table.
# The output format and the checkpoint location are passed as toTable() keyword
# arguments; toTable() starts the query and returns its handle.
streaming_query = customers_transormed_df.writeStream.toTable(
    "gizmobox.bronze.customers_autoloader",
    format="delta",
    checkpointLocation="/Volumes/gizmobox/landing/operational_data/customers_autoloader/_checkpoint_autoloader",
)

In [0]:
# Stop the streaming query that writes to gizmobox.bronze.customers_autoloader.
# NOTE(review): no trigger is configured on the writer, so the query presumably
# keeps running until it is stopped explicitly - confirm before running this cell.
streaming_query.stop()

In [0]:
%sql
-- Inspect the rows written to the bronze table.
SELECT *
FROM gizmobox.bronze.customers_autoloader