In [0]:
-- Preview ten rows read directly from the raw users-historical parquet files.
SELECT
  *
FROM parquet.`/Volumes/databricks_simulated_e_commerce_clickstream_data/v01/raw/users-historical/`
LIMIT 10;

In [0]:
-- Build the bronze users table from the raw users-historical parquet files,
-- keeping every source column and appending a derived date, two file metadata
-- columns and the ingestion timestamp.
-- OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails on the
-- second run because the table already exists.
CREATE OR REPLACE TABLE history_users_bronze_with_metadata_ctas
AS
SELECT
  *,
  -- user_first_touch_timestamp is divided by 1,000,000 before from_unixtime
  -- (presumably it holds microseconds since the epoch -- TODO confirm).
  CAST(from_unixtime(user_first_touch_timestamp / 1000000) AS DATE) AS first_touch_date,
  -- Source file name and last-modification time taken from the _metadata column.
  _metadata.file_name,
  _metadata.file_modification_time,
  -- Timestamp evaluated when this statement runs.
  current_timestamp() AS ingestion_time
FROM parquet.`/Volumes/databricks_simulated_e_commerce_clickstream_data/v01/raw/users-historical/`;

In [0]:
-- Show the contents of the table built by the CTAS statement.
SELECT
  *
FROM history_users_bronze_with_metadata_ctas

In [0]:
%python

from pyspark.sql.functions import col,from_unixtime,current_timestamp
from pyspark.sql.types import DateType

# 1. Load the users-historical parquet files from the volume into a DataFrame.
users_historical_path = "/Volumes/databricks_simulated_e_commerce_clickstream_data/v01/raw/users-historical/"
df = spark.read.parquet(users_historical_path)

# 2. Append the derived date, file metadata and timestamp columns (in this order).
#    user_first_touch_timestamp is divided by 1,000,000 before from_unixtime
#    (presumably it holds microseconds since the epoch -- TODO confirm).
first_touch_date_expr = from_unixtime(col("user_first_touch_timestamp") / 1000000).cast(DateType())

# NOTE(review): the SQL CTAS cell names its equivalent timestamp column
# ingestion_time, while this cell writes current_time_Stamp -- confirm whether
# the two tables are meant to share a column name.
df_withmetadata = (
    df
    .withColumn("first_touch_date", first_touch_date_expr)
    .withColumn("file_name", col("_metadata.file_name"))
    .withColumn("file_modification_time", col("_metadata.file_modification_time"))
    .withColumn("current_time_Stamp", current_timestamp())
)

# 3. Persist the result as a Delta table, overwriting any existing contents.
bronze_table_name = "workspace.default.history_users_bronze_with_metadata_python"
df_withmetadata.write.format("delta").mode("overwrite").saveAsTable(bronze_table_name)

# 4. Read the saved table back and render it.
historical_users_bronze_python_metadata = spark.table(bronze_table_name)
display(historical_users_bronze_python_metadata)