In [0]:
# --------------------------------
# Import Libraries
# --------------------------------
from pyspark.sql import SparkSession, Row
from datetime import datetime
import uuid
import traceback

# --------------------------------
# Initialize Spark Session
# --------------------------------
# getOrCreate() hands back an already-active session when there is one,
# otherwise it builds a new session under this application name.
spark = (
    SparkSession.builder
    .appName("Spark DataFrames")
    .getOrCreate()
)

# --------------------------------
# Job ID for this run
# --------------------------------
# Random UUID4 rendered as text; log_step writes it into every log row so
# all records produced by one execution can be grouped together.
job_id = str(uuid.uuid4())

# --------------------------------
# Logging function
# --------------------------------
def log_step(step_name: str, status: str = "INFO", message: str = "") -> None:
    """Append one log record for the current run to the Delta log table.

    Builds a single-row DataFrame from the module-level ``job_id``, the
    current UTC time as an ISO-8601 string, and the supplied step details,
    then appends it to ``project_logs.bronze_schema.log``.

    Args:
        step_name: Value stored in the ``step`` column.
        status: Value stored in the ``status`` column (defaults to "INFO").
        message: Value stored in the ``message`` column (defaults to "").

    Any exception raised while building or writing the row propagates to
    the caller; nothing is caught here.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Read an aware UTC
    # clock instead and drop tzinfo so the stored string keeps the same
    # offset-free ISO format as rows written before this change.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    log_df = spark.createDataFrame([
        Row(
            job_id=job_id,
            timestamp=timestamp,
            step=step_name,
            status=status,
            message=message
        )
    ])
    (
        log_df.write
        .format("delta")
        .mode("append")
        .saveAsTable("project_logs.bronze_schema.log")   # log table
    )

# --------------------------------
# ETL Process with Logging
# --------------------------------
try:
    log_step("START", "INFO", "Starting Bronze ingestion from S3")

    # Read Delta from S3
    df = spark.read.format("delta").load("s3://dbsawsproject/household/")
    row_count = df.count()
    log_step("LOAD_S3", "INFO", f"Loaded {row_count} records from S3")

    # Write to Bronze table (overwrite mode replaces the table's current contents)
    df.write.format("delta").mode("overwrite").saveAsTable("aws_dataingestion.bronze_aws.household")
    log_step("SAVE_BRONZE", "SUCCESS", "Saved data to Bronze table aws_dataingestion.bronze_aws.household")

    # Show sample records
    df.show(10)
    log_step("SHOW_DATA", "INFO", "Displayed first 10 records")

    log_step("END", "SUCCESS", "Bronze ingestion completed successfully ✅")

except Exception:
    # Capture the traceback text before doing anything else that might raise.
    failure_details = traceback.format_exc()
    try:
        log_step("ERROR", "FAIL", failure_details)
    except Exception:
        # Writing the failure record is best-effort: log_step itself talks to
        # Spark, so it can fail for the same reason the job did. Print that
        # secondary error rather than letting it replace the real one.
        traceback.print_exc()
    # Bare raise re-raises the original ingestion error, so the job still fails
    # with the exception that actually caused the problem.
    raise
