In [12]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, year, month, dayofmonth, to_date
import datetime

In [2]:
# Obtain the Spark session for this job, registered under the
# "FacebookAdsFactSilver" application name. `getOrCreate()` hands back the
# session that is already running when there is one, otherwise it starts one.
session_builder = SparkSession.builder.appName("FacebookAdsFactSilver")
spark = session_builder.getOrCreate()

In [3]:
# ADLS Gen2 coordinates: the storage account and the container inside it.
storage_account_name = "learningstorage1093"
container_name = "learning"

# Both layers live in the same container, so build the abfss:// root once and
# append the layer-specific suffix for the Bronze input and the Silver output.
_container_root = "abfss://{}@{}.dfs.core.windows.net".format(container_name, storage_account_name)
_table_subpath = "facebook_ads/fact_facebook_ads_metrics"
bronze_path = _container_root + "/bronze/" + _table_subpath + "/historical/"
silver_path = _container_root + "/silver/" + _table_subpath + "/"


In [4]:
# Read the raw Bronze JSON files into a DataFrame. No schema is supplied, so
# Spark derives the column types from the data itself.
bronze_reader = spark.read.format("json")
df_bronze = bronze_reader.load(bronze_path)

In [6]:
# Print the column names and types of the Bronze DataFrame so the casts
# applied in the Silver projection can be checked against the raw schema.
print("Data Schema")
df_bronze.printSchema()

In [8]:
# Render the Bronze DataFrame for a visual sanity check.
# NOTE(review): `display` is not imported in this notebook; presumably it is
# injected by the notebook runtime (e.g. Databricks / Synapse) — confirm.
print("Sample Data:")
display(df_bronze)

In [20]:
# Build the Silver projection from the Bronze DataFrame: parse `date` into a
# DATE, rename the dimension columns, cast the metrics to numeric types, and
# carry the load metadata through. Output column order follows the lists below.

# Columns copied through without a type change, as (bronze name, silver name).
dimension_columns = [
    ("ad_id", "ad_id"),
    ("adset_id", "adset_id"),
    ("campaign_id", "campaign_id"),
    ("account_id", "account_id"),
    ("platform_position", "placement"),
    ("publisher_platform", "platform"),
]

# Columns cast to a numeric type, as (bronze name, Spark type, silver name).
# `actions_complete_registration` is currently left out of the projection.
# NOTE(review): "float" is Spark's 32-bit single-precision type — confirm that
# is precise enough for `spend` before aggregating it downstream.
metric_columns = [
    ("spend", "float", "spend"),
    ("impressions", "int", "impressions"),
    ("clicks", "int", "clicks"),
    ("frequency", "float", "frequency"),
    ("actions_lead", "int", "leads"),
    ("actions_post_engagement", "int", "ad_engagement"),
    ("actions_video_view", "int", "video_views"),
    ("video_thruplay_watched_actions_video_view", "int", "thruplay_video_views"),
]

df_silver = df_bronze.select(
    # Rows whose `date` does not match yyyy-MM-dd come out of to_date as null.
    to_date(col("date"), "yyyy-MM-dd").alias("date"),
    *[col(source_name).alias(target_name) for source_name, target_name in dimension_columns],
    *[
        col(source_name).cast(spark_type).alias(target_name)
        for source_name, spark_type, target_name in metric_columns
    ],
    # Metadata fields
    col("load_date").cast("timestamp").alias("load_date"),
    col("source").alias("source"),
)

In [21]:
# Print the Silver schema to confirm the renamed columns and cast types.
df_silver.printSchema()

In [22]:
# Render the projected Silver DataFrame for a visual check of the cast values.
# NOTE(review): `display` is not imported in this notebook; presumably it is
# injected by the notebook runtime (e.g. Databricks / Synapse) — confirm.
print("Silver Data:")
display(df_silver)

In [23]:
# Derive the year / month / day partition columns from the parsed `date`
# column. `withColumn` replaces a same-named column, so re-running this cell
# does not produce duplicates.
date_column = col("date")
for part_name, extract_part in (("year", year), ("month", month), ("day", dayofmonth)):
    df_silver = df_silver.withColumn(part_name, extract_part(date_column))

In [24]:
# Persist the Silver DataFrame as Parquet under `silver_path`, laid out in
# year=/month=/day= directories. "overwrite" mode replaces data that already
# exists at the target location.
partition_columns = ["year", "month", "day"]

print("🚀 Saving `fact_facebook_ads_metrics` to Silver Layer...")
(
    df_silver.write
    .format("parquet")
    .mode("overwrite")
    .partitionBy(*partition_columns)
    .save(silver_path)
)
print(f"✅ Successfully saved `fact_facebook_ads_metrics` to {silver_path}")