In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, year, month, dayofmonth, to_date, to_timestamp
import datetime

In [3]:
# Obtain the Spark session used by every cell below, registered under this job's application name.
spark = (
    SparkSession.builder
    .appName("FacebookAdsDimCampaignSilver")
    .getOrCreate()
)

In [4]:
# ADLS Gen2 location of the data lake: storage account + container (file system).
storage_account_name = "learningstorage1093"
container_name = "learning"

# Both layers live in the same container, so build the abfss:// root once and
# append the layer-specific folder to it.
lake_root = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"

# Input: raw campaign dimension files (historical load) in the bronze layer.
bronze_path = lake_root + "/bronze/facebook_ads/dim_campaign/historical/"
# Output: cleaned campaign dimension in the silver layer.
silver_path = lake_root + "/silver/facebook_ads/dim_campaign/"


In [5]:
# Read the raw campaign records from the bronze folder as JSON.
# No explicit schema is passed, so the column types are whatever Spark infers from the files.
df_bronze = spark.read.format("json").load(bronze_path)

In [6]:
# Show the schema of the bronze frame so the columns referenced by the
# silver projection can be checked against what was actually loaded.
print("Data Schema")
df_bronze.printSchema()

In [10]:
# Silver projection of dim_campaign: business attributes pass through under
# their existing names; only the two time columns are re-typed.
df_silver = df_bronze.select(
    "campaign_id",  # Primary Key
    "campaign_name",
    "campaign_objective",
    "campaign_status",

    # Parse the creation-time string into a proper timestamp.
    # NOTE(review): values that do not match this pattern are expected to come
    # back as NULL (or raise under ANSI mode) — confirm against the bronze data.
    to_timestamp(col("campaign_created_time"), "yyyy-MM-dd'T'HH:mm:ssX").alias("campaign_created_time"),

    # Metadata Fields
    col("load_date").cast("timestamp").alias("load_date"),
    "source",
)

In [13]:
# Derive the `year` column from `campaign_created_time`; the write step
# partitions the output by this column.
df_silver = df_silver.withColumn("year", year("campaign_created_time"))

In [14]:
# Check the resulting silver schema (column names and types) before writing.
df_silver.printSchema()

In [15]:
# Visual spot-check of the transformed rows before they are persisted.
# NOTE(review): `display` is not imported in the visible code; presumably it is
# the notebook environment's built-in renderer — confirm.
print("Sample Data:")
display(df_silver)

In [16]:
# Persist the silver frame as Parquet, one sub-folder per `year` value.
# "overwrite" replaces data already present at the target.
# NOTE(review): whether the whole folder or only the written partitions are
# replaced depends on the session's partition-overwrite setting — confirm.
print("🚀 Saving `dim_campaign` to Silver Layer...")
(
    df_silver.write
    .partitionBy("year")
    .mode("overwrite")
    .parquet(silver_path)
)
print(f"Successfully saved `dim_campaign` to {silver_path}")