In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, year, month, dayofmonth, to_date, to_timestamp
import datetime

In [4]:
# Create the SparkSession for this job, or reuse the one the notebook
# runtime already has active (getOrCreate returns the existing session).
spark = (
    SparkSession.builder
    .appName("FacebookAdsDimPlatformSilver")
    .getOrCreate()
)

In [7]:
# ADLS Gen2 location of the lake: storage account and container (filesystem).
storage_account_name = "learningstorage1093"
container_name = "learning"

# Both layers live in the same container, so build the abfss:// root once
# and append the layer-specific folder for the `dim_platform` table.
_lake_root = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"
bronze_path = f"{_lake_root}/bronze/facebook_ads/dim_platform/"
silver_path = f"{_lake_root}/silver/facebook_ads/dim_platform/"


In [8]:
# Read every JSON file under the Bronze folder into a DataFrame.
# No schema is supplied, so Spark infers it from the data.
df_bronze = spark.read.format("json").load(bronze_path)

In [9]:
# Show the schema of the Bronze DataFrame as it was read from the JSON files.
print("Data Schema")
df_bronze.printSchema()

In [10]:
# Build the Silver projection of `dim_platform`.
# The three platform attributes and `source` pass through unchanged;
# only `load_date` is re-typed (cast to timestamp) under its original name.
platform_attributes = ["device_platform", "platform_position", "publisher_platform"]
load_timestamp = col("load_date").cast("timestamp").alias("load_date")

df_silver = df_bronze.select(*platform_attributes, load_timestamp, "source")

In [11]:
# Show the Silver schema; `load_date` is cast to timestamp in the select that builds df_silver.
df_silver.printSchema()

In [12]:
print("Sample Data:")
# NOTE(review): `display` is neither defined nor imported in this notebook;
# presumably it is the notebook runtime's built-in rich renderer — confirm
# before running this outside that environment.
display(df_silver)

In [13]:
# Persist the Silver DataFrame as Parquet, replacing whatever is already
# stored at `silver_path` (overwrite mode).
print("Saving `dim_platform` to Silver Layer...")
df_silver.write.parquet(silver_path, mode="overwrite")
print(f"Successfully saved `dim_platform` to {silver_path}")
