In [22]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, year, month, dayofmonth, to_date, to_timestamp
import datetime

StatementMeta(practice, 14, 23, Finished, Available, Finished)

In [23]:
# Obtain the notebook's Spark session (reusing an existing one if present),
# registered under this job's application name.
spark = (
    SparkSession.builder
    .appName("FacebookAdsDimAdSilver")
    .getOrCreate()
)

StatementMeta(practice, 14, 24, Finished, Available, Finished)

In [24]:
# Storage account and container that hold both lake layers
storage_account_name = "learningstorage1093"
container_name = "learning"

# Bronze (input) and Silver (output) locations share the same abfss:// root
_lake_root = "abfss://" + container_name + "@" + storage_account_name + ".dfs.core.windows.net"
bronze_path = _lake_root + "/bronze/facebook_ads/dim_ad/historical/"
silver_path = _lake_root + "/silver/facebook_ads/dim_ad/"


StatementMeta(practice, 14, 25, Finished, Available, Finished)

In [25]:
# Read the raw JSON files under the Bronze path; no schema is supplied,
# so Spark infers the column types.
df_bronze = spark.read.format("json").load(bronze_path)

StatementMeta(practice, 14, 26, Finished, Available, Finished)

In [26]:
# Print a heading, then the Bronze DataFrame's schema as a tree so the
# column names and types can be checked before transforming.
print("Data Schema")
df_bronze.printSchema()

StatementMeta(practice, 14, 27, Finished, Available, Finished)

Data Schema
root
 |-- ad_created_time: string (nullable = true)
 |-- ad_id: string (nullable = true)
 |-- ad_name: string (nullable = true)
 |-- adcontent: string (nullable = true)
 |-- load_date: string (nullable = true)
 |-- source: string (nullable = true)
 |-- year: integer (nullable = true)



In [27]:
# Print a heading, then render the Bronze DataFrame with the notebook's
# display() helper for visual inspection.
print("Sample Data:")
display(df_bronze)

StatementMeta(practice, 14, 28, Finished, Available, Finished)

Sample Data:


SynapseWidget(Synapse.DataFrame, 3deaf0cc-a3d1-4a69-9a5b-03324b6088e9)

In [28]:
# Build the Silver projection from Bronze:
#   - parse `ad_created_time` (ISO-8601 string with zone offset) into a timestamp
#   - rename `adcontent` to `ad_content`
#   - cast the `load_date` metadata field to a timestamp
# Only the columns listed here are kept; the Bronze `year` column is not selected.
created_ts = to_timestamp(col("ad_created_time"), "yyyy-MM-dd'T'HH:mm:ssX")
loaded_ts = col("load_date").cast("timestamp")

df_silver = df_bronze.select(
    created_ts.alias("ad_created_time"),
    "ad_id",
    "ad_name",
    col("adcontent").alias("ad_content"),
    # Metadata fields
    loaded_ts.alias("load_date"),
    "source",
)

StatementMeta(practice, 14, 29, Finished, Available, Finished)

In [29]:
# Print the Silver schema to confirm the resulting column names and types.
df_silver.printSchema()

StatementMeta(practice, 14, 30, Finished, Available, Finished)

root
 |-- ad_created_time: timestamp (nullable = true)
 |-- ad_id: string (nullable = true)
 |-- ad_name: string (nullable = true)
 |-- ad_content: string (nullable = true)
 |-- load_date: timestamp (nullable = true)
 |-- source: string (nullable = true)



In [30]:
# Derive the `year` column from `ad_created_time`; it is used as the
# partition key when the frame is written.
df_silver = df_silver.withColumn("year", year("ad_created_time"))

StatementMeta(practice, 14, 31, Finished, Available, Finished)

In [31]:
# Preview the first 5 rows of the Silver DataFrame.
df_silver.show(5)

StatementMeta(practice, 14, 32, Finished, Available, Finished)

+-------------------+-----------------+--------------------+--------------------+--------------------+------------+----+
|    ad_created_time|            ad_id|             ad_name|          ad_content|           load_date|      source|year|
+-------------------+-----------------+--------------------+--------------------+--------------------+------------+----+
|2023-07-24 14:24:26|23857169141750305|Angle US - Visuel #7|Passive Income 25...|2025-03-02 15:32:...|facebook_ads|2023|
|2023-07-24 14:24:26|23857169141760305|Angle US - Visuel #4|Passive Income 25...|2025-03-02 15:32:...|facebook_ads|2023|
|2023-07-24 14:24:26|23857169141770305|Angle US - Visuel #1|Business 25+ - An...|2025-03-02 15:32:...|facebook_ads|2023|
|2023-07-24 14:24:31|23857169141790305|Angle US - Visuel #7|Business 25+ - An...|2025-03-02 15:32:...|facebook_ads|2023|
|2023-07-24 14:24:31|23857169141800305|Angle US - Visuel...|Passive Income 25...|2025-03-02 15:32:...|facebook_ads|2023|
+-------------------+-----------

In [32]:
# Persist the Silver frame as Parquet, partitioned by `year`.
# "overwrite" mode replaces whatever already exists at the target path.
print("🚀 Saving `dim_ad` to Silver Layer...")
(
    df_silver.write
    .partitionBy("year")
    .mode("overwrite")
    .parquet(silver_path)
)
print(f"✅ Successfully saved `dim_ad` to {silver_path}")

StatementMeta(practice, 14, 33, Finished, Available, Finished)

🚀 Saving `dim_ad` to Silver Layer...
✅ Successfully saved `dim_ad` to abfss://learning@learningstorage1093.dfs.core.windows.net/silver/facebook_ads/dim_ad/
