In [2]:
import datetime
import json
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, year, month, dayofmonth, lit

StatementMeta(practice, 8, 2, Finished, Available, Finished)

In [3]:
# Obtain the Spark session used by the rest of the notebook
# (getOrCreate() reuses an existing session when there is one).
spark = (
    SparkSession.builder
    .appName("FacebookAdsBackfillDimAd")
    .getOrCreate()
)

# ADLS Gen2 destination: <container>@<account> followed by the layer folder
# and the fixed dim_ad historical sub-path.
storage_account_name = "learningstorage1093"
container_name = "learning"
stage = "bronze"
adls_path = (
    f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/"
    f"{stage}/facebook_ads/dim_ad/historical/"
)

StatementMeta(practice, 8, 3, Finished, Available, Finished)

In [4]:
# Facebook Ads API Config
# The API key is a credential, so it is read from the WINDSOR_API_KEY
# environment variable instead of being stored in the notebook. The key that
# used to be hardcoded on this line has been exposed and should be rotated.
API_KEY = os.environ.get("WINDSOR_API_KEY", "")
if not API_KEY:
    # Warn here so a missing key is visible before any request is attempted.
    print("❌ WINDSOR_API_KEY is not set; requests would be sent with an empty api_key.")
BASE_URL = "https://connectors.windsor.ai/facebook"
# NOTE(review): ACCOUNT_ID is not referenced by any cell visible here —
# confirm whether the request is meant to be scoped to this account.
ACCOUNT_ID = "2977132755882180"
# Selecting Table Fields for `dim_ad`
FIELDS = "ad_id,ad_name,adcontent,ad_created_time"

StatementMeta(practice, 8, 4, Finished, Available, Finished)

In [5]:
# Backfill window: a fixed first day through the day before the run date.
start_date = datetime.date(year=2023, month=1, day=1)
end_date = datetime.date.today() + datetime.timedelta(days=-1)

StatementMeta(practice, 8, 5, Finished, Available, Finished)

In [6]:
def fetch_facebook_ads_dim_ad(date_from=None, date_to=None, timeout=(10, 300)):
    """Fetch Facebook Ads `dim_ad` records from the connector endpoint.

    Args:
        date_from: First day of the range (date or string). Defaults to the
            notebook-level ``start_date``.
        date_to: Last day of the range (date or string). Defaults to the
            notebook-level ``end_date``.
        timeout: Passed to ``requests.get`` as ``(connect, read)`` seconds so
            a stalled connection cannot hang the notebook indefinitely.

    Returns:
        The value stored under the ``"data"`` key of the JSON response, or an
        empty list when the request fails, the status is not 200, the body is
        not valid JSON, or the body has no ``"data"`` key. Failures are
        reported with ``print`` rather than raised.
    """
    if date_from is None:
        date_from = start_date
    if date_to is None:
        date_to = end_date

    params = {
        "api_key": API_KEY,
        # str() makes the query-string form explicit (ISO YYYY-MM-DD for
        # date objects) instead of relying on implicit conversion.
        "date_from": str(date_from),
        "date_to": str(date_to),
        "fields": FIELDS,
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # The exception text can contain the full URL, including api_key,
        # so only the exception type is printed.
        print(f"Error fetching dim_ad data: request failed ({type(exc).__name__})")
        return []

    if response.status_code != 200:
        print(f"Error fetching dim_ad data: {response.text}")
        return []

    try:
        payload = response.json()
    except ValueError as e:
        # ValueError is the common base of the JSON decode errors that
        # response.json() may raise (stdlib json or simplejson backed).
        print(f"JSON parsing error: {e}")
        return []

    # Guard against non-object JSON (null, list, string) before key lookup.
    if isinstance(payload, dict) and "data" in payload:
        return payload["data"]

    print("No 'data' key found in response for dim_ad")
    return []

StatementMeta(practice, 8, 6, Finished, Available, Finished)

In [7]:
# Fetch data
# Calls the fetch function with its defaults; `data` receives whatever the
# response carries under "data", or [] when the function reports a failure.
# The save cell below only checks `data` for truthiness.
print("📌 Fetching `dim_ad` data...")
data = fetch_facebook_ads_dim_ad()


StatementMeta(practice, 8, 7, Finished, Available, Finished)

📌 Fetching `dim_ad` data...


In [8]:
if data:
    try:
        # Build the bronze DataFrame in one chain:
        #   - load_date / source: constant metadata columns for this run
        #   - year: partition column derived from the ad creation time
        # NOTE(review): year() is applied to the raw API field; rows whose
        # ad_created_time cannot be read as a date would presumably get a
        # null year — confirm the format returned by the connector.
        df = (
            spark.createDataFrame(data)
            .withColumn("load_date", lit(datetime.datetime.now().isoformat()))
            .withColumn("source", lit("facebook_ads"))
            .withColumn("year", year(col("ad_created_time")))
        )

        # Save to ADLS; "overwrite" replaces what is already under adls_path.
        print("🚀 Saving `dim_ad` to Bronze Layer...")
        df.write.mode("overwrite").partitionBy("year").json(adls_path)

        print(f"✅ Successfully saved `dim_ad` data to {adls_path}")

    except Exception as e:
        print(f"❌ Error processing `dim_ad`: {e}")
        # Re-raise so the cell (and any pipeline running the notebook) is
        # marked as failed instead of finishing as if the save succeeded.
        raise

else:
    print("❌ No `dim_ad` data found, skipping save operation.")

StatementMeta(practice, 8, 8, Finished, Available, Finished)

🚀 Saving `dim_ad` to Bronze Layer...
✅ Successfully saved `dim_ad` data to abfss://learning@learningstorage1093.dfs.core.windows.net/bronze/facebook_ads/dim_ad/historical/
