In [7]:
import datetime
import json
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, year, month, dayofmonth, lit

StatementMeta(practice, 9, 8, Finished, Available, Finished)

In [8]:
# Obtain the Spark session used by every later cell in this backfill notebook
spark = (
    SparkSession.builder
    .appName("FacebookAdsBackfillDimAdset")
    .getOrCreate()
)

# ADLS Gen2 destination: storage account, container and layer folder that
# together form the output path for the historical `dim_adset` backfill
storage_account_name = "learningstorage1093"
container_name = "learning"
stage = "bronze"
adls_path = (
    f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"
    f"/{stage}/facebook_ads/dim_adset/historical/"
)

StatementMeta(practice, 9, 9, Finished, Available, Finished)

In [9]:
# Windsor.ai connector settings for the Facebook Ads source.
# The API key is taken from the WINDSOR_API_KEY environment variable when it is
# set, so a real credential never has to be typed into the notebook; the original
# placeholder is kept as the fallback value.
API_KEY = os.environ.get("WINDSOR_API_KEY", "INSERT_WINDSOR_API_KEY")
BASE_URL = "https://connectors.windsor.ai/facebook"
# NOTE(review): ACCOUNT_ID is not referenced by any cell visible in this notebook —
# confirm whether the request is supposed to be filtered by account.
ACCOUNT_ID = "INSERT_ACCOUNT_ID"
# Fields requested for `dim_adset`, sent to the API as one comma-separated string
FIELDS = "adset_id,adset_name,adset_status,adset_created_time,adset_start_time,adset_end_time,adset_daily_budget"

StatementMeta(practice, 9, 10, Finished, Available, Finished)

In [10]:
# Backfill window: fixed start on 2023-01-01, ending the day before the run date
start_date = datetime.date(2023, 1, 1)
end_date = datetime.date.today() - datetime.timedelta(days=1)

StatementMeta(practice, 9, 11, Finished, Available, Finished)

In [11]:
def fetch_facebook_ads_dim_adset(timeout=120):
    """Fetch Facebook Ads `dim_adset` records from the Windsor.ai connector.

    Sends a single GET request to ``BASE_URL`` asking for ``FIELDS`` over the
    module-level ``start_date`` .. ``end_date`` window, authenticated with
    ``API_KEY``.

    Args:
        timeout: Seconds passed to ``requests.get`` as its timeout, so a stalled
            connection cannot block the notebook forever. Defaults to 120.

    Returns:
        The value stored under the ``"data"`` key of the JSON response. An empty
        list is returned (after printing a diagnostic) when the request cannot
        be completed, the status code is not 200, the body is not valid JSON,
        or the decoded body is not an object containing ``"data"``.
    """
    params = {
        "api_key": API_KEY,
        # str() of a datetime.date is its ISO form (YYYY-MM-DD)
        "date_from": str(start_date),
        "date_to": str(end_date),
        "fields": FIELDS,
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        # Only the exception type is printed: the exception text can embed the
        # request URL, whose query string carries the API key.
        print(f"Error fetching dim_adset data: {type(e).__name__}")
        return []

    if response.status_code != 200:
        print(f"Error fetching dim_adset data: {response.text}")
        return []

    try:
        json_data = response.json()
    except ValueError as e:
        # ValueError is the common base of every JSON decode error that
        # response.json() can raise, including json.JSONDecodeError.
        print(f"JSON parsing error: {e}")
        return []

    # A body such as `null` or a bare list has no 'data' key to read
    if not isinstance(json_data, dict) or "data" not in json_data:
        print("No 'data' key found in response for dim_adset")
        return []

    return json_data["data"]


StatementMeta(practice, 9, 12, Finished, Available, Finished)

In [12]:
# Run the API request; `data` holds the returned records (empty list on failure)
print("📌 Fetching `dim_adset` data...")
data = fetch_facebook_ads_dim_adset()


StatementMeta(practice, 9, 13, Finished, Available, Finished)

📌 Fetching `dim_adset` data...


In [13]:
if data:
    try:
        # Build the DataFrame from the fetched records, then add the audit
        # columns and the partition key in a single chain.
        load_ts = datetime.datetime.now().isoformat()
        df = (
            spark.createDataFrame(data)
            # Metadata columns: when this load ran and which source it came from
            .withColumn("load_date", lit(load_ts))
            .withColumn("source", lit("facebook_ads"))
            # Partition key: year of ad set creation
            .withColumn("year", year(col("adset_created_time")))
        )

        # Save to ADLS; "overwrite" replaces whatever already exists at adls_path
        print("🚀 Saving `dim_adset` to Bronze Layer...")
        df.write.mode("overwrite").partitionBy("year").json(adls_path)

        print(f"✅ Successfully saved `dim_adset` data to {adls_path}")

    except Exception as e:
        print(f"❌ Error processing `dim_adset`: {e}")
        # Re-raise so a failed conversion or write marks the cell as failed
        # instead of letting the run finish as if the data had been saved.
        raise

else:
    print("❌ No `dim_adset` data found, skipping save operation.")

StatementMeta(practice, 9, 14, Finished, Available, Finished)

🚀 Saving `dim_adset` to Bronze Layer...
✅ Successfully saved `dim_adset` data to abfss://learning@learningstorage1093.dfs.core.windows.net/bronze/facebook_ads/dim_adset/historical/
