In [8]:
import datetime
import json
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, year, month, dayofmonth, lit

StatementMeta(practice, 10, 9, Finished, Available, Finished)

In [9]:
# Spark entry point for this backfill notebook
spark = (
    SparkSession.builder
    .appName("FacebookAdsBackfillDimCampaign")
    .getOrCreate()
)

# Target location in ADLS Gen2: storage account, container and stage folder
storage_account_name = "learningstorage1093"
container_name = "learning"
stage = "bronze"

# Full abfss:// URI of the folder the historical `dim_campaign` data is written to
adls_path = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/{stage}/facebook_ads/dim_campaign/historical/"

StatementMeta(practice, 10, 10, Finished, Available, Finished)

In [10]:
# Facebook Ads API Config
# The connector API key is read from the WINDSOR_API_KEY environment variable so
# that the secret is never stored in (or exported with) the notebook.
# NOTE(review): the key that used to be hardcoded on this line should be treated
# as leaked and rotated.
API_KEY = os.environ.get("WINDSOR_API_KEY", "")
if not API_KEY:
    # Warn early instead of failing later with a less obvious API error.
    print("⚠️ WINDSOR_API_KEY is not set; API requests will likely fail to authenticate.")

# Endpoint of the Windsor.ai Facebook connector
BASE_URL = "https://connectors.windsor.ai/facebook"
# NOTE(review): ACCOUNT_ID is not referenced by the code visible in this notebook
# section — confirm whether it is still needed.
ACCOUNT_ID = "2977132755882180"
# Selecting Table Fields for `dim_campaign` (comma-separated, sent as the `fields` parameter)
FIELDS = "campaign_id,campaign_name,campaign_objective,campaign_status,campaign_created_time"

StatementMeta(practice, 10, 11, Finished, Available, Finished)

In [11]:
# Historical backfill window: from 1 January 2023 up to and including yesterday
# (relative to the day the notebook is run).
start_date = datetime.date(year=2023, month=1, day=1)
end_date = datetime.date.today() + datetime.timedelta(days=-1)

StatementMeta(practice, 10, 12, Finished, Available, Finished)

In [12]:
def fetch_facebook_ads_dim_campaign(timeout=60):
    """Fetch Facebook Ads `dim_campaign` data for the configured date range.

    The request is built from the module-level ``API_KEY``, ``BASE_URL``,
    ``FIELDS``, ``start_date`` and ``end_date`` values.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The value stored under the ``"data"`` key of the JSON response
        (expected to be a list of records). An empty list is returned when the
        request fails, the status code is not 200, the body is not valid JSON,
        or the payload has no ``"data"`` key.
    """
    params = {
        "api_key": API_KEY,
        # Stringify explicitly so the query string does not depend on how the
        # HTTP library serialises date objects.
        "date_from": str(start_date),
        "date_to": str(end_date),
        "fields": FIELDS,
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors and timeouts follow the same "empty list" contract
        # as HTTP-level failures.
        print(f" Error fetching dim_campaign data: {e}")
        return []

    if response.status_code != 200:
        print(f" Error fetching dim_campaign data: {response.text}")
        return []

    try:
        json_data = response.json()
    except ValueError as e:
        # ValueError is the common base of the JSON decode errors raised by the
        # json module, simplejson and requests itself.
        print(f" JSON parsing error: {e}")
        return []

    # Guard against non-object payloads (e.g. `null` or a bare string), for
    # which the membership test or the indexing would raise TypeError.
    if isinstance(json_data, dict) and "data" in json_data:
        return json_data["data"]

    print(" No 'data' key found in response for dim_campaign")
    return []

StatementMeta(practice, 10, 13, Finished, Available, Finished)

In [13]:
# Fetch data
# `data` receives whatever the fetch helper returns; the helper returns an empty
# list on any failure, which the save cell below treats as "nothing to write".
print("📌 Fetching `dim_campaign` data...")
data = fetch_facebook_ads_dim_campaign()

StatementMeta(practice, 10, 14, Finished, Available, Finished)

📌 Fetching `dim_campaign` data...


In [14]:
if data:
    try:
        # Capture the load timestamp once so every row carries the same value.
        load_timestamp = datetime.datetime.now().isoformat()

        # Convert the fetched records into a DataFrame and add, in one chain:
        #   - load_date / source: metadata columns
        #   - year: partition column derived from the campaign creation time
        # NOTE(review): year() relies on Spark being able to interpret
        # `campaign_created_time` as a date/timestamp — confirm the API's
        # format; unparseable values would presumably yield a null year.
        df = (
            spark.createDataFrame(data)
            .withColumn("load_date", lit(load_timestamp))
            .withColumn("source", lit("facebook_ads"))
            .withColumn("year", year(col("campaign_created_time")))
        )

        # Save to ADLS; "overwrite" replaces what is already stored at adls_path.
        print("🚀 Saving `dim_campaign` to Bronze Layer...")
        df.write.mode("overwrite").partitionBy("year").json(adls_path)

        print(f"✅ Successfully saved `dim_campaign` data to {adls_path}")

    except Exception as e:
        print(f"❌ Error processing `dim_campaign`: {e}")
        # Re-raise so the cell (and any pipeline running this notebook) is
        # marked as failed instead of silently reporting success.
        raise

else:
    print("❌ No `dim_campaign` data found, skipping save operation.")

StatementMeta(practice, 10, 15, Finished, Available, Finished)

🚀 Saving `dim_campaign` to Bronze Layer...
✅ Successfully saved `dim_campaign` data to abfss://learning@learningstorage1093.dfs.core.windows.net/bronze/facebook_ads/dim_campaign/historical/
