In [2]:
import datetime
import json
import os

import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, year, month, dayofmonth, lit

StatementMeta(practice, 12, 2, Finished, Available, Finished)

In [3]:
# Obtain the Spark session for this backfill job (an already-running session
# is returned as-is by getOrCreate, otherwise a new one is started).
spark = (
    SparkSession.builder
    .appName("FacebookAdsBackfillDimPlatform")
    .getOrCreate()
)

# ADLS Gen2 destination: storage account, container and medallion layer.
storage_account_name = "learningstorage1093"
container_name = "learning"
stage = "bronze"

# Target folder for the `dim_platform` extract, assembled from the parts above.
adls_path = (
    f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net"
    f"/{stage}/facebook_ads/dim_platform/"
)

StatementMeta(practice, 12, 3, Finished, Available, Finished)

In [4]:
# Facebook Ads API Config (Windsor.ai connector)
# SECURITY: the API key is a credential and must not live in the notebook
# source. It is read from the WINDSOR_API_KEY environment variable; populate
# that variable from your secret store before running this cell. The key that
# was previously hard-coded here should be treated as exposed and rotated.
API_KEY = os.environ.get("WINDSOR_API_KEY", "")
if not API_KEY:
    # Warn here, where the cause is obvious, instead of leaving the reader to
    # decode an authentication error from the API call further down.
    print("⚠️ WINDSOR_API_KEY is not set; the Windsor.ai request will not be authenticated.")
BASE_URL = "https://connectors.windsor.ai/facebook"
# NOTE(review): ACCOUNT_ID is not referenced by any cell visible here — confirm it is still needed.
ACCOUNT_ID = "2977132755882180"
# Selecting Table Fields for `dim_platform`
FIELDS = "platform_position,publisher_platform,device_platform"

StatementMeta(practice, 12, 4, Finished, Available, Finished)

In [5]:
# Historical backfill window: starts on 1 January 2023 and ends on the day
# before the notebook is run.
start_date = datetime.date(2023, 1, 1)
end_date = datetime.date.today() + datetime.timedelta(days=-1)

StatementMeta(practice, 12, 5, Finished, Available, Finished)

In [6]:
def fetch_facebook_ads_dim_platform(date_from=None, date_to=None, timeout=300):
    """Fetch Facebook Ads `dim_platform` rows from the Windsor.ai connector.

    Args:
        date_from: First day of the range (a `datetime.date` or an ISO
            `YYYY-MM-DD` string). Defaults to the notebook-level `start_date`.
        date_to: Last day of the range, same accepted types. Defaults to the
            notebook-level `end_date`.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value stored under the `"data"` key of the JSON response, or an
        empty list when the request fails, the status is not 200, the body is
        not valid JSON, or the payload has no `"data"` key. Failures are
        reported with `print` and never raised to the caller.
    """
    # Fall back to the notebook-level backfill window when no range is given.
    window_start = start_date if date_from is None else date_from
    window_end = end_date if date_to is None else date_to

    params = {
        "api_key": API_KEY,
        # str() of a datetime.date is its ISO form, which is what requests
        # would put on the wire anyway; being explicit also accepts strings.
        "date_from": str(window_start),
        "date_to": str(window_end),
        "fields": FIELDS,
    }

    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.get(BASE_URL, params=params, timeout=timeout)
    except requests.RequestException as e:
        # Print only the exception type: the message can contain the full
        # request URL, and the URL carries the API key as a query parameter.
        print(f"Request for dim_platform data failed: {type(e).__name__}")
        return []

    if response.status_code != 200:
        print(f"Error fetching dim_platform data: {response.text}")
        return []

    try:
        json_data = response.json()  # Convert response to JSON
    except ValueError as e:
        # ValueError is the common base of every JSON decode error that
        # response.json() can raise (stdlib json, simplejson, or requests' own).
        print(f"JSON parsing error: {e}")
        return []

    # Guard against a JSON body that is valid but not an object (e.g. a list).
    if isinstance(json_data, dict) and "data" in json_data:
        return json_data["data"]

    print("No 'data' key found in response for dim_platform")
    return []

StatementMeta(practice, 12, 6, Finished, Available, Finished)

In [7]:
# Fetch data: a single call to the helper defined above. `data` receives
# whatever the API returned under its 'data' key, or an empty list when the
# helper reported a failure.
print("📌 Fetching `dim_platform` data...")
data = fetch_facebook_ads_dim_platform()

StatementMeta(practice, 12, 7, Finished, Available, Finished)

📌 Fetching `dim_platform` data...


In [8]:
# Persist the fetched records to the Bronze layer. An empty `data` is treated
# as "nothing to load"; any failure while building or writing the DataFrame is
# logged and then re-raised so the cell (and any pipeline running this
# notebook) is marked as failed instead of silently reporting success.
if data:
    try:
        # Convert Data into DataFrame
        # NOTE(review): assumes `data` is a list of flat records Spark can infer a schema from — confirm.
        df = spark.createDataFrame(data)

        # Add metadata columns; the timestamp is taken once so every row of
        # this load carries the same `load_date` value.
        load_timestamp = datetime.datetime.now().isoformat()
        df = df.withColumn("load_date", lit(load_timestamp)) \
               .withColumn("source", lit("facebook_ads"))

        # Save to ADLS ("overwrite" replaces whatever is already at the path)
        print("🚀 Saving `dim_platform` to Bronze Layer...")
        df.write.mode("overwrite").json(adls_path)

        print(f"✅ Successfully saved `dim_platform` data to {adls_path}")

    except Exception as e:
        print(f"❌ Error processing `dim_platform`: {e}")
        # Re-raise: swallowing the error here would leave the run looking
        # successful even though nothing was written.
        raise
else:
    print("❌ No `dim_platform` data found, skipping save operation.")

StatementMeta(practice, 12, 8, Finished, Available, Finished)

🚀 Saving `dim_platform` to Bronze Layer...
✅ Successfully saved `dim_platform` data to abfss://learning@learningstorage1093.dfs.core.windows.net/bronze/facebook_ads/dim_platform/
