# Activity Events Fetcher
This notebook replicates the functionality of `test-activityevents.ps1` to fetch Power BI activity events based on specific filters.

In [None]:
import requests
import json
from pyspark.sql import SparkSession
from notebookutils import mssparkutils

In [None]:
# Parameters
# Note: The Power BI Activity API typically requires dates to be wrapped in
# single quotes (e.g. '2024-01-29...'), so the quotes are part of each value.
start_date_time = "'2026-01-01T00:00:00.000Z'"
end_date_time = "'2026-01-01T12:00:00.000Z'"

# One filter expression per activity, matching the original PowerShell script
filters = [
    f"Activity eq '{activity_name}'"
    for activity_name in (
        "EnableWorkspaceOutboundAccessProtection",
        "DisableWorkspaceOutboundAccessProtection",
    )
]

In [None]:
# Authentication
# Inside a Fabric notebook the token for the Power BI API can often be
# obtained automatically through mssparkutils.
auth_token = ""

if mssparkutils:
    try:
        # Audience (resource) the token is requested for: the Power BI API
        pbi_audience = "https://analysis.windows.net/powerbi/api"
        auth_token = mssparkutils.credentials.getToken(pbi_audience)
        print("Successfully retrieved token from mssparkutils.")
    except Exception as token_error:
        print(f"Failed to get token via mssparkutils: {token_error}")

if not auth_token:
    # Automatic retrieval produced nothing: fall back to a manual token.
    # Replace the placeholder string with your Bearer token when running manually.
    auth_token = "INSERT_YOUR_MANUAL_TOKEN_HERE"
    print("Using manual token placeholder. Please replace if mssparkutils failed.")

# Request headers shared by the Activity Events calls
headers = {"Authorization": f"Bearer {auth_token}"}
headers["Content-Type"] = "application/json"

In [None]:
# Fetch Data Logic
# For every filter, page through the Activity Events endpoint and collect all
# returned events into all_activity_events.
base_url = "https://api.powerbi.com/v1.0/myorg/admin/activityevents"
all_activity_events = []

# Upper bound (seconds) for each HTTP call. requests applies no timeout by
# default, so a stalled connection would otherwise hang the notebook forever.
request_timeout_seconds = 60

for f in filters:
    # Skip empty or whitespace-only filter entries
    if not f or not f.strip():
        continue

    # First page: time window plus filter expression. Later pages use the
    # continuationUri handed back by the previous response.
    url = f"{base_url}?startDateTime={start_date_time}&endDateTime={end_date_time}&$filter={f}"

    print(f"Processing filter: {f}")

    try:
        while url:
            # print(f"Fetching: {url}") # Uncomment for verbose logging
            response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
            response.raise_for_status()  # Raise error for bad status codes

            data = response.json()

            # Aggregate results - checking both 'activityEventEntities' and 'value' as per PS script
            if 'activityEventEntities' in data:
                all_activity_events.extend(data['activityEventEntities'])
            elif 'value' in data:
                all_activity_events.extend(data['value'])

            # A missing/empty continuationUri ends the pagination loop
            url = data.get('continuationUri')

    except Exception as e:
        # Best effort: report the failure and move on to the next filter.
        # Pages already collected for this filter stay in all_activity_events.
        print(f"Error processing filter '{f}': {e}")

print(f"Total events fetched: {len(all_activity_events)}")

In [None]:
# Display and Output
# Reset first so that an empty result or a failed conversion never leaves a
# DataFrame from an earlier run of this cell bound to spark_df.
spark_df = None

if all_activity_events:
    try:
        # Create the Spark DataFrame directly from the list of dictionaries;
        # no schema is passed, so PySpark infers it from the rows
        spark_df = spark.createDataFrame(all_activity_events)

        # Display using Fabric's rich display
        display(spark_df)
    except Exception as e:
        print(f"Error creating Spark DataFrame: {e}")
        # Tip: If schema inference fails due to inconsistent types, you might need to define a schema explicitly
else:
    print("No events found.")

# Write to Eventhouse (KQL Database)
The following cells handle writing the dataframe to an Eventhouse.

### Configuration
Set your KQL Cluster URI and Database name below.
*   **Merge Strategy:** Since KQL is append-optimized, to perform a "Merge" (avoid duplicates), we will:
    1.  Write the data to a generic **Staging Table**.
    2.  Execute a KQL command through the Kusto REST API (the `run_kusto_command` helper defined below, built on `requests`) that moves only *new* records (based on `Id`) from Staging to the Final table.
    3.  Clear the Staging table.

*If you only want to append all data, you can skip the KQL Command part and just write directly to the final table.*

In [None]:
# KQL Configuration
# Fill in the cluster URI and database; the table names can stay as they are.
kusto_cluster_uri = "https://<your-cluster>.<region>.kusto.data.microsoft.com"
kusto_database = "<your-database>"
target_table = "ActivityEvents"
staging_table = "ActivityEvents_Staging"

# Stays None when no token could be obtained; later code checks for that.
kusto_token = None
try:
    # Request a token for the Kusto cluster through mssparkutils
    kusto_token = mssparkutils.credentials.getToken(kusto_cluster_uri)
    print("Kusto Token retrieved successfully.")
except Exception as token_error:
    print(f"Failed to get Kusto token: {token_error}")

def run_kusto_command(query, timeout=60):
    """Execute a KQL control command through the Kusto REST management endpoint.

    Uses the 'requests' library directly, which avoids needing the
    azure-kusto-data library. Reads the module-level ``kusto_token``,
    ``kusto_cluster_uri`` and ``kusto_database`` settings.

    Args:
        query: The KQL control command text, sent as the ``csl`` field.
        timeout: Seconds to wait for the HTTP call before giving up.

    Returns:
        True when the request completed with a non-error HTTP status,
        False when no token is available or the request failed.
    """
    if not kusto_token:
        print("Cannot run command: No token available.")
        # Return an explicit bool so every path yields the same type
        return False

    # Kusto Management Endpoint (strip any trailing slash from the cluster URI)
    mgmt_endpoint = f"{kusto_cluster_uri.rstrip('/')}/v1/rest/mgmt"

    request_headers = {
        "Authorization": f"Bearer {kusto_token}",
        "Content-Type": "application/json"
    }

    body = {
        "db": kusto_database,
        "csl": query
    }

    # Tracked explicitly so the error path can tell whether a response arrived
    response = None
    try:
        response = requests.post(
            mgmt_endpoint, headers=request_headers, json=body, timeout=timeout
        )
        response.raise_for_status()
        # print(response.json()) # Uncomment to see detailed result
        return True
    except requests.RequestException as e:
        print(f"Failed to execute KQL command via REST API: {e}")
        # The response body usually carries the service's error explanation
        if response is not None and response.content:
            print(f"Details: {response.text}")
        return False

In [None]:
# Write the fetched events to the Eventhouse: stage, merge new rows, clean up.
# spark_df is created in an earlier cell; look it up defensively because it is
# missing (or None) when that cell failed or was never run.
events_df = globals().get("spark_df")

if not all_activity_events:
    print("No data to write.")
elif events_df is None:
    print("Skipping write: Spark DataFrame 'spark_df' is not available.")
elif not kusto_token:
    print("Skipping write: No Kusto token available.")
else:
    # 1. Write Data to Staging Table using Spark Connector
    # Using the Synapse/Fabric Kusto connector with the provided Token

    print(f"Writing {events_df.count()} rows to Staging Table: {staging_table}")

    # Using 'com.microsoft.kusto.spark.synapse.datasource' as recommended for Fabric
    events_df.write \
        .format("com.microsoft.kusto.spark.synapse.datasource") \
        .option("kustoCluster", kusto_cluster_uri) \
        .option("kustoDatabase", kusto_database) \
        .option("kustoTable", staging_table) \
        .option("accessToken", kusto_token) \
        .option("tableCreateOptions", "CreateIfNotExist") \
        .mode("Append") \
        .save()

    print("Write to staging complete.")

    # 2. Execute Merge Logic (Deduplication) via REST API helper

    # Construct KQL command to merge
    # This query moves rows from Staging to Target WHERE the Id does not already exist in Target
    # NOTE(review): the join references the target table, so this presumably
    # fails on the very first run if that table does not exist yet - confirm
    # or pre-create the table.
    # NOTE(review): rows left in staging by an earlier failed merge are not
    # de-duplicated against each other here - verify this is acceptable.
    merge_query = f"""
    .set-or-append {target_table} <| 
    {staging_table} 
    | join kind=leftanti {target_table} on Id
    """

    # Command to clean up staging
    cleanup_query = f".clear table {staging_table} data"

    print("Executing Merge (set-or-append)...")
    if run_kusto_command(merge_query):
        print("Merge complete.")

        print("Cleaning up staging table...")
        if run_kusto_command(cleanup_query):
            print("Cleanup complete.")
        else:
            # Make it visible that the staged batch is still in place
            print("Cleanup failed. Staging table still contains this batch.")
    else:
        # Staging is intentionally left untouched when the merge did not succeed
        print("Merge failed.")