In [None]:
# 2. Configuration (replace with your own values, or load secrets with mssparkutils.credentials.getSecret)

# Environment URL (e.g., https://org1234.crm.dynamics.com)
dataverse_env_url = "https://orgd2bf3532.crm4.dynamics.com"

# Credentials (use Key Vault in production!)
# tenant_id, client_id and client_secret must be assigned above this check
# (or in an earlier cell). Do not commit literal secrets to the notebook.
_missing_credentials = [
    name for name in ("tenant_id", "client_id", "client_secret") if name not in globals()
]
if _missing_credentials:
    # Fail here with an actionable message instead of an opaque NameError in a later statement/cell.
    raise NameError(
        "Missing credential variable(s): "
        + ", ".join(_missing_credentials)
        + ". Define them (e.g. from Key Vault) before running this cell."
    )

# Construct the token scope.
# Dataverse requires the scope to be the environment URL + "/.default".
# Trailing slashes are stripped first so exactly one "/" separates the two parts,
# however the URL above was written.
token_scope = f"{dataverse_env_url.rstrip('/')}/.default"

# Tenant-specific Microsoft identity platform authority used by the MSAL client.
authority_url = f"https://login.microsoftonline.com/{tenant_id}"

print(f"Target Environment: {dataverse_env_url}")
print(f"Auth Scope: {token_scope}")

In [None]:
# 3. Authenticate and Get Token (Using MSAL)
import msal
import requests
import json

# MSAL confidential client: authenticates as the application itself using
# the client id / secret and the tenant authority from the configuration cell.
app = msal.ConfidentialClientApplication(
    client_id,
    client_credential=client_secret,
    authority=authority_url,
)

# Request an app-only token for the Dataverse scope.
result = app.acquire_token_for_client(scopes=[token_scope])

# Guard clause: report the error details returned by MSAL and stop the notebook.
if "access_token" not in result:
    print("Authentication failed.")
    print(result.get("error"))
    print(result.get("error_description"))
    raise Exception("Could not retrieve access token")

# Bearer token used by the Dataverse calls in later cells.
token = result["access_token"]
print("Authentication successful. Token acquired.")

In [None]:
# 4. Helper Functions: KQL and Dataverse

def get_pending_workspace_ids():
    """
    Return the distinct WorkspaceId values from the Kusto AlertLogs table
    whose AlertStatus is not 'EmailSent'.

    The query is executed through the Spark session using the
    'com.microsoft.kusto.spark.synapse.datasource' format.

    Returns:
        list: one WorkspaceId per result row. Any exception raised while
        querying is printed and an empty list is returned instead, so an
        empty result can also mean that the query failed.
    """
    # Kusto connection settings
    kusto_cluster = "https://mycluster.kusto.windows.net"
    kusto_database = "MyDatabase"  # Replace with your actual database name
    kusto_query = "AlertLogs | where AlertStatus != 'EmailSent' | distinct WorkspaceId"

    try:
        print(f"Reading from Kusto Cluster: {kusto_cluster}, Database: {kusto_database}")

        # Configure the reader one option at a time, then load and collect.
        reader = spark.read.format("com.microsoft.kusto.spark.synapse.datasource")
        for option_name, option_value in (
            ("kustoCluster", kusto_cluster),
            ("kustoDatabase", kusto_database),
            ("kustoQuery", kusto_query),
        ):
            reader = reader.option(option_name, option_value)
        pending_rows = reader.load().collect()

        # Pull the WorkspaceId column out of each collected row.
        workspace_ids = []
        for pending_row in pending_rows:
            workspace_ids.append(pending_row.WorkspaceId)

        print(f"Found {len(workspace_ids)} pending workspaces.")
        return workspace_ids

    except Exception as query_error:
        # Best-effort: report the failure and let the caller continue with no work.
        print(f"Error querying Kusto: {query_error}")
        return []

def fetch_dataverse_account(workspace_id, token, base_url, timeout=30):
    """
    Fetch one account row from the Dataverse Web API, filtering by accountid
    (the workspace id is used as the accountid value).

    Args:
        workspace_id: Identifier matched against the account's accountid.
        token: OAuth access token sent as a Bearer Authorization header.
        base_url: Dataverse environment URL without a trailing slash.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        dict with the first matching row (accountid, emailaddress1,
        emailaddress2 are selected), or None when workspace_id is empty, no
        row matches, or the request fails. Failures are printed, not raised.
    """
    # Nothing to look up: avoid sending a request that cannot match any account.
    if not workspace_id:
        print("Skipping Dataverse lookup: empty WorkspaceId.")
        return None

    api_version = "v9.2"
    entity_name = "accounts"

    # Select only the columns we need and filter on the account's primary key.
    query_options = f"?$select=accountid,emailaddress1,emailaddress2&$filter=accountid eq '{workspace_id}'"

    request_uri = f"{base_url}/api/data/{api_version}/{entity_name}{query_options}"

    headers = {
        "Authorization": f"Bearer {token}",
        "OData-MaxVersion": "4.0",
        "OData-Version": "4.0",
        "Accept": "application/json",
        "Content-Type": "application/json",
    }

    try:
        response = requests.get(request_uri, headers=headers, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        # The response wraps matching rows in a "value" list.
        rows = data.get("value") or []
        if rows:
            return rows[0]  # Return the first match

        print(f"WorkspaceId {workspace_id} not found in Dataverse.")
        return None

    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error fetching {workspace_id}: {err}")
        return None
    except Exception as e:
        # Covers timeouts, connection errors and malformed JSON.
        print(f"Error fetching {workspace_id}: {e}")
        return None

In [None]:
# 5. Main Execution Flow
# Reads pending workspace ids from Kusto, looks each one up in Dataverse and
# upserts the resulting e-mail addresses into the WorkspaceEmail Delta table.

# Step 1: Query AlertLogs for WorkspaceIds
print("--- Step 1: Querying AlertLogs for Pending Workspaces ---")
pending_workspace_ids = get_pending_workspace_ids()

if not pending_workspace_ids:
    # get_pending_workspace_ids returns [] both for "nothing pending" and for a failed query.
    print("No pending workspaces found in AlertLogs (or table not accessible).")
else:
    print(f"Found {len(pending_workspace_ids)} workspaces pending processing.")

    # Step 2: Query Dataverse for each WorkspaceId
    print(f"--- Step 2: Fetching details for {len(pending_workspace_ids)} workspaces from Dataverse ---")

    account_records = []
    base_url = dataverse_env_url.rstrip("/")

    for ws_id in pending_workspace_ids:
        # Fetch account details; None means "not found" or "request failed" and is skipped.
        print(f"Fetching details for WorkspaceId: {ws_id}")
        account_data = fetch_dataverse_account(ws_id, token, base_url)

        if account_data:
            # Map Dataverse columns to our schema
            record = {
                "workspaceId": account_data.get("accountid", ws_id),  # Fallback to input ID if missing
                "PrimaryEmail": account_data.get("emailaddress1"),
                "SecondaryEmail": account_data.get("emailaddress2")
            }
            account_records.append(record)

    # Step 3: Populate DataFrame and Merge
    if account_records:
        print(f"--- Step 3: Merging {len(account_records)} records into WorkspaceEmail table ---")

        # Create the DataFrame with an explicit schema. Letting Spark infer the
        # schema from the dicts fails when a column is None in every record
        # (e.g. no account has a secondary e-mail), because the column type
        # cannot be determined.
        workspace_email_schema = "workspaceId string, PrimaryEmail string, SecondaryEmail string"
        df_merged = spark.createDataFrame(account_records, schema=workspace_email_schema)
        display(df_merged)

        # Merge Logic
        from delta.tables import DeltaTable
        tableName = "WorkspaceEmail"

        # Look the table up by name in the catalog (requires PySpark 3.3+).
        # DeltaTable.isDeltaTable is documented as taking a file path, so it is
        # not a reliable existence check for a catalog table name; a false
        # negative would send an existing table down the saveAsTable branch,
        # which fails because the table already exists.
        if spark.catalog.tableExists(tableName):
            targetTable = DeltaTable.forName(spark, tableName)

            # Upsert keyed on workspaceId: update both e-mail columns on a match,
            # insert the full row otherwise.
            targetTable.alias("target").merge(
                df_merged.alias("source"),
                "target.workspaceId = source.workspaceId"
            ).whenMatchedUpdate(set = {
                "PrimaryEmail": "source.PrimaryEmail",
                "SecondaryEmail": "source.SecondaryEmail"
            }).whenNotMatchedInsert(values = {
                "workspaceId": "source.workspaceId",
                "PrimaryEmail": "source.PrimaryEmail",
                "SecondaryEmail": "source.SecondaryEmail"
            }).execute()
            print(f"Merge completed for table {tableName}")

        else:
            # First run: create the managed Delta table from the fetched records.
            print(f"Table {tableName} does not exist. Creating it now...")
            df_merged.write.format("delta").saveAsTable(tableName)
            print(f"Table {tableName} created successfully.")
    else:
        print("No valid account records retrieved from Dataverse.")