In [None]:
# 2. Configuration
#
# Secrets are read from environment variables so that they are never stored in
# the notebook. In a managed Spark environment you can instead assign the three
# credential variables from your Key Vault (e.g. via
# mssparkutils.credentials.getSecret) - use Key Vault in production!
import os

# Environment URL (e.g., https://org1234.crm.dynamics.com)
dataverse_env_url = "https://orgd2bf3532.crm4.dynamics.com"

# Credentials of the app registration (service principal) used to sign in.
# An unset variable yields an empty string; the check below reports it.
tenant_id = os.environ.get("AZURE_TENANT_ID", "")
client_id = os.environ.get("AZURE_CLIENT_ID", "")
client_secret = os.environ.get("AZURE_CLIENT_SECRET", "")

# Report missing settings by name only - never print the secret values themselves.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("AZURE_TENANT_ID", tenant_id),
        ("AZURE_CLIENT_ID", client_id),
        ("AZURE_CLIENT_SECRET", client_secret),
    )
    if not setting_value
]
if missing_settings:
    print(
        "WARNING: missing credential settings: "
        + ", ".join(missing_settings)
        + ". Authentication will fail until they are provided."
    )

# Construct the Token Scope
# Dataverse requires the scope to be the Environment URL + "/.default".
# Stripping any trailing slash first guarantees exactly one "/" before ".default".
token_scope = f"{dataverse_env_url.rstrip('/')}/.default"

authority_url = f"https://login.microsoftonline.com/{tenant_id}"

print(f"Target Environment: {dataverse_env_url}")
print(f"Auth Scope: {token_scope}")

In [None]:
# 3. Authenticate and Get Token (Using MSAL)
import msal
import requests
import json

# Confidential client for the app registration configured earlier in the notebook.
app = msal.ConfidentialClientApplication(
    client_id,
    client_credential=client_secret,
    authority=authority_url,
)

# Request a token for the application itself, limited to the Dataverse scope.
result = app.acquire_token_for_client(scopes=[token_scope])

# Guard clause: without an access token there is nothing further to do, so
# print whatever error details the response carries and abort the notebook.
if "access_token" not in result:
    print("Authentication failed.")
    for detail_key in ("error", "error_description"):
        print(result.get(detail_key))
    raise Exception("Could not retrieve access token")

token = result["access_token"]
print("Authentication successful. Token acquired.")

In [None]:
# 4. Query Dataverse Web API (Accounts)

# Web API version and the entity set to read from
api_version = "v9.2"
entity_name = "accounts"

# OData $select limits the response to the columns needed for the merge
# (workspaceId plus both e-mail columns). No $top option is applied, so the
# number of rows is not limited by this query.
# NOTE(review): confirm "workspaceId" is the exact column name exposed by the
# accounts entity in this environment - TODO verify against the table definition.
query_options = "?$select=" + ",".join(["workspaceId", "emailaddress1", "emailaddress2"])

# Drop any trailing slash from the environment URL before appending the API path
base_url = dataverse_env_url.rstrip("/")
request_uri = "/".join([base_url, "api", "data", api_version, entity_name]) + query_options

# Headers sent with the request: bearer token, OData protocol version,
# JSON content negotiation, and a request for all response annotations.
headers = {
    "Authorization": "Bearer {}".format(token),
    "OData-MaxVersion": "4.0",
    "OData-Version": "4.0",
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Prefer": "odata.include-annotations=*",
}

print(f"Querying URI: {request_uri}")

In [None]:
# 5. Execute Request and Process Response
#
# Downloads every page of the accounts query, maps the columns to the
# WorkspaceEmail(workspaceId, PrimaryEmail, SecondaryEmail) layout and upserts
# them into the Delta table (creating the table on the first run).
# Any failure is printed and then re-raised so that a scheduled run of this
# notebook is reported as failed instead of silently succeeding.
from delta.tables import DeltaTable

# (connect, read) timeouts in seconds, so a stalled connection cannot hang the notebook.
REQUEST_TIMEOUT = (10, 120)

# Define the target table name
tableName = "WorkspaceEmail"

try:
    # The Web API returns large result sets in pages; every page except the
    # last one carries an "@odata.nextLink" URL pointing at the next page.
    accounts = []
    next_page_url = request_uri
    while next_page_url:
        response = requests.get(next_page_url, headers=headers, timeout=REQUEST_TIMEOUT)

        # Check for HTTP errors
        response.raise_for_status()

        # Parse JSON
        data = response.json()
        accounts.extend(data.get("value") or [])
        next_page_url = data.get("@odata.nextLink")

    if accounts:
        print(f"Successfully retrieved {len(accounts)} accounts.")

        # Display all in a nicer format (Spark DataFrame); the schema is inferred from the records.
        # NOTE(review): inference may fail when a selected column is null in every
        # record - consider an explicit schema once the workspaceId type is confirmed.
        df = spark.createDataFrame(accounts)

        # Select and rename columns to match the target table
        # Mapping: workspaceId -> workspaceId, emailaddress1 -> PrimaryEmail, emailaddress2 -> SecondaryEmail
        df_merged = df.selectExpr(
            "workspaceId",
            "emailaddress1 as PrimaryEmail",
            "emailaddress2 as SecondaryEmail"
        )
        display(df_merged)

        # Merge Logic
        # DeltaTable.isDeltaTable() expects a storage path, not a catalog table
        # name, so the catalog is asked whether the table exists instead
        # (Catalog.tableExists requires PySpark 3.3 or newer).
        if spark.catalog.tableExists(tableName):
            targetTable = DeltaTable.forName(spark, tableName)

            # NOTE(review): the merge key is assumed to be non-null and unique in
            # the source; verify, since null keys never match an existing row.
            targetTable.alias("target").merge(
                df_merged.alias("source"),
                "target.workspaceId = source.workspaceId"
            ).whenMatchedUpdate(set = {
                "PrimaryEmail": "source.PrimaryEmail",
                "SecondaryEmail": "source.SecondaryEmail"
            }).whenNotMatchedInsert(values = {
                "workspaceId": "source.workspaceId",
                "PrimaryEmail": "source.PrimaryEmail",
                "SecondaryEmail": "source.SecondaryEmail"
            }).execute()
            print(f"Merge completed for table {tableName}")
        else:
            print(f"Table {tableName} does not exist. Creating and loading data...")
            df_merged.write.format("delta").saveAsTable(tableName)
            print(f"Table {tableName} created and data loaded.")

    else:
        print("No accounts found.")
        print("Full Response:")
        print(json.dumps(data, indent=2))

except requests.exceptions.HTTPError as err:
    print(f"HTTP Error: {err}")
    # The error carries the failing response; its body usually explains the rejection.
    if err.response is not None:
        print(err.response.text)
    raise
except Exception as e:
    print(f"Error: {e}")
    raise