# Elexon API Client Testing

This notebook will systematically test the Elexon API client implementation to identify why changes aren't having the expected effect. We'll:

1. Test basic connectivity to the API
2. Examine the response structure of various endpoints
3. Debug the data processing in the client
4. Test fixes to ensure proper data retrieval

In [None]:
# Import necessary libraries
import sys
import os
import json
import requests
import pandas as pd
from pprint import pprint
from pathlib import Path

# Add the project src directory to the path so we can import from it
sys.path.append('/workspaces/energy-market-tracker')
from src.fetching.elexon_client import ElexonApiClient
import src.config as config

## 1. Initialize Client and Test Connectivity

First, let's initialize the Elexon API client and test basic connectivity with the API health endpoint.

In [None]:
# Build a client with no arguments, then call the health endpoint as a
# cheap end-to-end connectivity check.
client = ElexonApiClient()
health_df = client.get_health()
print("Health endpoint response:", health_df, sep="\n")

## 2. Investigate Raw API Responses

Let's directly examine the raw responses from the API to understand their structure before processing.
We'll implement a helper function to inspect raw API responses.

In [None]:
def inspect_raw_response(endpoint_path, params=None):
    """Fetch an endpoint with a plain ``requests.get`` and print its JSON shape.

    The request bypasses the client class entirely so the raw payload can be
    compared with what the client later produces.

    Args:
        endpoint_path: Path appended to the BMRS v1 base URL (e.g. "/health").
        params: Optional query parameters; ``None`` is sent as an empty dict.

    Returns:
        The decoded JSON payload, or ``None`` when the body is not valid JSON
        (in which case the first 1000 characters of the body are printed).

    Network-level errors raised by ``requests.get`` are not caught here and
    propagate to the caller.
    """
    base_url = "https://data.elexon.co.uk/bmrs/api/v1"
    url = f"{base_url}{endpoint_path}"
    headers = {"apiKey": config.ELEXON_API_KEY}

    print(f"Requesting: {url}")
    if params:
        print(f"With params: {params}")

    response = requests.get(url, headers=headers, params=params or {}, timeout=30)
    print(f"Status code: {response.status_code}")

    # Only the JSON decode is guarded: a failure anywhere else below is a bug
    # in this helper and should surface rather than be reported as a parse error.
    try:
        data = response.json()
    except ValueError as e:
        print(f"Error parsing response: {e}")
        print("Raw response:")
        print(response.text[:1000])  # Show first 1000 chars to avoid overwhelming output
        return None

    print(f"Response type: {type(data)}")
    if isinstance(data, dict):
        print("Top-level keys:")
        for key in data:
            print(f"  - {key}")

        if "data" in data:
            data_value = data["data"]
            print(f"\nData value type: {type(data_value)}")
            if isinstance(data_value, list):
                print(f"Data list length: {len(data_value)}")
                if data_value:
                    print("First item:")
                    pprint(data_value[0])
            elif isinstance(data_value, dict):
                print("Data dict:")
                pprint(data_value)
        else:
            print("\nNo 'data' key found")
            print("Sample of response:")
            # data is known to be a dict here, so sample its first five items.
            pprint(list(data.items())[:5])
    elif isinstance(data, list):
        print(f"List length: {len(data)}")
        if data:
            print("First item:")
            pprint(data[0])

    return data

## 3. Test Different Endpoint Types

Let's test a few different endpoints to see if they have different response structures:
1. A simple endpoint (health)
2. A demand endpoint
3. A generation endpoint
4. A settlement endpoint

In [None]:
# Inspect the raw health endpoint response; health_raw holds the decoded
# JSON, or None if the body could not be parsed.
health_raw = inspect_raw_response("/health")

In [None]:
# Inspect the raw response of a demand endpoint (no query parameters sent);
# demand_raw holds the decoded JSON, or None if the body could not be parsed.
demand_raw = inspect_raw_response("/demand/actual/total")

In [None]:
# Inspect the raw response of a generation endpoint (no query parameters sent);
# generation_raw holds the decoded JSON, or None if the body could not be parsed.
generation_raw = inspect_raw_response("/generation/actual/per-type")

In [None]:
# Probe a settlement endpoint; the date is passed as the last path segment
from datetime import datetime, timedelta

# Yesterday according to the local clock, formatted as YYYY-MM-DD
yesterday = f"{datetime.now() - timedelta(days=1):%Y-%m-%d}"
settlement_path = f"/balancing/settlement/system-prices/{yesterday}"
settlement_raw = inspect_raw_response(settlement_path)

## 4. Debug the `_get` Method

Let's create a modified version of the `_get` method to debug how it processes responses and observe the transformations it performs.

In [None]:
def debug_get(path, params):
    """Verbose stand-in for ``_get`` that narrates each processing step.

    Performs the GET, prints what it observes at every stage (status, payload
    type, keys, extracted records) and returns the DataFrame built from the
    payload. A ``requests.RequestException`` is printed and results in an
    empty DataFrame.
    """
    url = f"https://data.elexon.co.uk/bmrs/api/v1{path}"
    auth_headers = {"apiKey": config.ELEXON_API_KEY}

    print(f"Request URL: {url}")
    print(f"Request params: {params}")

    # Network round-trip and JSON decoding: the only steps that can raise
    # a requests exception.
    try:
        resp = requests.get(url, headers=auth_headers, params=params, timeout=30)
        resp.raise_for_status()
        print(f"Response status: {resp.status_code}")
        payload = resp.json()
    except requests.RequestException as e:
        print(f"Error fetching {url} with params={params}: {e}")
        return pd.DataFrame()

    print(f"Payload type: {type(payload)}")

    # Work out which part of the payload holds the records.
    if isinstance(payload, list):
        print("Payload is a list")
        records = payload
    elif isinstance(payload, dict):
        print("Payload is a dict")
        print(f"Payload keys: {list(payload.keys())}")
        if "data" not in payload:
            print("No 'data' key found, using full payload")
            records = payload
        else:
            records = payload["data"]
            print(f"Extracted 'data' field of type {type(records)}")
            if not isinstance(records, list):
                print("Data value:")
                pprint(records)
            else:
                print(f"Data list length: {len(records)}")
                if records:
                    print("First data item sample:")
                    pprint(records[0])
    else:
        print(f"Unexpected payload type: {type(payload)}")
        records = []

    # Turn the records into a DataFrame: a dict becomes a single row.
    print("\nCreating DataFrame...")
    if isinstance(records, dict):
        frame = pd.DataFrame([records])
        print(f"Created DataFrame from dict with shape: {frame.shape}")
    elif isinstance(records, list):
        frame = pd.DataFrame(records)
        print(f"Created DataFrame from list with shape: {frame.shape}")
    else:
        frame = pd.DataFrame()
        print("Created empty DataFrame")

    return frame

In [None]:
# Run debug_get against one endpoint of each kind, printing a banner before each
debug_runs = (
    ("=== Testing health endpoint ===", "/health"),
    ("\n=== Testing demand endpoint ===", "/demand/actual/total"),
    ("\n=== Testing generation endpoint ===", "/generation/actual/per-type"),
    (
        "\n=== Testing settlement endpoint ===",
        f"/balancing/settlement/system-prices/{yesterday}",
    ),
)

debug_frames = []
for banner, endpoint in debug_runs:
    print(banner)
    debug_frames.append(debug_get(endpoint, {}))

health_df, demand_df, generation_df, settlement_df = debug_frames

## 5. Test Specific Problem Endpoints

Let's test some specific endpoints that might be causing issues in the client implementation. These will help identify whether certain endpoints have unique structures that aren't being handled correctly.

In [None]:
# Endpoints whose response structure may differ from the rest, each paired
# with the query parameters to send.
problem_endpoints = [
    ("/demand/peak", {}),
    ("/generation/outturn/current", {}),
    ("/forecast/generation/wind/latest", {}),
    ("/balancing/acceptances/all", {"settlementDate": yesterday}),
    ("/balancing/bid-offer", {"bmUnit": "T_DRAXX-1", "from": yesterday}),
]

banner = "=" * 50
for path, params in problem_endpoints:
    print(f"\n\n{banner}")
    print(f"Testing endpoint: {path}")
    print(banner)

    # Raw view first, then the same request through the debug pipeline.
    raw_response = inspect_raw_response(path, params)
    print("\nProcessing with debug_get:")
    df_result = debug_get(path, params)
    print(f"Result DataFrame shape: {df_result.shape}")

    if df_result.empty:
        continue
    print("DataFrame columns:")
    print(df_result.columns.tolist())
    print("\nFirst row:")
    print(df_result.iloc[0].to_dict())

## 6. Implement and Test Potential Fixes

Based on our findings, let's implement a fixed version of the `_get` method that properly handles the different response structures we've observed.

In [None]:
def fixed_get(path, params):
    """Fetch an Elexon endpoint and normalise its JSON payload into a DataFrame.

    Payload shapes handled:
      * list                         -> one row per element
      * dict with a list under "data" -> one row per element of that list
      * dict with a dict under "data" -> a single row built from that dict
      * any other non-empty dict      -> a single row; known metadata keys are
        dropped unless that would leave nothing, in which case the whole
        payload is kept
      * empty dict / anything else    -> empty DataFrame

    Args:
        path: Path appended to the BMRS v1 base URL.
        params: Query parameters passed to ``requests.get``.

    Returns:
        A DataFrame as described above. Request failures and undecodable
        bodies are printed and produce an empty DataFrame.
    """
    base_url = "https://data.elexon.co.uk/bmrs/api/v1"
    url = f"{base_url}{path}"
    headers = {"apiKey": config.ELEXON_API_KEY}

    # Keep the try limited to the network call and JSON decoding so that
    # problems while building the DataFrame are not misreported as fetch errors.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException as e:
        print(f"Error fetching {url} with params={params}: {e}")
        return pd.DataFrame()
    except ValueError as e:
        # Body was not valid JSON (older requests versions raise a plain
        # ValueError subclass here rather than a RequestException).
        print(f"Error parsing JSON from {url}: {e}")
        return pd.DataFrame()

    # Case 1: payload is a list - use directly
    if isinstance(payload, list):
        return pd.DataFrame(payload)

    # Case 3: empty dict or an unexpected type - nothing to tabulate
    if not isinstance(payload, dict) or not payload:
        return pd.DataFrame()

    # Case 2a / 2b: records live under the 'data' key
    data = payload.get("data")
    if isinstance(data, list):
        return pd.DataFrame(data)
    if isinstance(data, dict):
        return pd.DataFrame([data])

    # Case 2c: no usable 'data' key - treat the dict itself as one record,
    # leaving out the typical metadata keys when other keys remain.
    metadata_keys = {"apiVersion", "batchSize", "totalRecords", "status"}
    data_subset = {k: v for k, v in payload.items() if k not in metadata_keys}
    return pd.DataFrame([data_subset or payload])

In [None]:
# Re-run the problem endpoints, this time through fixed_get
print("Testing fixed implementation with problem endpoints:")

rule = "=" * 50
for path, params in problem_endpoints:
    print(f"\n\n{rule}")
    print(f"Testing fixed implementation on: {path}")
    print(rule)

    # Raw response is printed first as a reference point
    raw_response = inspect_raw_response(path, params)

    # Same request through the fixed implementation
    df_result = fixed_get(path, params)

    print(f"\nFixed implementation result DataFrame shape: {df_result.shape}")
    if df_result.empty:
        continue
    print("DataFrame columns:")
    print(df_result.columns.tolist())
    print("\nFirst row sample:")
    sample = df_result.iloc[0].to_dict()
    # Only the first five fields, to keep the output short
    print(dict(list(sample.items())[:5]))

## 7. Create an Updated Client Class with Fixes

Based on our testing, let's create an updated version of the ElexonApiClient class that incorporates our fixes.

In [None]:
class FixedElexonApiClient(ElexonApiClient):
    """
    An improved version of the ElexonApiClient with fixes for handling different response structures.

    Only ``_get`` is overridden; everything else is inherited unchanged.
    """

    def _get(self, path: str, params: dict) -> pd.DataFrame:
        """
        Fixed version of the _get method that properly handles different API response structures.

        Payload shapes handled:
          * list                          -> one row per element
          * dict with a list under "data" -> one row per element of that list
          * dict with a dict under "data" -> a single row built from that dict
          * any other non-empty dict      -> a single row; known metadata keys
            are dropped unless that would leave nothing
          * empty dict / anything else    -> empty DataFrame

        Request failures and undecodable bodies are printed and produce an
        empty DataFrame.
        """
        url = f"{self.base_url}{path}"
        headers = {"apiKey": self.api_key}

        # Keep the try limited to the network call and JSON decoding so that
        # problems while building the DataFrame are not misreported.
        try:
            response = requests.get(url, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            payload = response.json()
        except requests.RequestException as e:
            print(f"Error fetching {url} with params={params}: {e}")
            return pd.DataFrame()
        except ValueError as e:
            # Body was not valid JSON (older requests versions raise a plain
            # ValueError subclass here rather than a RequestException).
            print(f"Error parsing JSON from {url}: {e}")
            return pd.DataFrame()

        # Case 1: payload is a list - use directly
        if isinstance(payload, list):
            return pd.DataFrame(payload)

        # Case 3: empty dict or an unexpected type - nothing to tabulate
        if not isinstance(payload, dict) or not payload:
            return pd.DataFrame()

        # Case 2a / 2b: records live under the 'data' key
        data = payload.get("data")
        if isinstance(data, list):
            return pd.DataFrame(data)
        if isinstance(data, dict):
            return pd.DataFrame([data])

        # Case 2c: no usable 'data' key - treat the dict itself as one record,
        # leaving out the common metadata keys when other keys remain.
        metadata_keys = {"apiVersion", "batchSize", "totalRecords", "status"}
        data_subset = {k: v for k, v in payload.items() if k not in metadata_keys}
        return pd.DataFrame([data_subset or payload])

## 8. Test the Fixed Client Implementation

Let's compare the original client with our fixed implementation to see if it resolves the issues.

In [None]:
# Initialize both client versions
original_client = ElexonApiClient()
fixed_client = FixedElexonApiClient()


def _call_and_report(label, client, path, params):
    """Call ``client.call_endpoint`` once, print a short report, return the frame.

    Returns the DataFrame on success, or None when anything in the call or
    the report raised, so the caller can tell a failed call apart from a
    successful-but-empty one.
    """
    print(f"\n{label}:")
    try:
        start_time = pd.Timestamp.now()
        df = client.call_endpoint(path, path_params={}, query_params=params)
        end_time = pd.Timestamp.now()
        print(f"Call completed in {(end_time - start_time).total_seconds():.2f} seconds")
        print(f"Result shape: {df.shape}")
        if not df.empty:
            print(f"Columns: {df.columns.tolist()[:5]}")
            print("First row sample:")
            sample = df.iloc[0].to_dict()
            print(dict(list(sample.items())[:5]))
        else:
            print("Empty DataFrame returned")
    except Exception as e:
        # Deliberately broad: this is a diagnostic loop and one failing
        # endpoint must not stop the remaining ones from being tested.
        print(f"Error: {e}")
        return None
    return df


# Test with our problem endpoints and compare results
for path, params in problem_endpoints:
    print(f"\n\n{'='*50}")
    print(f"Testing endpoint: {path}")
    print(f"{'='*50}")

    original_df = _call_and_report("Original client", original_client, path, params)
    fixed_df = _call_and_report("Fixed client", fixed_client, path, params)

    # Compare only results produced in THIS iteration; a failed call yields
    # None, so nothing stale (or undefined) from an earlier endpoint is used.
    if original_df is None or fixed_df is None:
        continue
    if original_df.empty or fixed_df.empty or original_df.shape == fixed_df.shape:
        continue

    print("\nDifference detected between original and fixed implementations!")
    print(f"Original shape: {original_df.shape}")
    print(f"Fixed shape: {fixed_df.shape}")

    # Compare columns
    original_cols = set(original_df.columns)
    fixed_cols = set(fixed_df.columns)

    print(f"Columns only in original: {original_cols - fixed_cols}")
    print(f"Columns only in fixed: {fixed_cols - original_cols}")

## 9. Test Specific API Method Wrappers

Let's test some of the specific wrapper methods in the client to see if they correctly call the API and process the results.

In [None]:
# Define a list of methods to test with their arguments
from datetime import datetime, timedelta

# Sample the clock once so both dates are derived from the same instant
now = datetime.now()
yesterday = (now - timedelta(days=1)).strftime("%Y-%m-%d")
last_week = (now - timedelta(days=7)).strftime("%Y-%m-%d")

# List of (method_name, kwargs) to test
method_tests = [
    ("get_demand_actual_total", {}),
    ("get_generation_actual_per_type", {}),
    ("get_forecast_demand_day_ahead", {}),
    ("get_generation_outturn_current", {}),
    ("get_forecast_generation_wind_latest", {}),
    ("get_settlement_system_prices", {"settlementDate": yesterday}),
    ("get_acceptances", {"from_": last_week, "to": yesterday}),
]


def _run_wrapper(label, client, method_name, kwargs):
    """Look up and call one wrapper method on ``client`` and print a report.

    The attribute lookup happens inside the ``try`` so that a wrapper missing
    from the client is reported like any other error instead of aborting the
    whole test run. Returns the DataFrame, or None if anything raised.
    """
    print(f"\n{label}:")
    try:
        method = getattr(client, method_name)
        start_time = pd.Timestamp.now()
        df = method(**kwargs)
        end_time = pd.Timestamp.now()
        print(f"Call completed in {(end_time - start_time).total_seconds():.2f} seconds")
        print(f"Result shape: {df.shape}")
        if not df.empty:
            print(f"Columns: {df.columns.tolist()[:5]}")
            print("First row sample:")
            sample = df.iloc[0].to_dict()
            print(dict(list(sample.items())[:5]))
        else:
            print("Empty DataFrame returned")
    except Exception as e:
        # Deliberately broad: keep going so every listed method gets tested.
        print(f"Error: {e}")
        return None
    return df


# Test each method with both original and fixed client
for method_name, kwargs in method_tests:
    print(f"\n\n{'='*50}")
    print(f"Testing method: {method_name}")
    print(f"With args: {kwargs}")
    print(f"{'='*50}")

    original_df = _run_wrapper("Original client", original_client, method_name, kwargs)
    fixed_df = _run_wrapper("Fixed client", fixed_client, method_name, kwargs)

## 10. Summary of Findings and Recommended Fixes

Based on our testing, let's summarize what we've found and recommend changes to the Elexon API client:

1. Response structure variations:
   - Some endpoints return a list directly
   - Some return a dict with a "data" key that contains a list
   - Some return a dict with a "data" key that contains a dict
   - Some return a dict with no "data" key at all

2. Key issues in the current implementation:
   - Not handling endpoints that return a dict without a "data" key
   - Not properly handling nested data structures

3. Recommended fixes:
   - Update the `_get` method to handle all the response structure variations
   - Ensure proper handling of metadata vs data fields in the response
   - Add better error handling and logging

## 11. Implement the Final Fix

Based on our testing, here's the implementation we recommend for the ElexonApiClient._get method:

In [None]:
def recommended_get(path: str, params: dict) -> pd.DataFrame:
    """
    Recommended implementation for the _get method in ElexonApiClient

    This implementation handles all observed response structures:
    1. Lists of data objects
    2. Dicts with a "data" key containing a list
    3. Dicts with a "data" key containing a dict
    4. Dicts without a usable "data" key: the whole payload (metadata fields
       included) becomes a single row, provided it has at least one key that
       is not a known metadata key; otherwise an empty DataFrame is returned

    Args:
        path: Path appended to the BMRS v1 base URL.
        params: Query parameters passed to ``requests.get``.

    Returns:
        A DataFrame as described above. Request errors and ``ValueError``s
        (e.g. an undecodable JSON body) are printed and produce an empty
        DataFrame.
    """
    url = f"https://data.elexon.co.uk/bmrs/api/v1{path}"
    headers = {"apiKey": config.ELEXON_API_KEY}

    # Common envelope/metadata fields; a payload made up solely of these
    # is considered to carry no data.
    metadata_keys = {"apiVersion", "batchSize", "totalRecords", "status",
                     "serviceType", "elapsedTime"}

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()

        # Case 1: Direct list of data objects
        if isinstance(payload, list):
            return pd.DataFrame(payload)

        # Case 3: Unexpected format
        if not isinstance(payload, dict):
            print(f"Warning: Unexpected response format from {url}")
            return pd.DataFrame()

        # Case 2a / 2b: records live under the 'data' key
        data = payload.get("data")
        if isinstance(data, list):
            return pd.DataFrame(data)
        if isinstance(data, dict):
            return pd.DataFrame([data])

        # Case 2c: Dict without a usable 'data' key. Test for the EXISTENCE
        # of a non-metadata key (not the key's truthiness, which would
        # wrongly ignore an empty-string key).
        if any(key not in metadata_keys for key in payload):
            return pd.DataFrame([payload])

        # Nothing but metadata: no record to return
        return pd.DataFrame()

    except requests.RequestException as e:
        print(f"Error fetching {url} with params={params}: {e}")
        return pd.DataFrame()
    except ValueError as e:
        print(f"Error parsing JSON from {url}: {e}")
        return pd.DataFrame()

## 12. Final Recommendation and Code Update

Based on our testing, we recommend updating the `_get` method in the `ElexonApiClient` class to properly handle the different response structures from the Elexon API. Here's the implementation we've tested that works for all the scenarios we encountered.

In [None]:
# Final code to update in elexon_client.py

"""
Replace the existing _get method with this improved version that handles
all observed response structures from the Elexon API.
"""

def _get(self, path: str, params: dict) -> pd.DataFrame:
    """
    Internal helper to do a GET at self.base_url + path, with query params=params
    and header {"apiKey": self.api_key}. Returns DataFrame from JSON payload.

    Handles multiple response formats:
    - List of data objects
    - Dict with 'data' key containing a list
    - Dict with 'data' key containing a dict
    - Dict without a usable 'data' key: the whole payload (metadata fields
      included) becomes a single record, provided it has at least one key
      that is not a known metadata key; otherwise an empty DataFrame

    Request errors and ValueErrors (e.g. an undecodable JSON body) are
    printed and produce an empty DataFrame.
    """
    url = f"{self.base_url}{path}"
    headers = {"apiKey": self.api_key}

    # Common envelope/metadata fields; a payload made up solely of these
    # is considered to carry no data.
    metadata_keys = {"apiVersion", "batchSize", "totalRecords", "status",
                     "serviceType", "elapsedTime"}

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()

        # Case 1: Direct list of data objects
        if isinstance(payload, list):
            return pd.DataFrame(payload)

        # Case 3: Unexpected format
        if not isinstance(payload, dict):
            print(f"Warning: Unexpected response format from {url}")
            return pd.DataFrame()

        # Case 2a / 2b: records live under the 'data' key
        data = payload.get("data")
        if isinstance(data, list):
            return pd.DataFrame(data)
        if isinstance(data, dict):
            return pd.DataFrame([data])

        # Case 2c: Dict without a usable 'data' key. Test for the EXISTENCE
        # of a non-metadata key (not the key's truthiness, which would
        # wrongly ignore an empty-string key).
        if any(key not in metadata_keys for key in payload):
            return pd.DataFrame([payload])

        # Nothing but metadata: no record to return
        return pd.DataFrame()

    except requests.RequestException as e:
        print(f"Error fetching {url} with params={params}: {e}")
        return pd.DataFrame()
    except ValueError as e:
        print(f"Error parsing JSON from {url}: {e}")
        return pd.DataFrame()