# In [1]:
import requests
import pandas as pd
from io import StringIO
import xml.etree.ElementTree as ET
from typing import Dict

# In [7]:
import requests
import pandas as pd
from typing import Dict, Optional, List
import xml.etree.ElementTree as ET
from io import StringIO

SDMX_BASE = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/3.0"

def get_dimension_order(dataset_code: str) -> List[str]:
    """
    Return the ids of the Dimension elements in a dataflow's structure message.

    The ids are listed in the order the elements appear in the XML response;
    elements without an 'id' attribute (or with an empty one) are skipped.

    Args:
        dataset_code: Eurostat dataset code

    Returns:
        List of dimension ids (empty if the response has no Dimension elements)
    """
    response = requests.get(
        f"{SDMX_BASE}/structure/dataflow/ESTAT/{dataset_code}",
        timeout=90,
    )
    response.raise_for_status()

    namespaces = {
        's': 'http://www.sdmx.org/resources/sdmxml/schemas/v3_0/structure'
    }
    tree = ET.fromstring(response.content)

    return [
        node.get('id')
        for node in tree.findall('.//s:Dimension', namespaces)
        if node.get('id')
    ]


def fetch_codelists(dataset_code: str) -> Dict[str, pd.DataFrame]:
    """
    Fetch the codelists referenced by a dataflow, with their English labels.

    Args:
        dataset_code: Eurostat dataset code

    Returns:
        Dict mapping codelist id to a DataFrame with columns 'code' and
        'label'. Codes that have no English name are skipped, and codelists
        left without any code are omitted.

    Raises:
        requests.HTTPError: if the structure request returns an error status
        xml.etree.ElementTree.ParseError: if the response is not well-formed XML
    """
    structure_url = f"{SDMX_BASE}/structure/dataflow/ESTAT/{dataset_code}?references=descendants&detail=referencepartial"

    resp = requests.get(structure_url, timeout=90)
    resp.raise_for_status()

    root = ET.fromstring(resp.content)
    ns = {
        's': 'http://www.sdmx.org/resources/sdmxml/schemas/v3_0/structure',
        'c': 'http://www.sdmx.org/resources/sdmxml/schemas/v3_0/common',
        # ElementTree resolves every prefixed name in a path through this map,
        # including the reserved 'xml' prefix of the xml:lang predicate below.
        # Without this entry find() raises
        # "SyntaxError: prefix 'xml' not found in prefix map".
        'xml': 'http://www.w3.org/XML/1998/namespace',
    }

    mappings = {}

    for codelist in root.findall('.//s:Codelist', ns):
        codes = []

        for code in codelist.findall('.//s:Code', ns):
            name_elem = code.find('.//c:Name[@xml:lang="en"]', ns)
            if name_elem is not None:
                codes.append({'code': code.get('id'), 'label': name_elem.text})

        # Skip codelists that yielded no labelled codes so callers never
        # receive an empty, column-less DataFrame.
        if codes:
            mappings[codelist.get('id')] = pd.DataFrame(codes)

    return mappings


def _build_series_key(dim_order: List[str], filters: Optional[Dict[str, str]]) -> str:
    """Join per-dimension filter values with '.', leaving unfiltered dimensions empty."""
    selected = filters or {}
    return '.'.join(selected.get(dim, '') for dim in dim_order)


def _match_codelist(dim: str, mappings: Dict[str, pd.DataFrame]) -> Optional[pd.DataFrame]:
    """Pick the codelist for a dimension: exact id match first, then substring."""
    wanted = dim.lower()

    # An exact (case-insensitive) id match wins, so that e.g. 'geo' is not
    # paired with an unrelated codelist whose id merely contains "geo".
    for cl_id, cl_df in mappings.items():
        if cl_id.lower() == wanted:
            return cl_df

    # Fall back to the first codelist whose id contains the dimension name.
    for cl_id, cl_df in mappings.items():
        if wanted in cl_id.lower():
            return cl_df

    return None


def fetch_eurostat_data(
    dataset_code: str = "nrg_cb_gasm",
    filters: Optional[Dict[str, str]] = None,
    start_period: Optional[str] = None,
    end_period: Optional[str] = None,
    enrich_labels: bool = True
) -> pd.DataFrame:
    """
    Fetch data from Eurostat SDMX 3.0 API.

    Args:
        dataset_code: Eurostat dataset code
        filters: Dict with dimension filters, e.g. {'freq': 'M', 'geo': 'DE+FR', 'nrg_bal': 'IMP'}.
            Keys that are not dimensions of the dataset are ignored with a warning.
        start_period: Start period '2019-01'
        end_period: End period '2024-12'
        enrich_labels: Merge dimension labels

    Returns:
        DataFrame parsed from the CSV response, with OBS_VALUE renamed to
        'value', a parsed 'date' column, and (when enrich_labels is set) one
        '<dimension>_label' column per dimension that has a matching codelist.

    Raises:
        requests.HTTPError: if a request returns an error status
        KeyError: if the response has no TIME_PERIOD column
        ValueError: if TIME_PERIOD values are not in 'YYYY-MM' form
    """
    import warnings

    dim_order = get_dimension_order(dataset_code)
    print(f"Dimension order: {dim_order}")

    # A filter on a name that is not a dimension cannot be placed in the key;
    # surface it instead of silently sending a broader query than intended.
    ignored = [name for name in (filters or {}) if name not in dim_order]
    if ignored:
        warnings.warn(
            f"Ignoring filters for unknown dimensions of {dataset_code}: {ignored}",
            stacklevel=2,
        )

    # Build key from filters
    # NOTE(review): confirm against the Eurostat SDMX 3.0 documentation that
    # empty key parts act as wildcards and that startPeriod/endPeriod are
    # accepted by this endpoint.
    key = _build_series_key(dim_order, filters)

    data_url = f"{SDMX_BASE}/data/dataflow/ESTAT/{dataset_code}/{key}"

    params = {'format': 'csvdata'}
    if start_period:
        params['startPeriod'] = start_period
    if end_period:
        params['endPeriod'] = end_period

    print(f"Fetching: {data_url}")
    print(f"Params: {params}")

    resp = requests.get(data_url, params=params, timeout=120)
    resp.raise_for_status()

    df = pd.read_csv(StringIO(resp.text))

    # Strict monthly format: other period notations raise ValueError here.
    df['date'] = pd.to_datetime(df['TIME_PERIOD'], format='%Y-%m')
    df.rename(columns={'OBS_VALUE': 'value'}, inplace=True)

    if enrich_labels:
        mappings = fetch_codelists(dataset_code)

        for dim in dim_order:
            dim_lower = dim.lower()
            if dim_lower not in df.columns:
                continue

            cl_df = _match_codelist(dim, mappings)
            if cl_df is None:
                continue

            # Left merge keeps every observation even when a code has no label.
            df = df.merge(
                cl_df.rename(columns={'code': dim_lower, 'label': f'{dim_lower}_label'}),
                on=dim_lower,
                how='left'
            )

    return df


if __name__ == "__main__":
    # Example run against the default dataset: monthly frequency, the
    # IMP and EXP balance codes, five geo codes, periods 2019-01 to 2024-12.
    df = fetch_eurostat_data(
        filters=dict(freq='M', nrg_bal='IMP+EXP', geo='DE+FR+IT+ES+NL'),
        start_period='2019-01',
        end_period='2024-12',
        enrich_labels=True,
    )

    # Summarise what came back: row count, a preview, and the covered dates.
    print(f"\nFetched {len(df)} rows")
    print("\nSample:")
    print(df.head(20))
    print(f"\nDate range: {df['date'].min()} to {df['date'].max()}")

# AssertionError:

# AssertionError: