## API pull / data transformation task - Germany energy price data extraction & preprocessing

**Objective**: Extract and preprocess historical price data for Germany's day-ahead market and ancillary service market to support the battery revenue model.

**Steps**:

1. **Data Extraction**: Utilize the ENTSOe API to retrieve historical price data in XML format for both the day-ahead market and ancillary service market in Germany.

2. **Data Transformation**: Convert the extracted XML data into a structured CSV format. The transformation should ensure that the data is organized as an hourly timeseries.

3. **Preprocessing**: Ensure that the CSV data is clean, properly formatted, and ready for integration into the battery revenue model.

In [60]:
#!pip install --upgrade certifi

In [1]:
from entsoe import EntsoePandasClient
import pandas as pd
import xml.etree.ElementTree as ET
import requests

import warnings
warnings.filterwarnings("ignore")

In [80]:
# Maximize output by offset argument via loop - Day Ahead market
# 2022

import requests
import xml.etree.ElementTree as ET
import pandas as pd

# Base URL for the API request
base_url = "https://web-api.tp.entsoe.eu/api"

# Parameters for the API request
params = {
    'documentType': 'A44',
    'out_Domain': '10Y1001A1001A82H',
    'in_Domain': '10Y1001A1001A82H',
    'periodStart': '202201010000',
    'periodEnd': '202212310000',
    'classificationSequence_AttributeInstanceComponent.position': '2', # Interchange for hourly DA vs 15 min intraday
    'securityToken': 'a9edeb0e-fdf2-4431-b850-cbbe2201c6bc'
}

# Initialize an empty list to store data
all_data = []

# Loop through offsets to retrieve all necessary documents
for offset in range(0, 400, 100):  # Adjust the range as needed
    params['offset'] = offset
    response = requests.get(base_url, params=params, verify=False)
    
    # Parse the XML content
    root = ET.fromstring(response.text)
    
    # Namespace dictionary to handle XML namespaces
    ns = {'ns': 'urn:iec62325.351:tc57wg16:451-3:publicationdocument:7:3'}
    
    # Extract data from XML
    for time_series in root.findall('.//ns:TimeSeries', ns):
        series_id = time_series.find('ns:mRID', ns).text
        auction_type = time_series.find('ns:auction.type', ns).text
        business_type = time_series.find('ns:businessType', ns).text
        in_domain = time_series.find('ns:in_Domain.mRID', ns).text
        out_domain = time_series.find('ns:out_Domain.mRID', ns).text
        contract_type = time_series.find('ns:contract_MarketAgreement.type', ns).text
        currency = time_series.find('ns:currency_Unit.name', ns).text
        unit = time_series.find('ns:price_Measure_Unit.name', ns).text
        curve_type = time_series.find('ns:curveType', ns).text
        
        # Extract period details
        period = time_series.find('ns:Period', ns)
        start_time = period.find('ns:timeInterval/ns:start', ns).text
        end_time = period.find('ns:timeInterval/ns:end', ns).text
        resolution = period.find('ns:resolution', ns).text

        for point in period.findall('ns:Point', ns):
            position = point.find('ns:position', ns).text
            price_amount = point.find('ns:price.amount', ns).text
            
            # Append to data list
            all_data.append({
                'series_id': series_id,
                'auction_type': auction_type,
                'business_type': business_type,
                'in_domain': in_domain,
                'out_domain': out_domain,
                'contract_type': contract_type,
                'currency': currency,
                'unit': unit,
                'curve_type': curve_type,
                'start_time': start_time,
                'end_time': end_time,
                'resolution': resolution,
                'position': position,
                'price_amount': price_amount
            })

# Convert to DataFrame
DA2022 = pd.DataFrame(all_data)

# Convert columns to appropriate data types
#DA['position'] = pd.to_numeric(DA['position'])
#DA['price_amount'] = pd.to_numeric(DA['price_amount'])

#DA

In [82]:
# 2023 - due to platform's 1 year limitation requirements

# Base URL for the API request
base_url = "https://web-api.tp.entsoe.eu/api"

# Parameters for the API request
params = {
    'documentType': 'A44',
    'out_Domain': '10Y1001A1001A82H',
    'in_Domain': '10Y1001A1001A82H',
    'periodStart': '202301010000',
    'periodEnd': '202312310000',
    'classificationSequence_AttributeInstanceComponent.position': '2', # Interchange for hourly DA vs 15 min intraday
    'securityToken': 'a9edeb0e-fdf2-4431-b850-cbbe2201c6bc'
}

# Initialize an empty list to store data
all_data = []

# Loop through offsets to retrieve all necessary documents
for offset in range(0, 400, 100):  # Adjust the range as needed
    params['offset'] = offset
    response = requests.get(base_url, params=params, verify=False)
    
    # Parse the XML content
    root = ET.fromstring(response.text)
    
    # Namespace dictionary to handle XML namespaces
    ns = {'ns': 'urn:iec62325.351:tc57wg16:451-3:publicationdocument:7:3'}
    
    # Extract data from XML
    for time_series in root.findall('.//ns:TimeSeries', ns):
        series_id = time_series.find('ns:mRID', ns).text
        auction_type = time_series.find('ns:auction.type', ns).text
        business_type = time_series.find('ns:businessType', ns).text
        in_domain = time_series.find('ns:in_Domain.mRID', ns).text
        out_domain = time_series.find('ns:out_Domain.mRID', ns).text
        contract_type = time_series.find('ns:contract_MarketAgreement.type', ns).text
        currency = time_series.find('ns:currency_Unit.name', ns).text
        unit = time_series.find('ns:price_Measure_Unit.name', ns).text
        curve_type = time_series.find('ns:curveType', ns).text
        
        # Extract period details
        period = time_series.find('ns:Period', ns)
        start_time = period.find('ns:timeInterval/ns:start', ns).text
        end_time = period.find('ns:timeInterval/ns:end', ns).text
        resolution = period.find('ns:resolution', ns).text

        for point in period.findall('ns:Point', ns):
            position = point.find('ns:position', ns).text
            price_amount = point.find('ns:price.amount', ns).text
            
            # Append to data list
            all_data.append({
                'series_id': series_id,
                'auction_type': auction_type,
                'business_type': business_type,
                'in_domain': in_domain,
                'out_domain': out_domain,
                'contract_type': contract_type,
                'currency': currency,
                'unit': unit,
                'curve_type': curve_type,
                'start_time': start_time,
                'end_time': end_time,
                'resolution': resolution,
                'position': position,
                'price_amount': price_amount
            })

# Convert to DataFrame
DA2023 = pd.DataFrame(all_data)

# Convert columns to appropriate data types
#DA['position'] = pd.to_numeric(DA['position'])
#DA['price_amount'] = pd.to_numeric(DA['price_amount'])

#DA

In [83]:
#2024

# Base URL for the API request
base_url = "https://web-api.tp.entsoe.eu/api"

# Parameters for the API request
params = {
    'documentType': 'A44',
    'out_Domain': '10Y1001A1001A82H',
    'in_Domain': '10Y1001A1001A82H',
    'periodStart': '202401010000',
    'periodEnd': '202412310000',
    'contract_MarketAgreement.type' : 'A01',
    'classificationSequence_AttributeInstanceComponent.position': '2',
    'securityToken': 'a9edeb0e-fdf2-4431-b850-cbbe2201c6bc'
}

# Initialize an empty list to store data
all_data = []

# Loop through offsets to retrieve all necessary documents
for offset in range(0, 400, 100):  # Adjust the range as needed
    params['offset'] = offset
    response = requests.get(base_url, params=params, verify=False)
    
    # Parse the XML content
    root = ET.fromstring(response.text)
    
    # Namespace dictionary to handle XML namespaces
    ns = {'ns': 'urn:iec62325.351:tc57wg16:451-3:publicationdocument:7:3'}
    
    # Extract data from XML
    for time_series in root.findall('.//ns:TimeSeries', ns):
        series_id = time_series.find('ns:mRID', ns).text
        auction_type = time_series.find('ns:auction.type', ns).text
        business_type = time_series.find('ns:businessType', ns).text
        in_domain = time_series.find('ns:in_Domain.mRID', ns).text
        out_domain = time_series.find('ns:out_Domain.mRID', ns).text
        contract_type = time_series.find('ns:contract_MarketAgreement.type', ns).text
        currency = time_series.find('ns:currency_Unit.name', ns).text
        unit = time_series.find('ns:price_Measure_Unit.name', ns).text
        curve_type = time_series.find('ns:curveType', ns).text
        
        # Extract period details
        period = time_series.find('ns:Period', ns)
        start_time = period.find('ns:timeInterval/ns:start', ns).text
        end_time = period.find('ns:timeInterval/ns:end', ns).text
        resolution = period.find('ns:resolution', ns).text

        for point in period.findall('ns:Point', ns):
            position = point.find('ns:position', ns).text
            price_amount = point.find('ns:price.amount', ns).text
            
            # Append to data list
            all_data.append({
                'series_id': series_id,
                'auction_type': auction_type,
                'business_type': business_type,
                'in_domain': in_domain,
                'out_domain': out_domain,
                'contract_type': contract_type,
                'currency': currency,
                'unit': unit,
                'curve_type': curve_type,
                'start_time': start_time,
                'end_time': end_time,
                'resolution': resolution,
                'position': position,
                'price_amount': price_amount
            })

# Convert to DataFrame
DA2024 = pd.DataFrame(all_data)

# Display the DataFrame
#print(DA)

In [85]:
DA2024

Unnamed: 0,series_id,auction_type,business_type,in_domain,out_domain,contract_type,currency,unit,curve_type,start_time,end_time,resolution,position,price_amount
0,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2023-12-31T23:00Z,2024-01-01T23:00Z,PT15M,1,39.91
1,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2023-12-31T23:00Z,2024-01-01T23:00Z,PT15M,2,-0.04
2,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2023-12-31T23:00Z,2024-01-01T23:00Z,PT15M,3,-9.01
3,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2023-12-31T23:00Z,2024-01-01T23:00Z,PT15M,4,-29.91
4,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2023-12-31T23:00Z,2024-01-01T23:00Z,PT15M,5,25.28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34952,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,92,-12.42
34953,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,93,34.3
34954,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,94,15.93
34955,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,95,-9.91


In [86]:
DA2 = pd.concat([DA2022, DA2023, DA2024], ignore_index=True)

In [87]:
DA2.to_csv("C:\\Users\\tom.kim\\OneDrive - S&P Global\\Desktop\\Data Science\\Battery Revenues\\pull data\\Germany\\DA2.csv")

In [88]:
DA2 = pd.read_csv("C:\\Users\\tom.kim\\OneDrive - S&P Global\\Desktop\\Data Science\\Battery Revenues\\pull data\\Germany\\DA2.csv")

In [89]:
DA2

Unnamed: 0.1,Unnamed: 0,series_id,auction_type,business_type,in_domain,out_domain,contract_type,currency,unit,curve_type,start_time,end_time,resolution,position,price_amount
0,0,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2021-12-31T23:00Z,2022-01-01T23:00Z,PT15M,1,69.17
1,1,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2021-12-31T23:00Z,2022-01-01T23:00Z,PT15M,2,35.61
2,2,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2021-12-31T23:00Z,2022-01-01T23:00Z,PT15M,3,23.14
3,3,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2021-12-31T23:00Z,2022-01-01T23:00Z,PT15M,4,-3.88
4,4,1,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2021-12-31T23:00Z,2022-01-01T23:00Z,PT15M,5,85.06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104496,104496,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,92,-12.42
104497,104497,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,93,34.30
104498,104498,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,94,15.93
104499,104499,66,A01,A62,10Y1001A1001A82H,10Y1001A1001A82H,A01,EUR,MWH,A03,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,95,-9.91


In [90]:
DA2 = DA2[['start_time', 'end_time', 'position', 'price_amount']]

DA2 = DA2.drop_duplicates(subset=['start_time', 'end_time', 'position'])

DA2['start_time'] = pd.to_datetime(DA2['start_time'], format='%Y-%m-%dT%H:%MZ')
DA2['end_time'] = pd.to_datetime(DA2['end_time'], format='%Y-%m-%dT%H:%MZ')
DA2['position'] = DA2['position'].astype(int)
DA2['price_amount'] = DA2['price_amount'].astype(float)

In [91]:
DA2['exact_time'] = DA2['start_time'] + pd.to_timedelta(15*(DA2['position']-1)+60, unit='m')

In [92]:
DA2 = DA2[['exact_time', 'price_amount']]

DA2.set_index('exact_time', inplace=True)

DA2 = DA2.asfreq('15T').ffill()

DA2 = DA2.rename(columns={'price_amount' : 'Prices'})

In [93]:
DA2

Unnamed: 0_level_0,Prices
exact_time,Unnamed: 1_level_1
2022-01-01 00:00:00,69.17
2022-01-01 00:15:00,35.61
2022-01-01 00:30:00,23.14
2022-01-01 00:45:00,-3.88
2022-01-01 01:00:00,85.06
...,...
2024-12-31 22:45:00,-12.42
2024-12-31 23:00:00,34.30
2024-12-31 23:15:00,15.93
2024-12-31 23:30:00,-9.91


In [94]:
# Physical split and append method
def split_and_save(df, df_name, directory):
    
    # Extract years
    years = [2023, 2024]
    
    for column in df.columns:
        year_series = []
        for year in years:
            year_data = df[df.index.year == year][column].dropna().reset_index(drop=True)
            year_series.append(year_data)
            
        # Combine the yearly series into one DataFrame with columns for each year
        combined_df = pd.concat(year_series, axis=1).fillna(0)
        combined_df.columns = [f"{year}" for year in years]

         # Save the combined DataFrame to a CSV file
        file_name = f"{directory}{df_name}_{column.strip()}.csv"
        combined_df.to_csv(file_name, index=False)

In [95]:
split_and_save(DA2, 'DA2', directory ="final data\\2024\\Germany\\")

In [121]:
# FCR market price pull
base_url = "https://web-api.tp.entsoe.eu/api"

# Parameters for the API request
params = {
    'documentType': 'A81',
    'businessType': 'B95',
    'processType': 'A52',
    'Type_MarketAgreement.Type': 'A01',
    'controlArea_Domain': '10YDE-EON------1', #SCA|DE(TenneT GER)
    'periodStart': '202201010000',
    'periodEnd' : '202412312300',
    'securityToken': 'a9edeb0e-fdf2-4431-b850-cbbe2201c6bc'
}

# Initialize an empty list to store data
all_data = []

# Loop through offsets to retrieve all necessary documents
for offset in range(0, 1300, 100):  # Adjust the range as needed
    params['offset'] = offset
    response = requests.get(base_url, params=params, verify=False)
    
    # Parse the XML content
    root = ET.fromstring(response.text)
    
    # Namespace dictionary to handle XML namespaces
    ns = {'ns': 'urn:iec62325.351:tc57wg16:451-6:balancingdocument:4:4'}
    
    # Extract data from XML
    for time_series in root.findall('.//ns:TimeSeries', ns):
        series_id = time_series.find('ns:mRID', ns).text
        business_type = time_series.find('ns:businessType', ns).text
        market_agreement_type = time_series.find('ns:type_MarketAgreement.type', ns).text
        #market_product_type = time_series.find('ns:standard_MarketProduct.marketProductType', ns).text
        psr_type = time_series.find('ns:mktPSRType.psrType', ns).text
        flow_direction = time_series.find('ns:flowDirection.direction', ns).text
        currency = time_series.find('ns:currency_Unit.name', ns).text
        quantity_unit = time_series.find('ns:quantity_Measure_Unit.name', ns).text
        curve_type = time_series.find('ns:curveType', ns).text
        
        # Extract period details
        period = time_series.find('ns:Period', ns)
        start_time = period.find('ns:timeInterval/ns:start', ns).text
        end_time = period.find('ns:timeInterval/ns:end', ns).text
        resolution = period.find('ns:resolution', ns).text
        
        for point in period.findall('ns:Point', ns):
            position = point.find('ns:position', ns).text
            #quantity = point.find('ns:quantity', ns).text
            procurement_price = point.find('ns:procurement_Price.amount', ns).text
            imbalance_category = point.find('ns:imbalance_Price.category', ns).text
                
            # Append to data list
            all_data.append({
                'series_id': series_id,
                'business_type': business_type,
                'market_agreement_type': market_agreement_type,
                #'market_product_type': market_product_type,
                'psr_type': psr_type,
                'flow_direction': flow_direction,
                'currency': currency,
                'quantity_unit': quantity_unit,
                'curve_type': curve_type,
                'start_time': start_time,
                'end_time': end_time,
                'resolution': resolution,
                'position': position,
                #'quantity': quantity,
                'procurement_price': procurement_price,
                'imbalance_category': imbalance_category
            })

# Convert to DataFrame
FCR = pd.DataFrame(all_data)

#FCR

In [122]:
FCR

Unnamed: 0,series_id,business_type,market_agreement_type,psr_type,flow_direction,currency,quantity_unit,curve_type,start_time,end_time,resolution,position,procurement_price,imbalance_category
0,1,B95,A01,A04,A03,EUR,MAW,A01,2022-04-06T22:00Z,2022-04-07T22:00Z,PT15M,1,7.38,A07
1,1,B95,A01,A04,A03,EUR,MAW,A01,2022-04-06T22:00Z,2022-04-07T22:00Z,PT15M,2,7.38,A07
2,1,B95,A01,A04,A03,EUR,MAW,A01,2022-04-06T22:00Z,2022-04-07T22:00Z,PT15M,3,7.38,A07
3,1,B95,A01,A04,A03,EUR,MAW,A01,2022-04-06T22:00Z,2022-04-07T22:00Z,PT15M,4,7.38,A07
4,1,B95,A01,A04,A03,EUR,MAW,A01,2022-04-06T22:00Z,2022-04-07T22:00Z,PT15M,5,7.38,A07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111843,65,B95,A01,A04,A03,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,92,5.77,A07
111844,65,B95,A01,A04,A03,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,93,5.77,A07
111845,65,B95,A01,A04,A03,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,94,5.77,A07
111846,65,B95,A01,A04,A03,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,95,5.77,A07


In [110]:
FCR.to_csv("Germany\\FCR_new.csv")

In [111]:
FCR1 = pd.read_csv("Germany\\FCR_new.csv")

In [114]:
FCR = FCR1[['start_time', 'end_time', 'position', 'procurement_price']]

FCR = FCR.drop_duplicates(subset=['start_time', 'end_time', 'position'])

FCR['start_time'] = pd.to_datetime(FCR['start_time'], format='%Y-%m-%dT%H:%MZ')
FCR['end_time'] = pd.to_datetime(FCR['end_time'], format='%Y-%m-%dT%H:%MZ')
FCR['position'] = FCR['position'].astype(int)
FCR['procurement_price'] = FCR['procurement_price'].astype(float)

In [120]:
FCR.sort_values(by='exact_time')

Unnamed: 0,start_time,end_time,position,procurement_price,exact_time
0,2022-04-06 22:00:00,2022-04-07 22:00:00,1,7.38,2022-04-06 23:00:00
1,2022-04-06 22:00:00,2022-04-07 22:00:00,2,7.38,2022-04-06 23:15:00
2,2022-04-06 22:00:00,2022-04-07 22:00:00,3,7.38,2022-04-06 23:30:00
3,2022-04-06 22:00:00,2022-04-07 22:00:00,4,7.38,2022-04-06 23:45:00
4,2022-04-06 22:00:00,2022-04-07 22:00:00,5,7.38,2022-04-07 00:00:00
...,...,...,...,...,...
111843,2024-12-30 23:00:00,2024-12-31 23:00:00,92,5.77,2024-12-31 22:45:00
111844,2024-12-30 23:00:00,2024-12-31 23:00:00,93,5.77,2024-12-31 23:00:00
111845,2024-12-30 23:00:00,2024-12-31 23:00:00,94,5.77,2024-12-31 23:15:00
111846,2024-12-30 23:00:00,2024-12-31 23:00:00,95,5.77,2024-12-31 23:30:00


In [119]:
FCR['exact_time'] = FCR['start_time'] + pd.to_timedelta(15*(FCR['position']-1)+60, unit='m')

In [41]:
FCR['procurement_price'] = FCR['procurement_price'] * 4

In [43]:
FCR = FCR[['exact_time', 'procurement_price']]

FCR.set_index('exact_time', inplace=True)

FCR = FCR.asfreq('15T').ffill()

FCR = FCR.rename(columns={'procurement_price' : 'Prices'})

In [44]:
FCR

Unnamed: 0_level_0,Prices
exact_time,Unnamed: 1_level_1
2023-01-01 00:00:00,34.84
2023-01-01 00:15:00,34.84
2023-01-01 00:30:00,34.84
2023-01-01 00:45:00,34.84
2023-01-01 01:00:00,34.84
...,...
2024-12-31 22:45:00,23.08
2024-12-31 23:00:00,23.08
2024-12-31 23:15:00,23.08
2024-12-31 23:30:00,23.08


In [45]:
split_and_save(FCR, 'FCR', directory ="final data\\2024\\Germany\\")

In [4]:
# aFRR market

# Base URL for the API request
base_url = "https://web-api.tp.entsoe.eu/api"

# Parameters for the API request
params = {
    'documentType': 'A81',
    'businessType': 'B95',
    'processType': 'A51',
    'Type_MarketAgreement.Type': 'A01',
    'controlArea_Domain': '10YDE-EON------1', #SCA|DE(TenneT GER)
    'periodStart': '202301010000',
    'periodEnd' : '202412312300',
    'securityToken': 'a9edeb0e-fdf2-4431-b850-cbbe2201c6bc'
}

# Initialize an empty list to store data
all_data = []

# Loop through offsets to retrieve all necessary documents
for offset in range(0, 1600, 100):  # Adjust the range as needed
    params['offset'] = offset
    response = requests.get(base_url, params=params, verify=False)
    
    # Parse the XML content
    root = ET.fromstring(response.text)
    
    # Namespace dictionary to handle XML namespaces
    ns = {'ns': 'urn:iec62325.351:tc57wg16:451-6:balancingdocument:4:4'}
    
    # Extract document-level information
    document_id = root.find('ns:mRID', ns).text
    revision_number = root.find('ns:revisionNumber', ns).text
    process_type = root.find('ns:process.processType', ns).text
    created_date_time = root.find('ns:createdDateTime', ns).text
    area_domain = root.find('ns:area_Domain.mRID', ns).text
    
    # Extract data from XML
    for time_series in root.findall('.//ns:TimeSeries', ns):
        series_id = time_series.find('ns:mRID', ns).text
        business_type = time_series.find('ns:businessType', ns).text
        market_agreement_type = time_series.find('ns:type_MarketAgreement.type', ns).text
       # market_product_type = time_series.find('ns:standard_MarketProduct.marketProductType', ns).text
        psr_type = time_series.find('ns:mktPSRType.psrType', ns).text
        flow_direction = time_series.find('ns:flowDirection.direction', ns).text
        currency = time_series.find('ns:currency_Unit.name', ns).text
        quantity_unit = time_series.find('ns:quantity_Measure_Unit.name', ns).text
        curve_type = time_series.find('ns:curveType', ns).text
        
        # Extract period details
        period = time_series.find('ns:Period', ns)
        start_time = period.find('ns:timeInterval/ns:start', ns).text
        end_time = period.find('ns:timeInterval/ns:end', ns).text
        resolution = period.find('ns:resolution', ns).text
        
        for point in period.findall('ns:Point', ns):
            position = point.find('ns:position', ns).text
            quantity = point.find('ns:quantity', ns).text
            procurement_price = point.find('ns:procurement_Price.amount', ns).text
            imbalance_category = point.find('ns:imbalance_Price.category', ns).text
                
            # Append to data list
            all_data.append({
                'document_id': document_id,
                'revision_number': revision_number,
                'process_type': process_type,
                'created_date_time': created_date_time,
                'area_domain': area_domain,
                'series_id': series_id,
                'business_type': business_type,
                'market_agreement_type': market_agreement_type,
                #'market_product_type': market_product_type,
                'psr_type': psr_type,
                'flow_direction': flow_direction,
                'currency': currency,
                'quantity_unit': quantity_unit,
                'curve_type': curve_type,
                'start_time': start_time,
                'end_time': end_time,
                'resolution': resolution,
                'position': position,
                'quantity': quantity,
                'procurement_price': procurement_price,
                'imbalance_category': imbalance_category
            })

# Convert to DataFrame
aFRR = pd.DataFrame(all_data)

# Print the DataFrame to check its contents
#print(aFRR)

In [5]:
aFRR

Unnamed: 0,document_id,revision_number,process_type,created_date_time,area_domain,series_id,business_type,market_agreement_type,psr_type,flow_direction,currency,quantity_unit,curve_type,start_time,end_time,resolution,position,quantity,procurement_price,imbalance_category
0,01a0998250f64e2baa783f66a9833871,1,A51,2025-01-28T13:13:27Z,10YDE-EON------1,1,B95,A01,A04,A02,EUR,MAW,A01,2022-12-31T23:00Z,2023-01-01T23:00Z,PT15M,1,316,7.52,A06
1,01a0998250f64e2baa783f66a9833871,1,A51,2025-01-28T13:13:27Z,10YDE-EON------1,1,B95,A01,A04,A02,EUR,MAW,A01,2022-12-31T23:00Z,2023-01-01T23:00Z,PT15M,2,316,7.52,A06
2,01a0998250f64e2baa783f66a9833871,1,A51,2025-01-28T13:13:27Z,10YDE-EON------1,1,B95,A01,A04,A02,EUR,MAW,A01,2022-12-31T23:00Z,2023-01-01T23:00Z,PT15M,3,316,7.52,A06
3,01a0998250f64e2baa783f66a9833871,1,A51,2025-01-28T13:13:27Z,10YDE-EON------1,1,B95,A01,A04,A02,EUR,MAW,A01,2022-12-31T23:00Z,2023-01-01T23:00Z,PT15M,4,316,7.52,A06
4,01a0998250f64e2baa783f66a9833871,1,A51,2025-01-28T13:13:27Z,10YDE-EON------1,1,B95,A01,A04,A02,EUR,MAW,A01,2022-12-31T23:00Z,2023-01-01T23:00Z,PT15M,5,316,7.52,A06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151675,94b78d87c2e747b1a1218a8c9aae5835,1,A51,2025-01-28T13:23:55Z,10YDE-EON------1,80,B95,A01,A04,A01,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,92,151,1.14,A06
151676,94b78d87c2e747b1a1218a8c9aae5835,1,A51,2025-01-28T13:23:55Z,10YDE-EON------1,80,B95,A01,A04,A01,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,93,151,1.14,A06
151677,94b78d87c2e747b1a1218a8c9aae5835,1,A51,2025-01-28T13:23:55Z,10YDE-EON------1,80,B95,A01,A04,A01,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,94,151,1.14,A06
151678,94b78d87c2e747b1a1218a8c9aae5835,1,A51,2025-01-28T13:23:55Z,10YDE-EON------1,80,B95,A01,A04,A01,EUR,MAW,A01,2024-12-30T23:00Z,2024-12-31T23:00Z,PT15M,95,151,1.14,A06


In [6]:
aFRR.to_csv("Germany\\aFRR.csv")

In [123]:
aFRR1 = pd.read_csv("Germany\\aFRR.csv")

In [124]:
aFRR_up = aFRR1[aFRR1['flow_direction']=='A01'].copy()
aFRR_down = aFRR1[aFRR1['flow_direction']=='A02'].copy()

In [125]:
aFRR_up.reset_index(inplace=True, drop=True)
aFRR_down.reset_index(inplace=True, drop=True)

In [126]:
aFRR_up = aFRR_up[['start_time', 'end_time', 'position', 'procurement_price']]
aFRR_down = aFRR_down[['start_time', 'end_time', 'position', 'procurement_price']]

In [127]:
aFRR_up = aFRR_up.drop_duplicates(subset=['start_time', 'end_time', 'position'])
aFRR_down = aFRR_down.drop_duplicates(subset=['start_time', 'end_time', 'position'])

In [128]:
aFRR_up['start_time'] = pd.to_datetime(aFRR_up['start_time'], format='%Y-%m-%dT%H:%MZ')
aFRR_up['end_time'] = pd.to_datetime(aFRR_up['end_time'], format='%Y-%m-%dT%H:%MZ')
aFRR_down['start_time'] = pd.to_datetime(aFRR_down['start_time'], format='%Y-%m-%dT%H:%MZ')
aFRR_down['end_time'] = pd.to_datetime(aFRR_down['end_time'], format='%Y-%m-%dT%H:%MZ')

In [129]:
aFRR_up['position'] = aFRR_up['position'].astype(int)
aFRR_up['procurement_price'] = aFRR_up['procurement_price'].astype(float)
aFRR_down['position'] = aFRR_down['position'].astype(int)
aFRR_down['procurement_price'] = aFRR_down['procurement_price'].astype(float)

In [130]:
aFRR_up['exact_time'] = aFRR_up['start_time'] + pd.to_timedelta(15*(aFRR_up['position']-1)+60, unit='m')

In [131]:
aFRR_up = aFRR_up[['exact_time', 'procurement_price']].sort_values(by='exact_time')
aFRR_up['procurement_price'] = aFRR_up['procurement_price'] * 4
aFRR_up.set_index('exact_time', inplace=True)

In [132]:
aFRR_up = aFRR_up.rename(columns={'procurement_price' : 'Prices'})

In [133]:
aFRR_up = aFRR_up.asfreq('15T').ffill()

In [134]:
split_and_save(aFRR_up, 'aFRR_Up', directory ="final data\\2024\\Germany\\")

In [135]:
aFRR_down['exact_time'] = aFRR_down['start_time'] + pd.to_timedelta(15*(aFRR_down['position']-1)+60, unit='m')

In [136]:
aFRR_down = aFRR_down[['exact_time', 'procurement_price']].sort_values(by='exact_time')
aFRR_down['procurement_price'] = aFRR_down['procurement_price'] * 4
aFRR_down.set_index('exact_time', inplace=True)

In [137]:
aFRR_down = aFRR_down.rename(columns={'procurement_price' : 'Prices'})

In [138]:
aFRR_down = aFRR_down.asfreq('15T').ffill()

In [139]:
split_and_save(aFRR_down, 'aFRR_Down', directory ="final data\\2024\\Germany\\")