In [5]:
import pandas as pd
import requests
import xml.etree.ElementTree as ET
from tqdm import tqdm
import sys
sys.path.append("..")

import config

In [20]:
# Bidding-zone domain codes that can be assigned to `domain`:
#   be        10YBE----------2
#   de/lu     10Y1001A1001A82H
#   de/at/lu  10Y1001A1001A63L
#   nl        10YNL----------L
#   fr        10YFR-RTE------C
domain = "10YFR-RTE------C"

# Years to download: 2015 through 2022 (the end of a range is exclusive).
dates = range(2015, 2023)

# Empty accumulator with the two output columns; the download loop appends
# each year's parsed frame to it.
priceDAH_df = pd.DataFrame(data=[], columns=["datetime", "price"])

In [7]:
def get_response(year, BZ, timeout=120):
    """
    Request one calendar year of data for a bidding zone from the ENTSO-E API.

    The request uses documentType "A44" (presumably the day-ahead price
    document, given how the result is used in this notebook -- confirm
    against the API guide) with the same domain as in_Domain and out_Domain.

    Parameters
    ----------
    year : int or str
        Calendar year. The requested period runs from "<year>01010000"
        to "<year>12312300".
    BZ : str
        Bidding-zone domain code, e.g. "10YFR-RTE------C".
    timeout : float or tuple, optional
        Seconds handed to ``requests.get`` as its ``timeout``. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    requests.Response
        The successful API response; it can be fed to ``parse_data``.

    Raises
    ------
    SystemExit
        On any ``requests`` failure: connection problems, timeouts, and
        HTTP error statuses (4xx/5xx).
    """
    params = {
        "securityToken": config.security_token,
        "documentType": "A44",
        "in_Domain": BZ,
        "out_Domain": BZ,
        "periodStart": str(year) + "01010000",
        "periodEnd": str(year) + "12312300",
    }

    try:
        response = requests.get(
            "https://web-api.tp.entsoe.eu/api",
            params=params,
            timeout=timeout,
        )
        # An error status (bad token, rate limit, malformed request) must not
        # be passed on as if it were data; HTTPError is a RequestException, so
        # it is reported through the same SystemExit path as other failures.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise SystemExit(e) from e

    return response

In [8]:
def _resolution_to_timedelta(text):
    """Convert a Period's resolution text (e.g. "PT60M") into a pandas Timedelta."""
    if text is None:
        # No resolution element: keep the hourly spacing this parser has
        # always assumed.
        return pd.Timedelta(hours=1)
    text = text.strip()
    minutes = text[2:-1]
    if text.startswith("PT") and text.endswith("M") and minutes.isdigit():
        return pd.Timedelta(minutes=int(minutes))
    # Any other duration format is left to pandas, which raises ValueError
    # when it cannot parse the string.
    return pd.Timedelta(text)


def parse_data(resp):
    """
    Turn an API response into a dataframe of timestamps and prices.

    Every Point of every Period of every TimeSeries in the XML document
    becomes one row. The timestamp of a Point is the Period's start plus
    (position - 1) times the Period's resolution.

    Parameters
    ----------
    resp : object with a ``content`` attribute
        The API response; ``resp.content`` must hold the XML document.

    Returns
    -------
    pandas.DataFrame
        Columns "datetime" (timezone-naive datetime, expressed in UTC) and
        "price" (numeric). The index runs from 0 to the number of rows - 1.
        A document without any TimeSeries yields an empty dataframe with
        the same two columns.

    This function is to be executed per year, per bidding zone; the
    resulting dataframes can then be combined.
    """
    root = ET.fromstring(resp.content)

    # Take the XML namespace from the root tag ("{namespace}Name") instead of
    # hard-coding one schema revision, so other revisions parse as well.
    ns = root.tag[: root.tag.index("}") + 1] if root.tag.startswith("{") else ""

    # Elements are looked up by tag name rather than by child position, so
    # extra or reordered fields in a TimeSeries do not shift the parser.
    records = []
    for ts in root.findall(f"./{ns}TimeSeries"):
        for period in ts.findall(f"{ns}Period"):
            start_text = period.findtext(f"{ns}timeInterval/{ns}start")
            step = _resolution_to_timedelta(period.findtext(f"{ns}resolution"))
            for point in period.findall(f"{ns}Point"):
                position = int(point.findtext(f"{ns}position"))
                records.append({
                    "start": start_text,
                    # Positions are 1-based: position 1 is the Period start.
                    "offset": (position - 1) * step,
                    "price": point.findtext(f"{ns}price.amount"),
                })

    combined_df = pd.DataFrame(records, columns=["start", "offset", "price"])

    # Parse as UTC, then drop the timezone so the column stays naive.
    # (A unit-less astype("datetime64") is rejected by pandas 2.x.)
    start = pd.to_datetime(combined_df["start"], utc=True).dt.tz_localize(None)

    out_df = pd.DataFrame({
        "datetime": start + pd.to_timedelta(combined_df["offset"]),
        # The XML text is a string; convert so "price" really is a number.
        "price": pd.to_numeric(combined_df["price"]),
    })

    return out_df

In [21]:
# Download and parse every year first, then combine the yearly frames with a
# single concat. Rebuilding priceDAH_df from scratch makes this cell safe to
# re-run (the same years are not appended a second time) and avoids
# concatenating onto an empty, object-typed seed frame.
yearly_frames = [parse_data(get_response(year, domain)) for year in tqdm(dates)]

if yearly_frames:
    priceDAH_df = pd.concat(yearly_frames, axis=0, ignore_index=True)
else:
    # No years requested: keep an empty frame with the expected columns.
    priceDAH_df = pd.DataFrame([], columns=["datetime", "price"])

100%|█████████████████████████████████████████████| 8/8 [00:58<00:00,  7.31s/it]


In [22]:
# Write the combined prices to disk; the dataframe index is left out of the file.
priceDAH_df.to_csv(
    path_or_buf="../data/priceDAH_FR.csv",
    index=False,
)