In [10]:
import pandas as pd
import requests
import xml.etree.ElementTree as ET
from tqdm import tqdm
import sys
sys.path.append("..")

import config

In [11]:
# Bidding-zone identifier sent to the API as outBiddingZone_Domain.
domain = "10YBE----------2"

# Years to download: 2015 through 2022 inclusive (range() excludes its stop value).
dates = range(2015, 2022 + 1)

# Empty frame that fixes the output schema: one "datetime" and one "load" column.
temp_df = pd.DataFrame(data=[], columns=["datetime", "load"])

In [12]:
def get_response(year, BZ, timeout=60):
    """
    Query the ENTSO-E API for one year of load data in one bidding zone.

    Parameters
    ----------
    year : int or str
        Calendar year; the request spans <year>-01-01 00:00 to <year>-12-31 23:00.
    BZ : str
        Bidding-zone code, sent as the ``outBiddingZone_Domain`` parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60). Without a
        timeout, ``requests`` would wait indefinitely on a stalled connection.

    Returns
    -------
    requests.Response
        The successful (2xx) response; pass it to ``parse_data``.

    Raises
    ------
    SystemExit
        On any ``requests`` failure (connection error, timeout, HTTP error
        status). The security token is masked in the message.
    """
    # NOTE(review): periodEnd stops at 23:00 on 31 Dec, so the final hour of the
    # year does not appear to be requested — confirm against the API's interval
    # semantics before changing it.
    params = {
        "securityToken": config.security_token,
        "documentType": "A65",
        "processType": "A16",
        "outBiddingZone_Domain": BZ,
        "periodStart": str(year) + "01010000",
        "periodEnd": str(year) + "12312300",
    }

    try:
        response = requests.get(
            "https://web-api.tp.entsoe.eu/api",
            params=params,
            timeout=timeout,
        )
        # Fail here on 4xx/5xx instead of handing an error body to the parser.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # requests embeds the full URL (query string included) in its error
        # text, which would print the security token into the notebook output.
        message = str(e)
        token = str(config.security_token)
        if token:
            message = message.replace(token, "<redacted>")
        # "from None" keeps the unmasked original out of the chained traceback.
        raise SystemExit(message) from None

    return response

In [13]:
def _resolution_to_timedelta(resolution):
    """Convert an ISO 8601 duration such as "PT15M" or "PT60M" to a pandas Timedelta.

    Only day, hour, minute and second components are understood; anything else
    (or a zero-length duration) raises ValueError.
    """
    import re  # local import so this cell does not depend on an edit to the import cell

    text = (resolution or "").strip()
    match = re.fullmatch(r"P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?", text)
    if match is None:
        raise ValueError("Unsupported resolution: %r" % (resolution,))

    days, hours, minutes, seconds = (int(part or 0) for part in match.groups())
    step = pd.Timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
    if step <= pd.Timedelta(0):
        raise ValueError("Unsupported resolution: %r" % (resolution,))
    return step


def parse_data(resp):
    """
    Turn one API response into a flat dataframe of timestamps and load values.

    Every ``Point`` of every ``Period`` of every ``TimeSeries`` in the XML
    document is read. A point's timestamp is the period start plus
    ``(position - 1)`` times the period's own ``resolution``.

    Parameters
    ----------
    resp : object with a ``content`` attribute
        The API response; ``resp.content`` must hold the XML document.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` (timezone-naive timestamps expressed in UTC) and
        ``load`` (the quantity exactly as it appears in the XML, i.e. text —
        convert to a number downstream). The index runs from 0 upwards.
        Call this once per year and bidding zone; the resulting frames can be
        concatenated afterwards.

    Raises
    ------
    ValueError
        If the document contains no ``TimeSeries`` element, or a period uses a
        resolution that cannot be interpreted.
    """
    root = ET.fromstring(resp.content)

    # Reuse the namespace of the root element for all lookups so that children
    # are found by name rather than by their position among siblings.
    ns = root.tag[: root.tag.index("}") + 1] if root.tag.startswith("{") else ""

    timeseries = root.findall("./" + ns + "TimeSeries")
    if not timeseries:
        # Presumably an acknowledgement/error document; surface its reason text if any.
        reason = root.findtext(".//" + ns + "Reason/" + ns + "text")
        raise ValueError("Response contains no TimeSeries element: %s" % (reason or "no reason given"))

    datetimes = []
    loads = []
    for series in timeseries:
        for period in series.findall(ns + "Period"):
            start_text = period.findtext(ns + "timeInterval/" + ns + "start")
            # Parse as UTC, then drop the timezone so the column stays naive.
            period_start = pd.to_datetime(start_text, utc=True).tz_localize(None)
            step = _resolution_to_timedelta(period.findtext(ns + "resolution"))

            for point in period.findall(ns + "Point"):
                position = int(point.findtext(ns + "position"))
                # Positions are 1-based: position 1 sits exactly on the period start.
                datetimes.append(period_start + (position - 1) * step)
                loads.append(point.findtext(ns + "quantity"))

    return pd.DataFrame({"datetime": datetimes, "load": loads}, columns=["datetime", "load"])

In [14]:
# Download and parse one year at a time, then join the yearly frames with a
# single concat. The result is built from scratch rather than appended to the
# previous value of temp_df, so re-running this cell does not duplicate rows.
yearly_frames = []
for year in tqdm(dates):
    resp = get_response(year, domain)
    yearly_frames.append(parse_data(resp))

if yearly_frames:
    temp_df = pd.concat(yearly_frames, axis=0, ignore_index=True)
else:
    # No years requested: keep an empty frame with the expected columns.
    temp_df = pd.DataFrame([], columns=["datetime", "load"])

100%|█████████████████████████████████████████████| 8/8 [00:34<00:00,  4.29s/it]


In [15]:
# Aggregate the downloaded readings to hourly means.
# The quantities arrive as text, so make them numeric before averaging.
temp_df["load"] = temp_df["load"].astype(float)
# "60min" yields the same hourly bins as the "H" alias, which newer pandas
# releases deprecate.
hourly = temp_df.resample("60min", on="datetime").load.mean()
loadACT_df = pd.DataFrame({"datetime": hourly.index, "load": hourly.values})

In [17]:
# Write the hourly load table to disk; the row index is omitted from the file.
output_path = "../data/temp/loadACT.csv"
loadACT_df.to_csv(output_path, index=False)