In [1]:
import os
import time
from multiprocessing.dummy import Pool
from typing import Callable

import pandas as pd
from dotenv import load_dotenv
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError
from entsoe.mappings import PSRTYPE_MAPPINGS
from pycountry import countries

from const import TZ, START, END, COUNTRIES, GEN_TYPES

## Actual generation

Define functions for querying the ENTSO-E Transparency Platform API and downloading the needed data in parallel.

In [2]:
# Read environment settings; the API key used below comes from here
load_dotenv()

# Client for the ENTSO-E TP, authenticated with the key from the environment
client = EntsoePandasClient(api_key=os.environ.get("ENTSOE_APIKEY"))

# Reverse lookup table: textual generation type -> psr type
inverted_psr_mapping = dict(
    zip(PSRTYPE_MAPPINGS.values(), PSRTYPE_MAPPINGS.keys())
)


def get_gen_data(country: str, gentype: str) -> pd.Series:
    """Query actual generation for one country and one generation type.

    Args:
        country: Country name, resolved to a two-letter code via
            ``countries.get(name=...)``
        gentype: Textual generation type; must be a key of
            ``inverted_psr_mapping`` and a column of the query result

    Returns:
        Series named ``(country_code, gentype)`` and sorted by its UTC
        ``timestamp`` index. When the query raises ``NoMatchingDataError``
        or ``ValueError`` the series is empty but has the same name, dtype
        and index layout.

    Raises:
        KeyError: If ``country`` cannot be resolved to a country code or
            ``gentype`` is not a known generation type.
    """
    # Get the two-letter country code for this country
    try:
        country_code = countries.get(name=country).alpha_2
    except AttributeError:
        # The lookup result had no `alpha_2`; the AttributeError is only
        # noise for the caller, so do not chain it onto the KeyError
        raise KeyError(country) from None

    ts_name = (country_code, gentype)

    # Execute the query
    try:
        ts = client.query_generation(
            country_code,
            start=pd.Timestamp(START, tz=TZ),
            end=pd.Timestamp(END, tz=TZ),
            psr_type=inverted_psr_mapping[gentype],
        )[gentype]  # Select the only column
    except (NoMatchingDataError, ValueError):
        # Create an empty series that mirrors the success branch: explicit
        # float dtype (an empty Series without dtype is ambiguous) and a
        # UTC index carrying the same "timestamp" name
        ts = pd.Series(
            dtype="float64",
            name=ts_name,
            index=pd.DatetimeIndex([], tz="UTC", name="timestamp"),
        )
    else:
        # Make sure we have real timestamps in UTC
        ts.index = pd.DatetimeIndex(ts.index, name="timestamp").tz_convert("UTC")
        ts.name = ts_name
    return ts.sort_index()


def download_parallel(function: Callable, arguments,
                      n_threads=1,
                      index_name=None, columns_name=None) -> pd.DataFrame:
    """Download data using a defined function

    Each argument tuple is passed to `function` on a thread pool; the
    returned series become the columns of one data frame.

    Args:
        function: The query function to use, must return a pandas Series
        arguments: List of tuples to pass as arguments to `function`
        n_threads: Number of threads to use
        index_name (optional): Name for index
        columns_name (optional): Name(s) for the columns; a single string
            or a list with one entry per column level

    Returns:
        Data frame with one column per returned series, sorted along both
        axes.
    """
    with Pool(n_threads) as p:
        series = p.starmap(function, arguments)
    df = pd.concat(series, axis=1)
    if index_name is not None:
        df.index.name = index_name
    if columns_name is not None:
        # `Index.names` only accepts list-likes, so wrap a bare string
        if isinstance(columns_name, str):
            columns_name = [columns_name]
        df.columns.names = columns_name
    # `axis` must be given by keyword: positional use is rejected by pandas >= 2.0
    return df.sort_index(axis=0).sort_index(axis=1)

Download data for all generation types using four threads and store in the raw data folder.

In [17]:
# Retry policy for connection failures: give up after a bounded number of
# attempts instead of looping forever, and wait a little longer each time
MAX_ATTEMPTS = 5
RETRY_DELAY_S = 5

for gt in GEN_TYPES:
    # One (country, generation type) argument tuple per query
    idx = pd.MultiIndex.from_product([COUNTRIES, [gt]])
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            df = download_parallel(get_gen_data, idx, n_threads=4)
        except ConnectionError:
            # NOTE(review): only the builtin ConnectionError is caught; if the
            # HTTP layer raises its own exception types they propagate - confirm
            if attempt == MAX_ATTEMPTS:
                raise
            time.sleep(RETRY_DELAY_S * attempt)
        else:
            break
    filename = f"../data/raw/ENTSO-E_TP_generation_{gt}.csv"
    # Drop the (constant) generation-type level so columns are country codes
    df.xs(gt, axis=1, level=1).to_csv(filename, header=True)
    print(f"Wrote {filename}")

Wrote ../data/raw/ENTSO-E_TP_generation_Solar.csv
Wrote ../data/raw/ENTSO-E_TP_generation_Wind Onshore.csv
Wrote ../data/raw/ENTSO-E_TP_generation_Wind Offshore.csv


## Installed generation capacity

Let’s download installed capacities from ENTSO-E

In [3]:
def get_installed_cap_data(gentype: str, country: str) -> pd.Series:
    """Query installed generation capacity for one generation type and country.

    The query window runs from 1 January of the year before ``START`` up
    to ``END``.

    Args:
        gentype: Textual generation type; must be a key of
            ``inverted_psr_mapping`` and a column of the query result
        country: Country name, resolved to a two-letter code via
            ``countries.get(name=...)``

    Returns:
        Series named ``(gentype, country_code)`` whose index ``Year`` holds
        the year of each returned timestamp. When the query raises
        ``NoMatchingDataError`` the series is empty but has the same name,
        dtype and index name.

    Raises:
        KeyError: If ``country`` cannot be resolved to a country code or
            ``gentype`` is not a known generation type.
    """
    # Get the two-letter country code for this country
    try:
        country_code = countries.get(name=country).alpha_2
    except AttributeError:
        # The lookup result had no `alpha_2`; do not chain the
        # AttributeError onto the KeyError the caller sees
        raise KeyError(country) from None

    # Time series name has the arguments
    ts_name = (gentype, country_code)

    # Execute the query
    # NOTE(review): start/end are timezone-naive here, unlike in
    # get_gen_data which passes tz=TZ - confirm this is intended
    try:
        ts = client.query_installed_generation_capacity(
            country_code,
            start=pd.Timestamp(f'{pd.Timestamp(START).year - 1}-01-01'),
            end=pd.Timestamp(END),
            psr_type=inverted_psr_mapping[gentype]
        )[gentype]  # Select the only column
    except NoMatchingDataError:
        # Create an empty series that mirrors the success branch: explicit
        # float dtype and an integer index carrying the same 'Year' name
        ts = pd.Series(
            dtype="float64",
            name=ts_name,
            index=pd.Index([], dtype="int64", name='Year'),
        )
    else:
        ts.index = ts.index.year  # ENTSO-E has numbers for the beginning of the year
        ts.index.name = 'Year'
        ts.name = ts_name
    return ts

In [4]:
# One (generation type, country) argument tuple per query
idx = pd.MultiIndex.from_product([GEN_TYPES, COUNTRIES])
df = download_parallel(get_installed_cap_data, idx, n_threads=4)

# Name the target here so the message below reports the file actually written
# rather than a `filename` left over from an earlier cell
filename = "../data/raw/ENTSO-E_TP_installed_cap.csv"
df.T.to_csv(filename, header=True)
print(f"Wrote {filename}")

HTTPError: 500 Server Error: Internal Server Error for url: https://transparency.entsoe.eu/api?documentType=A68&processType=A33&in_Domain=10YPT-REN------W&psrType=B16&securityToken=<REDACTED>&periodStart=201801010000&periodEnd=201901010000