In [128]:
from datetime import date
from glob import glob

import pandas as pd
from pycountry import countries

from const import COUNTRIES, GEN_TYPES

## Read installed capacity data for wind power

Read capacity data from externally downloaded files. Each file has data for one year.

In [30]:
# Load every downloaded capacity table matching the file pattern and stack
# them into a single frame.
frames = [
    pd.read_csv(filepath)
    for filepath in glob("../data/external/Cumulative_capacity_table_data_*.csv")
]
wind_capacity_data = pd.concat(frames)

Define functions to get country code from country names (incl. fuzzy search).

In [31]:
def get_country_alpha2(country: str) -> str:
    """Return the two-letter country code for a country name.

    Resolution order:
      1. a small table of hard-coded special cases,
      2. an exact name lookup through ``countries.get``,
      3. the first hit of ``countries.search_fuzzy``.

    If none of these yields a code, the input is returned unchanged.
    """
    # Labels that are mapped by hand instead of being looked up
    special_cases = {"UK": "GB", "FYROM": "MK", "Kosovo": "XK"}
    if country in special_cases:
        return special_cases[country]

    try:
        return countries.get(name=country).alpha_2
    except AttributeError:
        # The exact lookup did not produce an object with `alpha_2`;
        # fall through to the fuzzy search.
        pass

    try:
        return countries.search_fuzzy(country)[0].alpha_2
    except LookupError:
        # Nothing matched: hand the original label back to the caller
        return country

Find country codes for the capacity data and set the date of each record to the beginning of the next year.

In [143]:
# Set the country codes
wind_capacity_data["Country code"] = wind_capacity_data["Country"].apply(get_country_alpha2)

# Stamp each record with 1 January of the year *after* its reporting year.
# The year is read from `wind_capacity_data`; the previous code referenced
# `capacity_data`, which is not defined anywhere in the visible notebook and
# raises a NameError on a fresh kernel.
wind_capacity_data["Date"] = wind_capacity_data["Year"].apply(
    lambda y: date(y + 1, 1, 1)
)

# Make a data frame with the installed capacity per technology, indexed by
# date and with one column per (technology, country code) pair.
selected_country_codes = [countries.get(name=c).alpha_2 for c in COUNTRIES]
_df = wind_capacity_data.set_index(['Country code', 'Date']).loc[selected_country_codes]
wind_cap = pd.DataFrame({'Wind Onshore': _df['Cumulative onshore capacity'],
                         'Wind Offshore': _df['Cumulative offshore capacity']}).unstack(0)

Preview the capacity data for selected countries at the end of each year.

In [144]:
# Work on a copy restricted to the selected countries, so that overwriting the
# 'Country' column below does not modify wind_capacity_data.
df = (
    wind_capacity_data[wind_capacity_data['Country code'].isin(selected_country_codes)]
    .copy()
)

# Replace the source labels with the name pycountry reports for each code
df['Country'] = [countries.get(alpha_2=code).name for code in df['Country code']]

# One row per country, one column per (capacity type, year), rounded
wind_cap_selected = (
    df.set_index(['Country', 'Year'])
      .loc[:, ['Cumulative onshore capacity', 'Cumulative offshore capacity']]
      .unstack()
      .round()
)
wind_cap_selected

Unnamed: 0_level_0,Cumulative onshore capacity,Cumulative onshore capacity,Cumulative offshore capacity,Cumulative offshore capacity
Year,2017,2018,2017,2018
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Austria,2844.0,3045.0,0.0,0.0
Belgium,1971.0,2175.0,876.0,1186.0
Denmark,4222.0,4430.0,1267.0,1329.0
Finland,1973.0,1970.0,71.0,71.0
France,13757.0,15307.0,0.0,2.0
Germany,50778.0,52931.0,5411.0,6380.0
Ireland,3346.0,3539.0,25.0,25.0
Italy,9506.0,9958.0,0.0,0.0
Netherlands,3259.0,3353.0,1118.0,1118.0
Norway,1193.0,1673.0,2.0,2.0


Write to disk for later use.

In [145]:
# Save the rounded per-country, per-year capacity table as an intermediate CSV
wind_cap_selected.to_csv('../data/intermediate/WindEurope_installed_cap.csv', header=True)

## Read installed capacity data for solar power

In [146]:
# Read the installed capacity table with a two-level row index and keep only
# the complete 'Solar' rows.
_df = pd.read_csv('../data/raw/ENTSO-E_TP_installed_cap.csv', index_col=[0, 1])
_df = _df.loc[['Solar']].dropna()

# NOTE(review): the row index has two levels here; confirm that assigning
# `.name` labels the levels as intended.
_df.index.name = 'Country code'

# The column labels are parsed as years; map each to 1 January of the
# following year and label the axis 'Date'.
_df.columns = pd.Index(
    [date(int(year) + 1, 1, 1) for year in _df.columns],
    name='Date',
)

# Dates as rows, one column per index entry
solar_cap = _df.transpose()
solar_cap

Unnamed: 0_level_0,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar
Unnamed: 0_level_1,AT,BE,DE,DK,ES,FR,GB,IT,NL,PT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2018-01-01,1031.0,2953.0,40834.0,601.0,6720.0,7660.0,8566.0,4659.0,2039.0,261.0
2019-01-01,1193.0,2953.0,42804.0,1002.0,6722.0,7170.0,12471.0,4719.0,2584.0,272.0


## Interpolate installed capacity within the years

Expand the capacity data to the whole year and interpolate for each day.

In [147]:
# Create daily index for the whole period
idx = pd.date_range(capacity_data['Date'].min(), 
                    capacity_data['Date'].max(),
                    freq='D')

_df = pd.concat([wind_cap, solar_cap], axis=1)
installed_capacity = _df.reindex(idx).interpolate()
installed_capacity.round()

Unnamed: 0_level_0,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,Wind Onshore,...,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar,Solar
Country code,AT,BE,DK,FI,FR,DE,IE,IT,NL,NO,...,AT,BE,DE,DK,ES,FR,GB,IT,NL,PT
2018-01-01,2844.0,1971.0,4222.0,1973.0,13757.0,50778.0,3346.0,9506.0,3259.0,1193.0,...,1031.0,2953.0,40834.0,601.0,6720.0,7660.0,8566.0,4659.0,2039.0,261.0
2018-01-02,2845.0,1972.0,4223.0,1973.0,13761.0,50784.0,3346.0,9507.0,3259.0,1194.0,...,1031.0,2953.0,40839.0,602.0,6720.0,7659.0,8577.0,4659.0,2040.0,261.0
2018-01-03,2845.0,1972.0,4223.0,1973.0,13766.0,50790.0,3347.0,9508.0,3260.0,1195.0,...,1032.0,2953.0,40845.0,603.0,6720.0,7657.0,8587.0,4659.0,2042.0,261.0
2018-01-04,2846.0,1973.0,4224.0,1973.0,13770.0,50796.0,3347.0,9510.0,3260.0,1196.0,...,1032.0,2953.0,40850.0,604.0,6720.0,7656.0,8598.0,4659.0,2043.0,261.0
2018-01-05,2846.0,1973.0,4225.0,1973.0,13774.0,50802.0,3348.0,9511.0,3260.0,1198.0,...,1033.0,2953.0,40856.0,605.0,6720.0,7655.0,8609.0,4660.0,2045.0,261.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-28,3043.0,2173.0,4428.0,1970.0,15290.0,52907.0,3537.0,9953.0,3352.0,1668.0,...,1191.0,2953.0,42782.0,998.0,6722.0,7175.0,12428.0,4718.0,2578.0,272.0
2018-12-29,3043.0,2173.0,4428.0,1970.0,15295.0,52913.0,3537.0,9954.0,3352.0,1669.0,...,1192.0,2953.0,42788.0,999.0,6722.0,7174.0,12439.0,4719.0,2580.0,272.0
2018-12-30,3044.0,2174.0,4429.0,1970.0,15299.0,52919.0,3538.0,9955.0,3352.0,1670.0,...,1192.0,2953.0,42793.0,1000.0,6722.0,7173.0,12450.0,4719.0,2581.0,272.0
2018-12-31,3044.0,2174.0,4429.0,1970.0,15303.0,52925.0,3538.0,9957.0,3353.0,1672.0,...,1193.0,2953.0,42799.0,1001.0,6722.0,7171.0,12460.0,4719.0,2583.0,272.0


## Calculation of capacity factors

Calculate capacity factors for each technology based on the interpolated installed capacity on that day.

In [149]:
for tech in GEN_TYPES:
    # Generation time series for this technology, one column per country code
    generation = pd.read_csv(
        f'../data/intermediate/Generation {tech}.csv',
        index_col=0,
        parse_dates=True,
    )
    cf = pd.DataFrame(index=generation.index)

    for cc in generation.columns:
        # Skip countries without installed capacity data for this technology
        try:
            cap = installed_capacity[(tech, cc)].rename('cap')
        except KeyError:
            continue

        gen = generation[cc].dropna()

        # Attach to every generation record the interpolated capacity of its
        # calendar day, then divide generation by that capacity.
        _df = pd.DataFrame(
            {'gen': gen, 'date': pd.DatetimeIndex(gen.index.date)}
        ).join(cap, on='date')
        cf[cc] = _df['gen'] / _df['cap']

    cf.to_csv(f'../data/intermediate/CF {tech}.csv', header=True)
        