In [2]:
from datetime import date
from glob import glob

import pandas as pd
from pycountry import countries

from const import COUNTRIES

## Read installed capacity data

Read capacity data from externally downloaded files. Each file has data for one year.

In [3]:
# Collect the per-year capacity tables. The paths are sorted so that the row
# order of the combined table does not depend on the order in which the
# filesystem happens to list the files.
capacity_files = sorted(glob("../data/external/Cumulative_capacity_table_data_*.csv"))
if not capacity_files:
    # Name the missing input instead of surfacing pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No capacity files matching "
        "'../data/external/Cumulative_capacity_table_data_*.csv' were found"
    )
frames = [pd.read_csv(filepath) for filepath in capacity_files]
# ignore_index gives every record a unique row label; without it each file
# contributes its own 0..n-1 labels and the combined index has duplicates.
capacity_data = pd.concat(frames, ignore_index=True)

Define a function to get the country code from a country name (incl. fuzzy search).

In [4]:
def get_country_alpha2(country: str) -> str:
    """Return the two-letter country code for a country name.

    A few names are mapped by hand ("UK", "FYROM", "Kosovo"). Any other name
    is first looked up with ``countries.get(name=...)``; if reading
    ``alpha_2`` from that result raises ``AttributeError``, the first hit of
    ``countries.search_fuzzy`` is used instead.

    Parameters
    ----------
    country : str
        Country name to translate.

    Returns
    -------
    str
        The ``alpha_2`` code of the matched country, or ``country`` itself,
        unchanged, when the fuzzy search raises ``LookupError``.
    """
    # Names that are translated by hand before any lookup is attempted
    overrides = {"UK": "GB", "FYROM": "MK", "Kosovo": "XK"}
    if country in overrides:
        return overrides[country]

    # Exact-name lookup first
    try:
        return countries.get(name=country).alpha_2
    except AttributeError:
        pass

    # Fall back to the best fuzzy match; give the input back if there is none
    try:
        return countries.search_fuzzy(country)[0].alpha_2
    except LookupError:
        return country

Find country codes for the capacity data and set the date of each record to the beginning of the next year.

In [5]:
# Resolve each country name to its two-letter country code
capacity_data["Country code"] = capacity_data["Country"].apply(get_country_alpha2)

# Stamp each record with 1 January of the year *after* its "Year" value.
# The dates are built in one vectorised call as pandas timestamps
# (datetime64) instead of an object column of ``datetime.date`` values, so
# the column can be used directly with ``pd.date_range`` and matched against
# a ``DatetimeIndex`` without relying on implicit conversion of date objects.
capacity_data["Date"] = pd.to_datetime(
    (capacity_data["Year"] + 1).astype(str), format="%Y"
)

Preview the capacity data for selected countries at the end of each year.

In [23]:
# Two-letter codes of the countries listed in COUNTRIES
selected_codes = [countries.get(name=name).alpha_2 for name in COUNTRIES]

# Keep only the records of those countries; work on a copy so that
# capacity_data itself is left untouched
is_selected = capacity_data['Country code'].isin(selected_codes)
df = capacity_data.loc[is_selected].copy()

# Overwrite the country labels with the names looked up from the codes
df['Country'] = [countries.get(alpha_2=code).name for code in df['Country code']]

# One row per country, one column per (capacity type, year), rounded
capacity_columns = ['Cumulative onshore capacity', 'Cumulative offshore capacity']
wind_cap_selected = (
    df.set_index(['Country', 'Year'])[capacity_columns]
      .unstack()
      .round()
)
wind_cap_selected

Unnamed: 0_level_0,Cumulative onshore capacity,Cumulative onshore capacity,Cumulative offshore capacity,Cumulative offshore capacity
Year,2017,2018,2017,2018
Country,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Austria,2844.0,3045.0,0.0,0.0
Belgium,1971.0,2175.0,876.0,1186.0
Denmark,4222.0,4430.0,1267.0,1329.0
Finland,1973.0,1970.0,71.0,71.0
France,13757.0,15307.0,0.0,2.0
Germany,50778.0,52931.0,5411.0,6380.0
Ireland,3346.0,3539.0,25.0,25.0
Italy,9506.0,9958.0,0.0,0.0
Netherlands,3259.0,3353.0,1118.0,1118.0
Norway,1193.0,1673.0,2.0,2.0


In [25]:
# Save the rounded capacity table of the selected countries
installed_cap_csv = '../data/intermediate/WindEurope_installed_cap.csv'
wind_cap_selected.to_csv(installed_cap_csv, header=True)

## Interpolate installed capacity within the years

Expand the capacity data to the whole year and interpolate for each day.

In [7]:
# Daily timestamps from the earliest to the latest capacity record
first_day = capacity_data['Date'].min()
last_day = capacity_data['Date'].max()
idx = pd.date_range(start=first_day, end=last_day, freq='D')

# Country code, date and the two capacity columns under short names
df = capacity_data[['Country code', 'Date',
                    'Cumulative onshore capacity',
                    'Cumulative offshore capacity']].rename(
    columns={'Cumulative onshore capacity': 'Onshore',
             'Cumulative offshore capacity': 'Offshore'}
)

# Pivot to one column per (technology, country code), expand to the daily
# index and fill the days between the records with pandas' default
# interpolation
per_country = df.set_index(['Country code', 'Date']).unstack(level=0)
wind_capacity = per_country.reindex(idx).interpolate()
wind_capacity.round()

Unnamed: 0_level_0,Onshore,Onshore,Onshore,Onshore,Onshore,Onshore,Onshore,Onshore,Onshore,Onshore,...,Offshore,Offshore,Offshore,Offshore,Offshore,Offshore,Offshore,Offshore,Offshore,Offshore
Country code,AT,BA,BE,BG,CH,CY,CZ,DE,DK,EE,...,PT,RO,RS,RU,SE,SI,SK,TR,UA,XK
2018-01-01,2844.0,0.0,1971.0,691.0,75.0,158.0,303.0,50778.0,4222.0,310.0,...,0.0,0.0,0.0,0.0,202.0,0.0,0.0,0.0,0.0,0.0
2018-01-02,2845.0,0.0,1972.0,691.0,75.0,158.0,303.0,50784.0,4223.0,310.0,...,0.0,0.0,0.0,0.0,202.0,0.0,0.0,0.0,0.0,0.0
2018-01-03,2845.0,0.0,1972.0,691.0,75.0,158.0,303.0,50790.0,4223.0,310.0,...,0.0,0.0,0.0,0.0,202.0,0.0,0.0,0.0,0.0,0.0
2018-01-04,2846.0,0.0,1973.0,691.0,75.0,158.0,303.0,50796.0,4224.0,310.0,...,0.0,0.0,0.0,0.0,202.0,0.0,0.0,0.0,0.0,0.0
2018-01-05,2846.0,1.0,1973.0,691.0,75.0,158.0,303.0,50802.0,4225.0,310.0,...,0.0,0.0,0.0,0.0,202.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-28,3043.0,50.0,2173.0,691.0,75.0,158.0,317.0,52907.0,4428.0,310.0,...,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0
2018-12-29,3043.0,50.0,2173.0,691.0,75.0,158.0,317.0,52913.0,4428.0,310.0,...,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0
2018-12-30,3044.0,50.0,2174.0,691.0,75.0,158.0,317.0,52919.0,4429.0,310.0,...,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0
2018-12-31,3044.0,50.0,2174.0,691.0,75.0,158.0,317.0,52925.0,4429.0,310.0,...,0.0,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0


## Calculation of capacity factors

Calculate capacity factors for each technology based on the interpolated installed capacity on that day.

In [8]:
for tech in ['Onshore', 'Offshore']:
    # Generation table of this technology; its column labels are used below
    # as country codes to pick the matching capacity series
    generation_csv = f'../data/intermediate/Generation Wind {tech}.csv'
    wind_generation = pd.read_csv(generation_csv, index_col=0, parse_dates=True)

    # Result table shares the time index of the generation data
    wind_cf = pd.DataFrame(index=wind_generation.index)
    for cc in wind_generation.columns:
        gen = wind_generation[cc].dropna()
        cap = wind_capacity[(tech, cc)].rename('cap')
        # Attach to every generation record the interpolated capacity of the
        # calendar day it falls on
        df = (
            gen.to_frame('gen')
               .assign(date=pd.DatetimeIndex(gen.index.date))
               .join(cap, on='date')
        )
        # NOTE(review): where the interpolated capacity is 0 this ratio is
        # inf (or NaN for 0/0) - confirm that this is acceptable downstream.
        wind_cf[cc] = df['gen'] / df['cap']

    wind_cf.to_csv(f'../data/processed/CF Wind {tech}.csv', header=True)
        