In [1]:
import pandas as pd

from const import TZ, START, END, MIN_TIMESTEP, GEN_TYPES

Define a function to fill in the gaps in the data.

In [2]:
def expand_to_full_length(
    ts: pd.Series,
    *,
    start=None,
    end=None,
    tz=None,
    min_timestep=None,
) -> pd.Series:
    """Reindex a time series onto a regular full-length grid and fill the gaps.

    Missing values are dropped, the sampling step is inferred from the
    remaining timestamps, and the series is reindexed onto a regular grid
    covering ``[start, end)``. Gaps are then filled by time-weighted linear
    interpolation.

    Parameters
    ----------
    ts : pd.Series
        Series indexed by timestamps; may contain NaNs.
        NOTE(review): the index presumably has to be comparable with the
        tz-aware target grid -- confirm against the raw CSV timestamps.
    start, end : optional
        Bounds of the target grid, passed to ``pd.date_range``; ``end`` is
        excluded. Default to the module-level ``START`` and ``END``.
    tz : optional
        Time zone of the target grid. Defaults to the module-level ``TZ``.
    min_timestep : optional
        Smallest acceptable inferred step (anything ``pd.Timedelta``
        accepts). Defaults to the module-level ``MIN_TIMESTEP``.

    Returns
    -------
    pd.Series
        The reindexed and interpolated series. If fewer than two non-missing
        samples are available the step cannot be inferred and ``ts`` is
        returned unchanged.

    Raises
    ------
    AssertionError
        If the inferred step is smaller than ``min_timestep``.
    """
    ts_ = ts.dropna()  # Drop missing values
    # A step needs at least two samples; with zero or one there is nothing
    # to interpolate between, so hand the input back untouched.
    if len(ts_) < 2:
        return ts

    # Fall back to the notebook-wide configuration for anything not given.
    start = START if start is None else start
    end = END if end is None else end
    tz = TZ if tz is None else tz
    min_timestep = MIN_TIMESTEP if min_timestep is None else min_timestep

    # Infer the step as the most common spacing between consecutive valid
    # samples. Looking only at the first two samples would give too coarse a
    # step whenever the data happens to start with a gap.
    gaps = ts_.index[1:] - ts_.index[:-1]
    freq = pd.Series(gaps).mode().iloc[0]
    # Check we got something reasonable
    assert freq >= pd.Timedelta(min_timestep), (
        f"inferred time step {freq} is below the minimum {min_timestep}"
    )

    # Make the full time index and reindex. `inclusive` replaces the `closed`
    # keyword, which was deprecated in pandas 1.4 and removed in 2.0.
    time_idx = pd.date_range(start, end, tz=tz, freq=freq, inclusive='left')
    return ts_.reindex(time_idx).interpolate(method='time')

Fill in the gaps in the raw data by interpolating.

In [3]:
# Gap-fill the raw generation table of every technology in GEN_TYPES.
# The `wind_gen_*` names are used for every technology, not only wind.
for tech in GEN_TYPES:
    # Load the raw table; the first CSV column becomes a parsed date index.
    wind_gen_raw = pd.read_csv(
        f"../data/raw/ENTSO-E_TP_generation_{tech}.csv",
        parse_dates=True,
        index_col=0,
    )

    # DataFrame.apply hands each column to the function as its own Series,
    # so every column is reindexed and interpolated independently.
    wind_gen_clean = wind_gen_raw.apply(expand_to_full_length)

    # Write the cleaned table for this technology to the intermediate folder.
    filename = f'../data/intermediate/Generation {tech}.csv'
    wind_gen_clean.to_csv(filename, header=True)