In [1]:
import pandas as pd

from const import TZ, START, END, MIN_TIMESTEP, GEN_TYPES

MAX_GAP_TO_FILL = 4  # Default `max_gap` of expand_to_full_length: cap (in time steps) on consecutive missing values interpolated

Define a function to fill in the gaps in the data.

In [2]:
def expand_to_full_length(ts: pd.Series, coverage: pd.DataFrame, max_gap: int = MAX_GAP_TO_FILL) -> pd.Series:
    """Reindex a time series onto the full START-END index and interpolate gaps.

    The time step is inferred from the non-missing samples of ``ts``, a regular
    index covering [START, END) in time zone TZ is built with that step, and the
    series is reindexed onto it. Missing values that have valid data on both
    sides are then filled by time-weighted linear interpolation.

    Args:
        ts: Time series to process. Its ``name`` is used as the row label in
            ``coverage`` and in the progress message.
        coverage: Data frame for coverage statistics. It is modified in place:
            for row ``ts.name`` the columns 'length' (number of steps in the
            full index) and 'original' (fraction of steps with data before
            interpolation) are written; when gaps exist, 'interpolated'
            (fraction with data after interpolation) and 'missing' (number of
            steps still missing) are written as well.
        max_gap: Passed to ``Series.interpolate`` as ``limit``, i.e. the maximum
            number of consecutive missing time steps to fill. Note that pandas
            applies this as a cap per gap, so a gap longer than ``max_gap`` is
            shortened by ``max_gap`` steps rather than left untouched.

    Returns:
        The series reindexed to the full time index with gaps interpolated.
        If ``ts`` contains no valid values at all it is returned unchanged (and
        nothing is written to ``coverage``).

    Raises:
        ValueError: If ``ts`` has exactly one valid value, so that no time step
            can be inferred.
        AssertionError: If the inferred time step is shorter than MIN_TIMESTEP.
    """
    ts_ = ts.dropna()  # Drop missing values
    if ts_.empty:
        return ts
    if len(ts_) < 2:
        raise ValueError(
            f"Cannot infer the time step of {ts.name!r} from a single valid sample"
        )

    # Infer the time step as the most common spacing between valid samples.
    # Looking only at the first two samples would give a too-coarse step (and
    # silently drop data on reindexing) whenever a gap follows the first sample.
    freq = ts_.index.to_series().diff().mode().iloc[0]
    # Check we got something reasonable
    assert freq >= pd.Timedelta(MIN_TIMESTEP), (
        f"Inferred time step {freq} of {ts.name!r} is shorter than {MIN_TIMESTEP}"
    )

    # Make the full, left-closed time index [START, END) and reindex.
    try:
        time_idx = pd.date_range(START, END, tz=TZ, freq=freq, inclusive='left')
    except TypeError:
        # pandas < 1.4 does not know `inclusive`; it spells the option `closed`
        # (which in turn was removed in pandas 2.0).
        time_idx = pd.date_range(START, END, tz=TZ, freq=freq, closed='left')
    reindexed = ts_.reindex(time_idx)

    n_steps = len(reindexed)
    coverage.loc[ts.name, 'length'] = n_steps

    n_gaps = reindexed.isna().sum()
    if not n_gaps:
        coverage.loc[ts.name, 'original'] = 1
        return reindexed

    print(f"Filling gaps for {ts.name}. . .")
    coverage.loc[ts.name, 'original'] = 1 - n_gaps / n_steps
    # limit_area='inside' restricts filling to gaps surrounded by valid data,
    # so leading/trailing missing stretches are never extrapolated.
    interpolated = reindexed.interpolate(method='time', limit_area='inside',
                                         limit=max_gap)
    n_still_missing = interpolated.isna().sum()
    coverage.loc[ts.name, 'interpolated'] = 1 - n_still_missing / n_steps
    coverage.loc[ts.name, 'missing'] = n_still_missing
    return interpolated

Fill in the gaps in the raw generation data by interpolating.

In [3]:
for tech in GEN_TYPES:
    # Raw generation table for this technology; the first CSV column is the time index
    gen_raw = pd.read_csv(f"../data/raw/ENTSO-E_TP_generation_{tech}.csv", 
                          index_col=0, parse_dates=True)
    # Fresh coverage table per technology; expand_to_full_length fills it in place
    coverage = pd.DataFrame()
    # Process column by column, using the notebook-wide gap limit instead of a
    # repeated magic number so that it can be tuned in one place
    gen_clean = gen_raw.apply(expand_to_full_length, coverage=coverage,
                              max_gap=MAX_GAP_TO_FILL)
    
    # Write to file
    filename = f'../data/intermediate/Generation {tech}.csv'
    gen_clean.to_csv(filename, header=True)
    print(f'Wrote {filename}')
    coverage.to_csv(f'../data/intermediate/Coverage {tech}.csv')

Filling gaps for ES. . .
Filling gaps for FR. . .
Filling gaps for GB. . .
Filling gaps for IT. . .
Filling gaps for NL. . .
Wrote ../data/intermediate/Generation Solar.csv
Filling gaps for ES. . .
Filling gaps for FI. . .
Filling gaps for FR. . .
Filling gaps for GB. . .
Filling gaps for IE. . .
Filling gaps for IT. . .
Filling gaps for NL. . .
Filling gaps for NO. . .
Filling gaps for SE. . .
Wrote ../data/intermediate/Generation Wind Onshore.csv
Filling gaps for ES. . .
Filling gaps for GB. . .
Filling gaps for NL. . .
Wrote ../data/intermediate/Generation Wind Offshore.csv


Fill in gaps in the raw load data by interpolating.

In [6]:
# Coverage table for the load data; expand_to_full_length fills it in place
coverage = pd.DataFrame()
# Raw load table; the first CSV column is the time index
load_raw = pd.read_csv("../data/raw/ENTSO-E_TP_load.csv", 
                      index_col=0, parse_dates=True)
# Process column by column, using the notebook-wide gap limit instead of a
# repeated magic number so that it can be tuned in one place
load_clean = load_raw.apply(expand_to_full_length, coverage=coverage,
                            max_gap=MAX_GAP_TO_FILL)

# Write to file
filename = '../data/intermediate/Load.csv'
load_clean.to_csv(filename, header=True)
print(f'Wrote {filename}')
coverage.to_csv('../data/intermediate/Coverage Load.csv')

Filling gaps for DE. . .
Filling gaps for ES. . .
Filling gaps for FI. . .
Filling gaps for FR. . .
Filling gaps for GB. . .
Filling gaps for IE. . .
Filling gaps for NO. . .
Wrote ../data/intermediate/Load.csv
