# refinement with "bank holidays" and (bouwvak) vakanties...

Twan: holidays library takes official (NL) holidays. some holidays potentially associated with lower electricy load (TBC) are missing:
Goede Vrijdag 15-04-2022, 07-04-2023, 29-03-2024, (18-04-2025)
(vrij)daags na Hemelvaartsdag 27-05-2022, 19-05-2023, 10-05-2024

in below code, bouwvak vakanties, local timezone and DST are added entities!!


In [6]:
import pandas as pd
import numpy as np
import holidays
from datetime import date

# Genereer tijdstempel per uur tussen 2022 en 2024 (UTC)
time_df = pd.DataFrame({
    "datetime": pd.date_range("2022-01-01", "2024-12-31", freq="h", tz="UTC")
})

# Basis tijdkolommen
time_df["hour"] = time_df["datetime"].dt.hour
time_df["weekday"] = time_df["datetime"].dt.weekday  # Maandag = 0, Zondag = 6
time_df["month"] = time_df["datetime"].dt.month
time_df["date"] = time_df["datetime"].dt.date  # Nodig voor holiday lookup

# Cyclical encoding
time_df["hour_sin"] = np.sin(2 * np.pi * time_df["hour"] / 24)
time_df["hour_cos"] = np.cos(2 * np.pi * time_df["hour"] / 24)
time_df["weekday_sin"] = np.sin(2 * np.pi * time_df["weekday"] / 7)
time_df["weekday_cos"] = np.cos(2 * np.pi * time_df["weekday"] / 7)
time_df["month_sin"] = np.sin(2 * np.pi * time_df["month"] / 12)
time_df["month_cos"] = np.cos(2 * np.pi * time_df["month"] / 12)

# Convert from UTC to a local timezone (e.g., Europe/Amsterdam) to capture DST changes
time_df["local_datetime"] = time_df["datetime"].dt.tz_convert("Europe/Amsterdam")

# Use apply to get DST info — returns timedelta
time_df["is_dst"] = time_df["local_datetime"].apply(lambda x: int(x.dst() != pd.Timedelta(0)))

# Flags
# Weekend = Zaterdag (5) of Zondag (6)
time_df["is_weekend"] = time_df["weekday"].isin([5, 6]).astype(int)

# Nationale feestdagen Nederland
nl_holidays = holidays.country_holidays("NL", years=[2022, 2023, 2024])
time_df["is_holiday"] = time_df["date"].isin(nl_holidays).astype(int)

# Combinatie: niet-werkdag
time_df["is_non_working_day"] = ((time_df["is_weekend"] == 1) | (time_df["is_holiday"] == 1)).astype(int)

# Startdata van de bouwvak per jaar
bouwvak_start = [
    date(2022, 7, 25),
    date(2023, 7, 22),
    date(2024, 7, 22),
    
] # date(2025, 7, 21), date(2026, 7, 18), date(2027, 7, 24), date(2028, 7, 22), date(2029, 7, 21) are further start dates

# Einddata van de bouwvak per jaar
bouwvak_end = [
    date(2022, 8, 26),
    date(2023, 8, 26),
    date(2024, 8, 23),
] # date(2025, 8, 22), date(2026, 8, 22), date(2027, 8, 29), date(2028, 8, 27), date(2029, 8, 24) further end dates

def is_bouwvak(d):
    return any(start <= d <= end for start, end in zip(bouwvak_start, bouwvak_end))

time_df['is_bouwvak'] = time_df['date'].apply(is_bouwvak).astype('int64')




In [7]:
print(time_df.info())
print(time_df.describe())
print(time_df.head(24))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26281 entries, 0 to 26280
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype                           
---  ------              --------------  -----                           
 0   datetime            26281 non-null  datetime64[ns, UTC]             
 1   hour                26281 non-null  int32                           
 2   weekday             26281 non-null  int32                           
 3   month               26281 non-null  int32                           
 4   date                26281 non-null  object                          
 5   hour_sin            26281 non-null  float64                         
 6   hour_cos            26281 non-null  float64                         
 7   weekday_sin         26281 non-null  float64                         
 8   weekday_cos         26281 non-null  float64                         
 9   month_sin           26281 non-null  float64                         
 10