In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from copy import deepcopy

In [2]:
# Sampling window and step for the generated calendar features.
# The generation loop uses `<=`, so both endpoints are included.
start_date = datetime(year=2000, month=1, day=1)
end_date = datetime(year=2020, month=4, day=1)
period = timedelta(hours=1)

# Fixed-date holidays written as "DD-MM" -- the same layout the generation loop
# produces with strftime("%d-%m") when it tests a timestamp for membership.
holidays = [
    '01-01', '06-01', '07-01',
    '01-05', '24-05',
    '02-08',
    '08-09',
    '11-10', '25-10',
    '08-12',
]

In [3]:
# Build one feature row per timestamp from start_date to end_date (both inclusive),
# stepping by `period`. Rows are stored in `data`, keyed by the formatted timestamp.
#
# Periodic quantities are mapped onto the unit circle as a (sin, cos) pair so that
# the last value of a cycle lands next to the first one (hour 23 next to hour 0,
# Sunday next to Monday, December next to January).
HOURS_PER_DAY = 24
DAYS_PER_WEEK = 7
MONTHS_PER_YEAR = 12

# datetime objects are immutable and `+=` rebinds the name, so no copy is required.
curr_date = start_date
data = {}

while curr_date <= end_date:
    timestamp = curr_date.strftime("%Y-%m-%d %H:%M:%S")
    hour_of_day = curr_date.hour          # 0..23
    day_of_week = curr_date.weekday()     # Monday == 0 ... Sunday == 6
    month_of_yr = curr_date.month - 1     # shifted to 0..11 so January sits at angle 0

    hour_sin = np.sin(2*np.pi*hour_of_day/HOURS_PER_DAY)
    hour_cos = np.cos(2*np.pi*hour_of_day/HOURS_PER_DAY)
    weekday_sin = np.sin(2*np.pi*day_of_week/DAYS_PER_WEEK)
    weekday_cos = np.cos(2*np.pi*day_of_week/DAYS_PER_WEEK)
    # The month cycle has 12 steps. Dividing by 7 (the weekday period) wraps the
    # angle after seven months, so e.g. January and August would share the exact
    # same (sin, cos) pair and become indistinguishable.
    month_sin = np.sin(2*np.pi*month_of_yr/MONTHS_PER_YEAR)
    month_cos = np.cos(2*np.pi*month_of_yr/MONTHS_PER_YEAR)

    # Binary flags: Saturday/Sunday, and membership in the "DD-MM" holiday list.
    weekend = 1 if day_of_week in (5, 6) else 0
    holiday = 1 if curr_date.strftime("%d-%m") in holidays else 0

    data[timestamp] = (hour_sin, hour_cos, weekday_sin, weekday_cos,
                       month_sin, month_cos, weekend, holiday)

    curr_date += period

In [4]:
# Column labels, listed in the same order as the values inside each tuple of `data`.
feature_columns = [
    'hour_sin', 'hour_cos',
    'weekday_sin', 'weekday_cos',
    'month_sin', 'month_cos',
    'weekend', 'holiday',
]

# orient='index' turns every dict key (the formatted timestamp) into a row label
# and every tuple into that row's values.
df = pd.DataFrame.from_dict(data, orient='index', columns=feature_columns)

In [5]:
# Preview the first 25 rows of the feature table.
df.head(n=25)

Unnamed: 0,hour_sin,hour_cos,weekday_sin,weekday_cos,month_sin,month_cos,weekend,holiday
2000-01-01 00:00:00,0.0,1.0,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 01:00:00,0.258819,0.9659258,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 02:00:00,0.5,0.8660254,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 03:00:00,0.7071068,0.7071068,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 04:00:00,0.8660254,0.5,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 05:00:00,0.9659258,0.258819,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 06:00:00,1.0,6.123234000000001e-17,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 07:00:00,0.9659258,-0.258819,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 08:00:00,0.8660254,-0.5,-0.974928,-0.222521,0.0,1.0,1,1
2000-01-01 09:00:00,0.7071068,-0.7071068,-0.974928,-0.222521,0.0,1.0,1,1


In [6]:
# Persist the feature table as CSV, keeping the timestamp index as the first column.
# NOTE(review): the target has no file extension and ./data is assumed to exist
# already -- confirm both are intended by whatever reads this file back.
df.to_csv(path_or_buf='./data/extras', index=True)