In [19]:
import pandas as pd
import numpy as np
from datetime import date
import holidays

# Define the date range. Both endpoints are included, giving one row per hour
# from the first hour of 2023-12-01 through the last hour of 2024-12-31.
start_date = "2023-12-01 00:00:00"
end_date = "2024-12-31 23:00:00"

# Generate a date range with hourly frequency.
# Lowercase 'h' is the supported hourly alias; the uppercase 'H' spelling is
# deprecated and emits a FutureWarning on recent pandas releases.
date_range = pd.date_range(start=start_date, end=end_date, freq='h')

# Create the DateHourDim DataFrame with the hourly timestamps as its first column
date_hour_dim = pd.DataFrame(date_range, columns=['Datetime'])

# Populate the columns
hourly_stamps = date_hour_dim['Datetime'].dt

# Surrogate key in YYYYMMDDHH form. The value has ten digits, so the width is
# pinned to 64 bits instead of relying on the platform's default integer size.
date_hour_dim['DateHourKey'] = hourly_stamps.strftime('%Y%m%d%H').astype('int64')
date_hour_dim['Hour'] = hourly_stamps.hour
# Hours 0-11 are labelled 'AM', hours 12-23 are labelled 'PM'
date_hour_dim['TimeOfDay'] = np.where(date_hour_dim['Hour'] < 12, 'AM', 'PM')
date_hour_dim['DayNumber'] = hourly_stamps.dayofyear
# pandas numbers weekdays from Monday=0 to Sunday=6
date_hour_dim['WeekDayNumber'] = hourly_stamps.weekday
date_hour_dim['WeekDayName'] = hourly_stamps.strftime('%A')
# Saturday (5) and Sunday (6) are flagged as weekend
date_hour_dim['WeekendFlag'] = np.where(date_hour_dim['WeekDayNumber'] >= 5, 1, 0)
date_hour_dim['MonthNumber'] = hourly_stamps.month
date_hour_dim['MonthName'] = hourly_stamps.strftime('%B')
date_hour_dim['Year'] = hourly_stamps.year

# Get US holidays.
# No `years` argument is passed here, so the calendar is not limited to a fixed
# list of years up front.
# NOTE(review): presumably the holidays package fills in each year on first
# lookup - the value counts further down show Christmas Day for both 2023 and
# 2024 (48 hourly rows) - confirm against the installed holidays version.
us_holidays = holidays.US()

# Function to determine if a date is a holiday
def is_holiday(date):
    """Classify a timestamp's calendar day against the US holiday calendar.

    Args:
        date: A datetime-like object exposing ``.date()``; the time-of-day
            part is discarded before the lookup.

    Returns:
        A ``(flag, name)`` tuple: ``(1, <holiday name>)`` when the day is in
        ``us_holidays``, otherwise ``(0, 'None')``. The second element of the
        non-holiday result is the literal string ``'None'``, not the ``None``
        object.
    """
    calendar_day = date.date()
    # Guard clause: ordinary days get the string placeholder
    if calendar_day not in us_holidays:
        return 0, 'None'
    return 1, us_holidays.get(calendar_day)

# Apply the holiday function.
# Every hour of a calendar day shares the same holiday status, so each distinct
# day is resolved once and the answer is broadcast back to its hourly rows,
# rather than calling is_holiday() for every single row. Building the two
# columns from lists also works when the frame has no rows, where unpacking
# zip(*...) would raise.
calendar_days = date_hour_dim['Datetime'].dt.normalize()
holiday_by_day = {day: is_holiday(day) for day in calendar_days.drop_duplicates()}
resolved = [holiday_by_day[day] for day in calendar_days]
date_hour_dim['HolidayFlag'] = [flag for flag, _ in resolved]
date_hour_dim['HolidayName'] = [name for _, name in resolved]

# Ensure the WeekendFlag and HolidayFlag are binary (0 or 1) integer columns
date_hour_dim['WeekendFlag'] = date_hour_dim['WeekendFlag'].astype(int)
date_hour_dim['HolidayFlag'] = date_hour_dim['HolidayFlag'].astype(int)

# Drop duplicates to ensure unique DateHourKey entries
# (the first row for each key is kept, which is the pandas default)
date_hour_dim = date_hour_dim.drop_duplicates(subset=['DateHourKey'])

# Display the DateHourDim DataFrame
print(date_hour_dim)

# To save it to a CSV file, uncomment the next line
# date_hour_dim.to_csv('DateHourDim.csv', index=False)


                Datetime  DateHourKey  Hour TimeOfDay  DayNumber  \
0    2023-12-01 00:00:00   2023120100     0        AM        335   
1    2023-12-01 01:00:00   2023120101     1        AM        335   
2    2023-12-01 02:00:00   2023120102     2        AM        335   
3    2023-12-01 03:00:00   2023120103     3        AM        335   
4    2023-12-01 04:00:00   2023120104     4        AM        335   
...                  ...          ...   ...       ...        ...   
9523 2024-12-31 19:00:00   2024123119    19        PM        366   
9524 2024-12-31 20:00:00   2024123120    20        PM        366   
9525 2024-12-31 21:00:00   2024123121    21        PM        366   
9526 2024-12-31 22:00:00   2024123122    22        PM        366   
9527 2024-12-31 23:00:00   2024123123    23        PM        366   

      WeekDayNumber WeekDayName  WeekendFlag  MonthNumber MonthName  Year  \
0                 4      Friday            0           12  December  2023   
1                 4      Frid

  date_range = pd.date_range(start=start_date, end=end_date, freq='H')


In [20]:
# Show the finished dimension table, including the HolidayFlag and HolidayName
# columns; the notebook elides the middle rows of the output.
date_hour_dim

Unnamed: 0,Datetime,DateHourKey,Hour,TimeOfDay,DayNumber,WeekDayNumber,WeekDayName,WeekendFlag,MonthNumber,MonthName,Year,HolidayFlag,HolidayName
0,2023-12-01 00:00:00,2023120100,0,AM,335,4,Friday,0,12,December,2023,0,
1,2023-12-01 01:00:00,2023120101,1,AM,335,4,Friday,0,12,December,2023,0,
2,2023-12-01 02:00:00,2023120102,2,AM,335,4,Friday,0,12,December,2023,0,
3,2023-12-01 03:00:00,2023120103,3,AM,335,4,Friday,0,12,December,2023,0,
4,2023-12-01 04:00:00,2023120104,4,AM,335,4,Friday,0,12,December,2023,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9523,2024-12-31 19:00:00,2024123119,19,PM,366,1,Tuesday,0,12,December,2024,0,
9524,2024-12-31 20:00:00,2024123120,20,PM,366,1,Tuesday,0,12,December,2024,0,
9525,2024-12-31 21:00:00,2024123121,21,PM,366,1,Tuesday,0,12,December,2024,0,
9526,2024-12-31 22:00:00,2024123122,22,PM,366,1,Tuesday,0,12,December,2024,0,


In [21]:
# Count hourly rows per holiday name. Non-holiday rows carry the placeholder
# string 'None' assigned by is_holiday().
date_hour_dim['HolidayName'].value_counts()

HolidayName
None                                    9240
Christmas Day                             48
New Year's Day                            24
Martin Luther King Jr. Day                24
Washington's Birthday                     24
Memorial Day                              24
Juneteenth National Independence Day      24
Independence Day                          24
Labor Day                                 24
Columbus Day                              24
Veterans Day                              24
Thanksgiving                              24
Name: count, dtype: int64

In [23]:
# Count hourly rows flagged as holiday (1) versus non-holiday (0)
date_hour_dim['HolidayFlag'].value_counts()

HolidayFlag
0    9240
1     288
Name: count, dtype: int64

In [None]:
# Bind the table to the name DateHourDim as well. This is a plain assignment,
# so both names refer to the same DataFrame object (no copy is made).
# NOTE(review): this cell shows no execution count while the following cell
# reads DateHourDim - re-run the notebook top to bottom on a fresh kernel to
# confirm the alias is actually defined before it is used.
DateHourDim = date_hour_dim

In [24]:
# Display the table through its DateHourDim name
DateHourDim

Unnamed: 0,Datetime,DateHourKey,Hour,TimeOfDay,DayNumber,WeekDayNumber,WeekDayName,WeekendFlag,MonthNumber,MonthName,Year,HolidayFlag,HolidayName
0,2023-12-01 00:00:00,2023120100,0,AM,335,4,Friday,0,12,December,2023,0,
1,2023-12-01 01:00:00,2023120101,1,AM,335,4,Friday,0,12,December,2023,0,
2,2023-12-01 02:00:00,2023120102,2,AM,335,4,Friday,0,12,December,2023,0,
3,2023-12-01 03:00:00,2023120103,3,AM,335,4,Friday,0,12,December,2023,0,
4,2023-12-01 04:00:00,2023120104,4,AM,335,4,Friday,0,12,December,2023,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9523,2024-12-31 19:00:00,2024123119,19,PM,366,1,Tuesday,0,12,December,2024,0,
9524,2024-12-31 20:00:00,2024123120,20,PM,366,1,Tuesday,0,12,December,2024,0,
9525,2024-12-31 21:00:00,2024123121,21,PM,366,1,Tuesday,0,12,December,2024,0,
9526,2024-12-31 22:00:00,2024123122,22,PM,366,1,Tuesday,0,12,December,2024,0,
