## Modules

In [None]:
import os
import sys
sys.path.append('..')

from datetime import datetime, timedelta
import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup

## US market holidays

### Raw dataframe

In [None]:
# Sample year used to inspect what the US market-holiday page yields.
year = 2022
link = f'http://www.market-holidays.com/{year}'

# read_html returns a list with one DataFrame per HTML table found at the URL;
# the bare expression below displays that list as the cell output.
tablelist = pd.read_html(io=link)
tablelist

### Concatenate all years and export to CSV

In [None]:
# Build one table of US market holidays for 1990-2023 and write it to
# holiday_ny.csv, indexed by date.
#
# The per-year frames are gathered in a list and concatenated once after the
# loop: calling pd.concat on a growing accumulator inside the loop re-copies
# every previously collected row on each iteration.
frames_ny = []

for year in range(1990, 2024):
    tablelist = pd.read_html(f'http://www.market-holidays.com/{year}')
    dfraw = tablelist[0]

    # Column 1 holds the date text (parsed with '%B %d, %Y'); column 0 holds
    # the holiday name.
    dates = dfraw[1].apply(lambda text: datetime.strptime(text, '%B %d, %Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        # weekday() is 0 for Monday, so +1 gives Monday = 1 ... Sunday = 7.
        'wday': dates.apply(lambda d: d.weekday() + 1),
        'name': dfraw[0],
    })
    frames_ny.append(dfhday)
    print(f'{year} US holiday done.')

dfhday_ny = pd.concat(frames_ny, axis=0)
dfhday_ny.set_index('date', inplace=True)

# Shorten two long holiday names (exact-match replacement only).
dfhday_ny['name'] = dfhday_ny['name'].replace({
    'Martin Luther King, Jr. Day': 'MLK Day',
    'Juneteenth National Independence Day': '619 Day',
})

# Drop the rows whose name carries the "(1-minute pause at noon)" suffix.
dfhday_ny = dfhday_ny[dfhday_ny['name'] != 'Martin Luther King, Jr. Day(1-minute pause at noon)']
dfhday_ny.to_csv('holiday_ny.csv')

## HK market holidays

### Raw dataframe

In [None]:
# Sample year used to inspect what the gov.hk public-holiday page yields.
year = 2022
link = f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm'

# read_html returns a list with one DataFrame per HTML table found at the URL;
# only the first table is displayed as the cell output.
tablelist = pd.read_html(io=link)
tablelist[0]

### Restructure holiday dataframe

In [None]:
# Keep the first scraped table from row label 1 onward
# (presumably this skips a header row - confirm against the raw table).
dfraw1 = tablelist[0].loc[1:]

# Column 1 holds day-and-month text without a year, so the sample year is
# appended before parsing with '%d %B,%Y'.
hk_dates = dfraw1[1].apply(lambda text: datetime.strptime(text + f',{year}', '%d %B,%Y'))

dfhday2 = pd.DataFrame(
    {
        'date': hk_dates,
        # weekday() is 0 for Monday, so +1 gives Monday = 1 ... Sunday = 7.
        'wday': hk_dates.apply(lambda d: d.weekday() + 1),
        # Column 0 holds the holiday name.
        'name': dfraw1[0],
    },
    columns=['date', 'wday', 'name'],
)
dfhday2

### Concatenate all years and export to CSV

In [None]:
# Build one table of HK public holidays for 2007-2023 and write it to
# holiday_hk.csv, indexed by date.
#
# The per-year frames are gathered in a list and concatenated once after the
# loop: calling pd.concat on a growing accumulator inside the loop re-copies
# every previously collected row on each iteration.
frames_hk = []

for year in range(2007, 2024):
    tablelist = pd.read_html(f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm')

    # Keep rows from label 1 onward
    # (presumably this skips a header row - confirm against the raw table).
    dfraw = tablelist[0].loc[1:]

    # Column 1 holds day-and-month text without a year, so the loop's year is
    # appended before parsing; column 0 holds the holiday name.
    dates = dfraw[1].apply(lambda text: datetime.strptime(text + f',{year}', '%d %B,%Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        # weekday() is 0 for Monday, so +1 gives Monday = 1 ... Sunday = 7.
        'wday': dates.apply(lambda d: d.weekday() + 1),
        'name': dfraw[0],
    })
    frames_hk.append(dfhday)
    print(f'{year} HK holiday done.')

dfhday_hk = pd.concat(frames_hk, axis=0)
dfhday_hk.set_index('date', inplace=True)
dfhday_hk.to_csv('holiday_hk.csv')