## Modules

In [1]:
import os
import sys
sys.path.append('..')

from datetime import datetime, timedelta
import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup

## US Market holidays

### Raw dataframe

In [None]:
# Preview cell: fetch the US market-holiday page for one sample year so the
# raw table layout can be inspected before the multi-year scrape.
year = 2022
link = f'http://www.market-holidays.com/{year}'
# pd.read_html returns a list of DataFrames, one per <table> found on the page.
tablelist = pd.read_html(link)
# Bare expression: shown as the cell output when run in a notebook.
tablelist

### Concatenate all years and export to CSV

In [None]:
# Scrape the US market-holiday table for every year 1990-2023, normalise each
# year to the columns (date, wday, name), and export the combined table to
# 'holiday_ny.csv' indexed by date.
#
# The per-year frames are gathered in a list and concatenated once after the
# loop: calling pd.concat on a growing frame inside the loop re-copies all
# previously collected rows on every iteration.
yearly_frames = []

for year in range(1990, 2024):
    tablelist = pd.read_html(f'http://www.market-holidays.com/{year}')
    # Only the first table on the page is used; column 0 is taken as the
    # holiday name and column 1 as a date string such as 'January 1, 2022'.
    dfraw = tablelist[0]
    dates = dfraw[1].apply(lambda x: datetime.strptime(x, '%B %d, %Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        # weekday() is Monday=0, so +1 gives Monday=1 ... Sunday=7.
        'wday': dates.apply(lambda x: x.weekday() + 1),
        'name': dfraw[0],
    })
    yearly_frames.append(dfhday)
    print(f'{year} US holiday done.')

dfhday_ny = pd.concat(yearly_frames, axis=0)
dfhday_ny.set_index('date', inplace=True)

# Shorten two long holiday names (exact, whole-cell matches only).
dfhday_ny['name'] = dfhday_ny['name'].replace({
    'Martin Luther King, Jr. Day': 'MLK Day',
    'Juneteenth National Independence Day': '619 Day',
})
# Drop the rows whose name carries the "(1-minute pause at noon)" suffix; the
# exact-match replacement above leaves those rows untouched, so they are still
# identifiable here by their original text.
dfhday_ny = dfhday_ny[dfhday_ny['name'] != 'Martin Luther King, Jr. Day(1-minute pause at noon)']
dfhday_ny.to_csv('holiday_ny.csv')

## HK market holidays

### Raw dataframe

In [None]:
# Preview cell: fetch the Hong Kong government public-holiday page for one
# sample year so the raw table layout can be inspected before the multi-year
# scrape.
year = 2022
link = f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm'
# pd.read_html returns a list of DataFrames, one per <table> found on the page.
tablelist = pd.read_html(link)
# Bare expression: shows the first table as the cell output in a notebook.
tablelist[0]

### Concatenate all years and export to CSV

In [None]:
# Scrape the Hong Kong public-holiday table for every year 2007-2023,
# normalise each year to the columns (date, wday, name), and export the
# combined table to 'holiday_hk.csv' indexed by date.
#
# The per-year frames are gathered in a list and concatenated once after the
# loop: calling pd.concat on a growing frame inside the loop re-copies all
# previously collected rows on every iteration.
yearly_frames = []

for year in range(2007, 2024):
    tablelist = pd.read_html(f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm')
    # Skip the row labelled 0 of the first table (presumably a header row
    # parsed as data -- confirm against the page). Column 0 is taken as the
    # holiday name and column 1 as a day/month string without a year.
    dfraw = tablelist[0].loc[1:]
    # The page omits the year from each date, so append the loop's year
    # before parsing.
    dates = dfraw[1].apply(lambda x: datetime.strptime(x + f',{year}', '%d %B,%Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        # weekday() is Monday=0, so +1 gives Monday=1 ... Sunday=7.
        'wday': dates.apply(lambda x: x.weekday() + 1),
        'name': dfraw[0],
    })
    yearly_frames.append(dfhday)
    print(f'{year} HK holiday done.')

dfhday_hk = pd.concat(yearly_frames, axis=0)
dfhday_hk.set_index('date', inplace=True)
dfhday_hk.to_csv('holiday_hk.csv')

### Holiday files

In [4]:
# Read both exported holiday CSVs and keep only the first field of every data
# row (the date index written by to_csv), then print the last 24 dates of each
# list as a quick sanity check.
#
# The files are opened in a `with` block so the handles are closed as soon as
# the lines are read; previously they stayed open for the life of the kernel.
# UTF-8 is given explicitly because that is what DataFrame.to_csv writes by
# default, whereas open() would otherwise use the platform's locale encoding.
with open('holiday_ny.csv', 'r', encoding='utf-8') as nyhdayfile, \
        open('holiday_hk.csv', 'r', encoding='utf-8') as hkhdayfile:
    hkhdaylines = hkhdayfile.readlines()
    nyhdaylines = nyhdayfile.readlines()

# [1:] skips the CSV header line; the date is everything before the first comma.
hkhdaylist = [row.split(',')[0] for row in hkhdaylines[1:]]
print(hkhdaylist[-24:])

nyhdaylist = [row.split(',')[0] for row in nyhdaylines[1:]]
print(nyhdaylist[-24:])


['2022-06-03', '2022-07-01', '2022-09-12', '2022-10-01', '2022-10-04', '2022-12-26', '2022-12-27', '2023-01-02', '2023-01-23', '2023-01-24', '2023-01-25', '2023-04-05', '2023-04-07', '2023-04-08', '2023-04-10', '2023-05-01', '2023-05-26', '2023-06-22', '2023-07-01', '2023-09-30', '2023-10-02', '2023-10-23', '2023-12-25', '2023-12-26']
['2021-07-05', '2021-09-06', '2021-11-25', '2021-12-24', '2021-12-31', '2022-01-17', '2022-02-21', '2022-04-15', '2022-05-30', '2022-06-20', '2022-07-04', '2022-09-05', '2022-11-24', '2022-12-26', '2023-01-02', '2023-01-16', '2023-02-20', '2023-04-07', '2023-05-29', '2023-06-19', '2023-07-04', '2023-09-04', '2023-11-23', '2023-12-25']
