## Modules

In [1]:
import os
import sys
sys.path.append('..')

from datetime import datetime, timedelta
import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup

## US Market holidays

### Raw dataframe

In [None]:
# Fetch a single sample year so the raw table layout can be inspected
# before the full year range is scraped.
year = 2022
link = f'http://www.market-holidays.com/{year}'
# read_html returns a list with one DataFrame per HTML table found on the page.
tablelist = pd.read_html(link)
# Display every parsed table so the holiday table can be located by eye.
tablelist

### Concatenate all years and export to CSV

In [None]:
# Scrape one page per year. The per-year frames are collected in a list and
# concatenated once at the end, instead of re-copying an ever-growing frame
# (and concatenating with an empty frame) on every iteration.
frames_ny = []

for year in range(1990, 2024):
    tablelist = pd.read_html(f'http://www.market-holidays.com/{year}')
    dfraw = tablelist[0]
    # Column 1 is parsed as the date text ("<Month> <day>, <year>"),
    # column 0 is used as the holiday name.
    dates = dfraw[1].apply(lambda x: datetime.strptime(x, '%B %d, %Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        'wday': dates.apply(lambda x: x.weekday() + 1),  # 1 = Monday ... 7 = Sunday
        'name': dfraw[0],
    })
    frames_ny.append(dfhday)
    print(f'{year} US holiday done.')

dfhday_ny = pd.concat(frames_ny, axis=0).set_index('date')
# Shorten two long holiday names.
dfhday_ny['name'] = dfhday_ny['name'].replace({
    'Martin Luther King, Jr. Day': 'MLK Day',
    'Juneteenth National Independence Day': '619 Day',
})
# Drop the rows whose name marks a 1-minute pause at noon: they are not
# exported as holidays.
dfhday_ny = dfhday_ny[dfhday_ny['name'] != 'Martin Luther King, Jr. Day(1-minute pause at noon)']
dfhday_ny.to_csv('holiday_ny.csv')

## HK market holidays

### Raw dataframe

In [None]:
# Fetch a single sample year from the HK government holiday page so the raw
# table layout can be inspected before the full year range is scraped.
year = 2022
link = f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm'
# read_html returns a list with one DataFrame per HTML table found on the page.
tablelist = pd.read_html(link)
# Display only the first parsed table.
tablelist[0]

### Concatenate all years and export to CSV

In [None]:
# Scrape one page per year. The per-year frames are collected in a list and
# concatenated once at the end, instead of re-copying an ever-growing frame
# (and concatenating with an empty frame) on every iteration.
frames_hk = []

for year in range(2007, 2024):
    tablelist = pd.read_html(f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm')
    # Skip the row labelled 0 (presumably the table's header row; confirm
    # against the raw table shown in the preview cell).
    dfraw = tablelist[0].loc[1:]
    # Column 1 holds "<day> <Month>" without a year, so the page's year is
    # appended before parsing; column 0 is used as the holiday name.
    dates = dfraw[1].apply(lambda x: datetime.strptime(x + f',{year}', '%d %B,%Y'))
    dfhday = pd.DataFrame({
        'date': dates,
        'wday': dates.apply(lambda x: x.weekday() + 1),  # 1 = Monday ... 7 = Sunday
        'name': dfraw[0],
    })
    frames_hk.append(dfhday)
    print(f'{year} HK holiday done.')

dfhday_hk = pd.concat(frames_hk, axis=0).set_index('date')
dfhday_hk.to_csv('holiday_hk.csv')

## Handling trading days

### Holiday files

In [2]:
# Each holiday CSV has a header row followed by rows whose first field is the
# holiday date as yyyy-mm-dd. The files are read inside `with` blocks so the
# handles are closed as soon as the lines are in memory; blank rows (e.g. a
# trailing newline) are skipped instead of crashing the date parser.

# US stock market holidays
with open('holiday_ny.csv', 'r') as holidayfile_ny:
    holidaylines_ny = holidayfile_ny.readlines()
holidaylist_ny = [
    datetime.strptime(row.split(',')[0], '%Y-%m-%d')
    for row in holidaylines_ny[1:]
    if row.strip()
]

# HK stock market holidays
with open('holiday_hk.csv', 'r') as holidayfile_hk:
    holidaylines_hk = holidayfile_hk.readlines()
holidaylist_hk = [
    datetime.strptime(row.split(',')[0], '%Y-%m-%d')
    for row in holidaylines_hk[1:]
    if row.strip()
]

### Trading days

In [3]:
def getwkdays(startyr=1997, endyr=2046, form='%Y-%m-%d'):
    """List every Monday-to-Friday date from 1 Jan of startyr to 31 Dec of endyr.

    Each date is rendered as a string using the strftime pattern `form`.
    An empty list is returned when startyr is later than endyr.
    """
    one_day = timedelta(days=1)
    cursor = datetime.strptime(f'{startyr}-01-01', '%Y-%m-%d')
    weekdays = []
    while startyr <= cursor.year <= endyr:
        # weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
        if cursor.weekday() < 5:
            weekdays.append(cursor.strftime(form))
        cursor = cursor + one_day

    return weekdays

# Pre-built weekday calendars over the default year range, one per date
# format: yyyy-mm-dd, yyyymmdd and yymmdd respectively.
workdtlist0, workdtlist1, workdtlist2 = (
    getwkdays(form=fmt) for fmt in ('%Y-%m-%d', '%Y%m%d', '%y%m%d')
)

In [9]:
def gettradedays(holidaylist, startdt=datetime(2020, 1, 1), enddt=datetime(2022, 12, 31), form='%Y-%m-%d'):
    """Get all trading days in a period: weekdays that are not holidays.

    The period is walked on real calendar dates rather than by comparing
    formatted strings, so the result is chronologically correct for any
    `form` (e.g. '%y%m%d' across a century boundary) and is not limited to a
    pre-computed year range.

    Parameters
    ----------
    holidaylist : iterable of datetime/date
        Market holidays to exclude. A day is excluded when its `form`
        rendering equals the rendering of any holiday.
    startdt, enddt : datetime or date
        Inclusive bounds of the period. Only the calendar date is used; any
        time-of-day component is ignored.
    form : str
        strftime pattern used for the returned strings.

    Returns
    -------
    list of str
        Trading days in chronological order; empty if enddt is before startdt.
    """
    # Normalise to plain dates so a time-of-day on either bound cannot
    # exclude the boundary day itself.
    first = startdt.date() if isinstance(startdt, datetime) else startdt
    last = enddt.date() if isinstance(enddt, datetime) else enddt

    # A set gives constant-time holiday lookups.
    holidaystrs = {holiday.strftime(form) for holiday in holidaylist}

    tdlist = []
    # range() is empty when `last` precedes `first`, yielding no days.
    for dayoffset in range((last - first).days + 1):
        day = first + timedelta(days=dayoffset)
        if day.weekday() > 4:  # Saturday (5) or Sunday (6)
            continue
        dtstr = day.strftime(form)
        if dtstr not in holidaystrs:
            tdlist.append(dtstr)

    return tdlist

In [10]:
# Smoke test: HK trading days over gettradedays' default period
# (2020-01-01 to 2022-12-31) in the default yyyy-mm-dd format.
tdaylist_hk1 = gettradedays(holidaylist_hk)
# Print only the final 20 entries to keep the output short.
print(tdaylist_hk1[-20:])

['2022-12-01', '2022-12-02', '2022-12-05', '2022-12-06', '2022-12-07', '2022-12-08', '2022-12-09', '2022-12-12', '2022-12-13', '2022-12-14', '2022-12-15', '2022-12-16', '2022-12-19', '2022-12-20', '2022-12-21', '2022-12-22', '2022-12-23', '2022-12-28', '2022-12-29', '2022-12-30']


### Get latest trading day

In [11]:
def getlatesttradingday(holidaylist, offset=6, form='%Y-%m-%d'):
    """Return the most recent trading day as a string in `form`.

    The reference moment is the current local time moved back by one day
    plus `offset` hours. Trading days (per `holidaylist`) are looked up in
    the 30 days ending at that moment and the last one is returned.
    Raises IndexError if that window contains no trading day.
    """
    window_end = datetime.today() - timedelta(days=1, hours=offset)
    window_start = window_end - timedelta(days=30)
    recent = list(gettradedays(holidaylist, window_start, window_end, form))

    return recent[-1]

In [12]:
# Latest trading day for the US calendar, then for the HK calendar, one per line.
print(
    getlatesttradingday(holidaylist_ny),
    getlatesttradingday(holidaylist_hk),
    sep='\n',
)

2022-11-29
2022-11-29


### HK Futures monthly settlement days

In [None]:
def getsetdate(monthstr='JAN-21', holidaylist=None):
    """Obtain the HK settlement date in a month.

    The settlement date is taken to be the second-to-last HK trading day of
    the month. Trading days come from gettradedays, which is defined in this
    notebook (the previously referenced helper `get_hkexday_year` is not
    defined anywhere in it).

    Parameters
    ----------
    monthstr : str
        Contract month whose first three characters are the month
        abbreviation and whose last two characters are the year, e.g.
        'JAN-21'. The year is always read as 20yy.
    holidaylist : iterable of datetime, optional
        Market holidays to exclude. Defaults to the notebook-level
        `holidaylist_hk`.

    Returns
    -------
    datetime
        Midnight of the settlement date.

    Raises
    ------
    ValueError
        If the month abbreviation or year cannot be parsed.
    IndexError
        If the month has fewer than two trading days.
    """
    if holidaylist is None:
        holidaylist = holidaylist_hk

    # 'JAN-21' -> 'Jan-21' -> '2021-Jan' -> first day of that month.
    monthstr = monthstr.capitalize()
    monthstart = datetime.strptime(f'20{monthstr[-2:]}-{monthstr[:3]}', '%Y-%b')

    # Take the whole year's trading days, then keep those in the target month.
    year = monthstart.year
    yeartdlist = gettradedays(holidaylist, datetime(year, 1, 1), datetime(year, 12, 31), '%Y-%m-%d')
    monthprefix = monthstart.strftime('%Y-%m')
    mthdaylist = [dtstr for dtstr in yeartdlist if dtstr[:7] == monthprefix]

    setdate = datetime.strptime(mthdaylist[-2], '%Y-%m-%d')
    return setdate


In [20]:
# Worked example: find the settlement day (second-to-last HK trading day)
# of a contract month directly from gettradedays.
month1 = 'NOV-22'
monthstart1 = datetime.strptime(month1, '%b-%y')
monthstr1 = monthstart1.strftime('%Y-%m')
# Note: month1 is rebound here from the 'NOV-22' string to the month number.
year1, month1 = monthstart1.year, monthstart1.month
# All HK trading days of that year, narrowed to the ones in the target month.
tdaylist1 = [
    dtstr
    for dtstr in gettradedays(holidaylist_hk, datetime(year1, 1, 1), datetime(year1, 12, 31))
    if dtstr[:7] == monthstr1
]
tdaylist1[-2]

'2022-11-29'