In [None]:
import datetime
import requests
import boto3
import pandas as pd
import io
import re

In [None]:
# TWSE holiday-schedule CSV endpoint; `{}` is filled in with the query year.
url_template = 'https://www.twse.com.tw/holidaySchedule/holidaySchedule?response=csv&queryYear={}'
# Name pattern for the schedule file; `{}` is filled in with the same year.
filename_template = 'holidaySchedule_{}.csv'
# Backward-compatible alias: the original, misspelled name is still used elsewhere in the notebook.
filename_tamplate = filename_template

def get_tw_year(year: int) -> int:
    """Convert a Gregorian year to the Taiwan-calendar year (Gregorian year minus 1911)."""
    gregorian_offset = 1911
    return year - gregorian_offset

# Target year: today's Gregorian year and its Taiwan-calendar equivalent.
year = datetime.date.today().year
tw_year = get_tw_year(year)

# Both templates take the Taiwan-calendar year.
filename = filename_tamplate.format(tw_year)
url = url_template.format(tw_year)

# S3 bucket and object key for the schedule file.
s3_bucket = 'indextracker'
s3_key = 'tw/' + filename

## load data directly

In [None]:
def download_file(filepath, url):
    """Stream the resource at ``url`` into the local file ``filepath``.

    Args:
        filepath: Path of the file to write; it is opened in binary write mode.
        url: Address fetched with ``requests.get(..., stream=True)``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so an
            error page is never saved as if it were the requested file.
    """
    with requests.get(url, stream=True) as response:
        # Check the status before opening the target so a failed request does
        # not create or truncate the file.
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            # iter_content() defaults to chunk_size=1 (one byte per iteration);
            # read in larger chunks instead.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

# download_file(f'.\{filename}', url)

In [None]:
def check_s3_key_exist(bucket, key):
    """Return whether an object named ``key`` exists in S3 bucket ``bucket``.

    Args:
        bucket: Name of the S3 bucket.
        key: Object key to look up with a HEAD request.

    Returns:
        True if ``head_object`` succeeds, False if S3 reports the object as
        not found.

    Raises:
        botocore.exceptions.ClientError: For any other failure (for example
            403 Forbidden), which says nothing about whether the key exists.
    """
    from botocore.exceptions import ClientError

    s3c = boto3.client('s3')
    try:
        s3c.head_object(Bucket=bucket, Key=key)
    except ClientError as error:  # https://stackoverflow.com/a/42978638
        # A HEAD response carries no error body, so a missing key shows up as
        # the bare HTTP status "404"; the named codes are accepted defensively.
        error_code = error.response.get('Error', {}).get('Code')
        if error_code in ('404', 'NoSuchKey', 'NotFound'):
            return False
        # Anything else is a real failure, not evidence that the key is absent.
        raise
    return True

# check_s3_key_exist(s3_bucket, s3_key)

In [None]:
def download_to_s3(bucket, key, url):
    """Stream the resource at ``url`` straight into an S3 object.

    NOTE: a later cell defines another ``download_to_s3`` (built on
    ``urllib.request``); when both cells run, that later definition is the
    one bound to the name.

    Args:
        bucket: Name of the destination S3 bucket.
        key: Object key to write.
        url: Address fetched with ``requests.get(..., stream=True)``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so an
            error page is never uploaded in place of the data.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        # response.raw is the undecoded urllib3 stream; ask it to undo any
        # Content-Encoding (gzip/deflate) so the stored object holds the
        # decoded payload rather than the compressed wire bytes.
        response.raw.decode_content = True
        s3c = boto3.client('s3')
        s3c.upload_fileobj(response.raw, bucket, key)
    
# download_to_s3(s3_bucket, s3_key, url)

In [None]:
import urllib.request
def download_to_s3(bucket, key, url):
    """Copy the resource at ``url`` into object ``key`` of S3 bucket ``bucket``.

    The response returned by ``urllib.request.urlopen`` is handed to
    ``upload_fileobj`` as a file-like object; no local file is written.
    """
    with urllib.request.urlopen(url) as remote:
        boto3.client('s3').upload_fileobj(remote, bucket, key)
    
# download_to_s3(s3_bucket, s3_key, url)

In [None]:
# Fetch the schedule and decode it as Big5 text, one list entry per line.
response = requests.get(url)
# Stop on an HTTP error instead of decoding and parsing an error page as CSV.
response.raise_for_status()
content = response.content.decode('big5').split('\n')
content

In [None]:
# The first line of the download is skipped; the remaining lines are parsed as CSV.
csv_text = '\n'.join(content[1:])
df: pd.DataFrame = pd.read_csv(io.StringIO(csv_text), encoding='big5')
df

In [None]:
# Replace the header read from the CSV with short English column names.
column_names = ['name', 'date', 'weekday', 'description', 'memo']
df.columns = column_names
df

In [None]:
def extract_date(date_string, year) -> datetime.date:
    """Parse a ``<month>月<day>日`` string into a date in the given year.

    Args:
        date_string: Text containing a month/day such as ``'1月1日'``; the
            first match is used and any surrounding text is ignored.
        year: Gregorian year to combine with the parsed month and day.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If no month/day pattern is found, or the month/day do not
            form a valid date in ``year``.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    match = re.search(r'(\d+)月(\d+)日', date_string)
    if match is None:
        raise ValueError(f'no "<month>月<day>日" date found in {date_string!r}')
    month, day = match.groups()
    return datetime.date(year, int(month), int(day))
    
# Calendar date for every row, built from the 'date' text and the processed year.
df['ds'] = df['date'].map(lambda text: extract_date(text, year))
# Rows whose memo is exactly 'o' are flagged as workdays.
df['is_workday'] = df['memo'].eq('o')

df

In [None]:
# Split the schedule dates by flag: rows marked as workdays vs. all other rows.
workday_mask = df['is_workday']
makeup_days = pd.to_datetime(df.loc[workday_mask, 'ds'])
holidays = pd.to_datetime(df.loc[~workday_mask, 'ds'])

def is_trading_days(date: datetime.date) -> bool:
    """Tell whether ``date`` is a trading day.

    A date counts as a trading day when it is listed in the module-level
    ``makeup_days`` series, or when it is a weekday (Monday-Friday) that is
    not listed in the module-level ``holidays`` series.

    Args:
        date: A ``datetime.date``, ``datetime.datetime`` or ``pd.Timestamp``;
            any time-of-day part is ignored.

    Returns:
        True for a trading day, False otherwise.
    """
    # holidays / makeup_days hold datetime64 values, and pandas does not treat
    # a plain datetime.date as equal to them (the comparison is all False).
    # Convert to a midnight Timestamp so every accepted input type matches.
    day = pd.Timestamp(date).normalize()

    is_weekend = day.weekday() >= 5  # 5 = Saturday, 6 = Sunday
    is_holiday = (holidays == day).any()
    is_makeup_day = (makeup_days == day).any()

    return bool((not is_weekend and not is_holiday) or is_makeup_day)

In [None]:
# Calendar bounds of the year being processed.
first_day_of_year = datetime.date(year, 1, 1)
last_day_of_year = datetime.date(year, 12, 31)

# One entry per calendar day of that year.
all_days: pd.Series = pd.date_range(start=first_day_of_year, end=last_day_of_year).to_series()

# Keep only the days that is_trading_days() rejects.
not_trading = ~all_days.apply(is_trading_days)
all_days[not_trading]