In [1]:
import datetime
import pandas as pd
from typing import Generator

# Product codes to keep; both trade tables are filtered against this list.
target_contracts = ['TX', 'MTX']

In [2]:
# URL and file-name patterns for the daily trade archive.
dealt_url_template = 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/{}'
dealt_filename_template = 'Daily_{}.zip'

# Date whose files are processed, and the S3 bucket the archives are read from.
date = datetime.date(2022, 1, 20)
s3_bucket = 'indextracker'

# The file name embeds the date as YYYY_MM_DD.
dealt_filename = dealt_filename_template.format(date.strftime('%Y_%m_%d'))

# Same file name is used for the S3 object key and for the source URL.
dealt_s3_key = f'tw/futures/raw/{dealt_filename}'
dealt_url = dealt_url_template.format(dealt_filename)

dealt_url

'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2022_01_20.zip'

In [3]:
import urllib.request
# Probe the source URL and print the response's Content-Type header and HTTP status.
with urllib.request.urlopen(dealt_url) as response:
    print(response.getheader(name="Content-Type"))
    print(response.status)

application/zip
200


In [4]:
def get_zipped_objects_from_s3(bucket, key):
    """Yield the raw bytes of every member of a zip archive stored in S3.

    The whole S3 object is read into memory first, then opened as a zip
    archive; members are yielded lazily in ``namelist()`` order.

    Args:
        bucket: Name of the S3 bucket.
        key: Key of the zip object inside the bucket.

    Yields:
        bytes: Content of one archive member per iteration.
    """
    import boto3, zipfile, io
    client = boto3.client('s3')
    response = client.get_object(Bucket=bucket, Key=key)
    payload = response['Body'].read()
    with io.BytesIO(payload) as buffer, zipfile.ZipFile(buffer) as archive:
        yield from map(archive.read, archive.namelist())

In [5]:
# Take the first member of the archive, decode it as Big5 and split it on CRLF.
dealt_content = list(get_zipped_objects_from_s3(s3_bucket, dealt_s3_key))[0]
dealt_arr = dealt_content.decode('big5').split('\r\n')

# Element 0 is the file's header line.
dealt_arr[0]

'成交日期,商品代號,到期月份(週別),成交時間,成交價格,成交數量(B+S),近月價格,遠月價格,開盤集合競價 '

In [6]:
# Split every line after the header on commas (all values stay strings).
dealt_arr_2d = [x.split(',') for x in dealt_arr[1:]]

# Build the frame and assign English column names in place of the file's header line.
dealt_df = pd.DataFrame(dealt_arr_2d)
dealt_df.columns = ['date', 'contract', 'expire', 'time', 'price', 'volume', 'near_price', 'far_price', 'is_open_auction']
dealt_df

Unnamed: 0,date,contract,expire,time,price,volume,near_price,far_price,is_open_auction
0,20220119,BRF,202203,182841,2441,10,-,-,
1,20220119,BRF,202203,183151,2442.5,10,-,-,
2,20220119,BRF,202203,184034,2444.5,10,-,-,
3,20220119,BRF,202203,190642,2440.5,30,-,-,
4,20220119,BRF,202203,191830,2440.5,2,-,-,
...,...,...,...,...,...,...,...,...,...
402106,20220120,ZFF,202203,112711,1785.6,2,-,-,
402107,20220120,ZFF,202203,124405,1787,2,-,-,
402108,20220120,ZFF,202203,124423,1787,2,-,-,
402109,20220120,ZFF,202203,132833,1784.6,2,-,-,


In [7]:
# Strip whitespace from the contract codes, then keep only the target products.
dealt_df['contract'] = dealt_df.contract.str.strip()
dealt_df = dealt_df[dealt_df['contract'].isin(target_contracts)]

In [8]:
# Drop the flag column; the spread table drops its own flag column too, so both end up with the same columns.
dealt_df = dealt_df.drop('is_open_auction', axis=1)
dealt_df

Unnamed: 0,date,contract,expire,time,price,volume,near_price,far_price
53335,20220119,MTX,202201W4,150001,18184,4,-,-
53336,20220119,MTX,202201W4,150021,18178,2,-,-
53337,20220119,MTX,202201W4,150624,18188,2,-,-
53338,20220119,MTX,202201W4,150705,18188,2,-,-
53339,20220119,MTX,202201W4,151501,18189,2,-,-
...,...,...,...,...,...,...,...,...
390504,20220120,TX,202212,133712,17566,4,-,-
390505,20220120,TX,202212,133910,17574,2,-,-
390506,20220120,TX,202212,133910,17575,2,-,-
390507,20220120,TX,202212,134108,17571,2,-,-


In [9]:
# URL and file-name patterns for the second daily archive (the "_C" file).
spread_url_template = 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV_C/{}'
spread_filename_template = 'Daily_{}_C.zip'

# Same date as the dealt file, formatted as YYYY_MM_DD.
spread_filename = spread_filename_template.format(date.strftime('%Y_%m_%d'))

# Same file name is used for the S3 object key and for the source URL.
spread_s3_key = f'tw/futures/raw/{spread_filename}'
spread_url = spread_url_template.format(spread_filename)

spread_url

'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV_C/Daily_2022_01_20_C.zip'

In [10]:
# Take the first member of the archive, decode it as Big5 and split it on CRLF.
spread_content = list(get_zipped_objects_from_s3(s3_bucket, spread_s3_key))[0]
spread_arr = spread_content.decode('big5').split('\r\n')

# Element 0 is the file's header line.
spread_arr[0]

'成交日期,商品代號,到期月份(週別),成交時間,成交價格,成交數量(B+S),近月價格,遠月價格,屬價差對價差成交者 '

In [11]:
# Split every line after the header on commas (all values stay strings).
spread_arr_2d = [x.split(',') for x in spread_arr[1:]]

# Build the frame and assign English column names in place of the file's header line.
spread_df = pd.DataFrame(spread_arr_2d)
spread_df.columns = ['date', 'contract', 'expire', 'time', 'price', 'volume', 'near_price', 'far_price', 'is_with_spread']
spread_df

Unnamed: 0,date,contract,expire,time,price,volume,near_price,far_price,is_with_spread
0,20220120,CBF,202202/202203,084526,-.06,4,34.1,34.04,*
1,20220120,CBF,202202/202203,084527,-.06,4,34.1,34.04,*
2,20220120,CBF,202202/202203,084531,-.06,32,34.1,34.04,*
3,20220120,CBF,202202/202203,084534,-.04,4,34.1,34.06,*
4,20220120,CBF,202202/202203,084609,0,4,34.15,34.15,
...,...,...,...,...,...,...,...,...,...
10461,20220120,ZFF,202202/202203,112711,-1.8,4,1787.4,1785.6,
10462,20220120,ZFF,202202/202203,124405,-.8,4,1787.8,1787,
10463,20220120,ZFF,202202/202203,124423,-.8,4,1787.8,1787,
10464,20220120,ZFF,202202/202203,132833,-.6,4,1785.2,1784.6,


In [12]:
# Strip whitespace from the contract codes, then keep only the target products.
spread_df.contract = spread_df.contract.str.strip()
spread_df = spread_df.loc[spread_df.contract.isin(target_contracts)]

In [13]:
# Drop the flag column; the dealt table drops its own flag column too, so both end up with the same columns.
spread_df = spread_df.drop('is_with_spread', axis=1)
spread_df

Unnamed: 0,date,contract,expire,time,price,volume,near_price,far_price
4908,20220119,MTX,202201W4/202202,150001,-18,8,18184,18166
4909,20220119,MTX,202201W4/202202,150021,-17,4,18178,18161
4910,20220119,MTX,202201W4/202202,151712,-27,4,18199,18172
4911,20220119,MTX,202201W4/202202,152230,-29,4,18194,18165
4912,20220119,MTX,202201W4/202202,155824,-29,4,18180,18151
...,...,...,...,...,...,...,...,...
10404,20220120,TX,202209/202212,095700,-81,4,17588,17507
10405,20220120,TX,202209/202212,120148,-81,4,17643,17562
10406,20220120,TX,202209/202212,133335,-80,4,17633,17553
10407,20220120,TX,202209/202212,133443,-78,4,17636,17558


In [14]:
# Stack both tables. Each keeps the row labels of its source file (no ignore_index),
# so index labels are not guaranteed to be unique in total_df.
total_df = pd.concat([dealt_df, spread_df])

In [15]:
# Inspect the distinct raw expiry strings before cleaning.
total_df.expire.unique()

array(['202201W4     ', '202202     ', '202202/202209', '202202/202203',
       '202203     ', '202204     ', '202206     ', '202209     ',
       '202212     ', '202202/202204', '202201W4/202202',
       '202201W4/202204', '202202/202206', '202202/202212',
       '202203/202204', '202203/202212', '202206/202209', '202203/202206',
       '202204/202206', '202209/202212'], dtype=object)

In [16]:
# Remove the padding around the expiry strings.
total_df['expire'] = total_df['expire'].str.strip()
# Volume was parsed as text; convert it to numbers.
total_df['volume'] = pd.to_numeric(total_df['volume'])
# Join the date and time text into one timestamp and mark it as Asia/Taipei local time.
total_df['datetime'] = pd.to_datetime(total_df.date.str.strip() + total_df.time.str.strip(), format='%Y%m%d%H%M%S').dt.tz_localize('Asia/Taipei')

In [17]:
# Rows whose expiry looks like "near/far" are two-legged trades. Replace each
# of them with one row per leg: the leg's own expiry, the leg's own price and
# half of the reported volume.
# NOTE(review): in the sample outputs the dealt file already has ZFF 202203
# rows whose time and price equal the far leg of spread-file rows — confirm
# that adding the legs here does not count those trades twice.
is_switch = total_df['expire'].str.contains('/', na=False)
switch_df = total_df[is_switch]

# Near leg: expiry before the slash, priced at near_price.
switch_df_near = switch_df.copy()
switch_df_near['expire'] = switch_df['expire'].str.extract(r'([0-9W]+)/[0-9W]+', expand=False)
switch_df_near['price'] = switch_df['near_price']
switch_df_near['volume'] = switch_df['volume'] // 2

# Far leg: expiry after the slash, priced at far_price.
switch_df_far = switch_df.copy()
switch_df_far['expire'] = switch_df['expire'].str.extract(r'[0-9W]+/([0-9W]+)', expand=False)
switch_df_far['price'] = switch_df['far_price']
switch_df_far['volume'] = switch_df['volume'] // 2

# Select the remaining rows with the boolean mask rather than
# drop(switch_df.index): total_df carries the row labels of two separately
# numbered files, and a label-based drop would also remove any unrelated row
# that happens to share a label with a two-legged row.
total_df = pd.concat([total_df[~is_switch], switch_df_near, switch_df_far])

In [18]:
# Keep only the columns used from here on.
total_df = total_df[['datetime', 'contract', 'expire', 'price', 'volume']]

total_df

Unnamed: 0,datetime,contract,expire,price,volume
53335,2022-01-19 15:00:01+08:00,MTX,202201W4,18184,4
53336,2022-01-19 15:00:21+08:00,MTX,202201W4,18178,2
53337,2022-01-19 15:06:24+08:00,MTX,202201W4,18188,2
53338,2022-01-19 15:07:05+08:00,MTX,202201W4,18188,2
53339,2022-01-19 15:15:01+08:00,MTX,202201W4,18189,2
...,...,...,...,...,...
10404,2022-01-20 09:57:00+08:00,TX,202212,17507,2
10405,2022-01-20 12:01:48+08:00,TX,202212,17562,2
10406,2022-01-20 13:33:35+08:00,TX,202212,17553,2
10407,2022-01-20 13:34:43+08:00,TX,202212,17558,2


In [19]:
# def get_settlement_date(year: int, month: int) -> datetime.date:
#     ### the 3rd wednesday
#     first = datetime.date(year, month, 1)
#     return first.replace(day = 15 + (2 - first.weekday()) % 7)

def get_monthly_settlement_date(date:datetime.date) -> datetime.date:
    """Return the third Wednesday of the month that contains ``date``.

    Args:
        date: Any day of the month of interest.

    Returns:
        ``date`` with its day replaced by the day of the third Wednesday.
    """
    # (date.weekday() - (date.day - 1)) % 7 is the weekday of the 1st, so this
    # is the distance from the 15th to the first Wednesday on or after it.
    days_into_month = date.day - 1
    offset_from_15th = (2 - date.weekday() + days_into_month) % 7
    return date.replace(day=15 + offset_from_15th)

def get_next_weekly_settlement_date(date:datetime.date) -> datetime.date:
    """Return the first Wednesday strictly after ``date``.

    A Wednesday input therefore maps to the Wednesday one week later.
    """
    # Tuesday (weekday 1) is one day before Wednesday; every other day wraps modulo 7.
    days_ahead = 1 + (1 - date.weekday()) % 7
    return date + datetime.timedelta(days=days_ahead)

# for i in range(12):
#     print(get_settlement_date(2021, i+1))
# for i in range(365):
#     print(get_next_settlement_date(datetime.date(2022, 1, 1) + datetime.timedelta(days=i)))

In [21]:
def _get_expiration_code_map(date:datetime.date) -> Generator[tuple[str, str], None, None]:
    """Yield ``(expiration code, expiry name)`` pairs derived from ``date``.

    Pairs are produced in this order:

    * ``'W'`` for ``date`` itself, only when ``date`` is a Wednesday;
    * ``'W'`` for the next Wednesday after ``date``;
    * ``'M'``, ``'M+1'``, ``'M+2'`` for the front month and the two months
      after it, where the front month is the month of ``date``, or the
      following month once ``date`` is past that month's third Wednesday;
    * ``'Q+1'``, ``'Q+2'``, ``'Q+3'`` for the front month rounded up to a
      multiple of three, plus 3, 6 and 9 months.

    Expiry names are ``YYYYMM``; a Wednesday that is not the third of its
    month is named ``YYYYMMW<n>`` with ``n`` its ordinal within the month.
    """

    def settlement_label(day:datetime.date):
        # Ordinal of this weekday within its month (1..5).
        ordinal = (day.day - 1) // 7 + 1
        if ordinal != 3:
            return f'{day.year:4}{day.month:02}W{ordinal:1}'
        # The third week carries the plain monthly name.
        return f'{day.year:4}{day.month:02}'

    if date.weekday() == 2:
        yield ('W', settlement_label(date))

    yield ('W', settlement_label(get_next_weekly_settlement_date(date)))

    # May become 13 in late December; month_label() carries it into the next year.
    front_month = date.month
    if date > get_monthly_settlement_date(date):
        front_month += 1

    def month_label(offset):
        # offset counts months from January of date.year (0-based).
        years, month_index = divmod(offset, 12)
        return f'{(date.year + years):4}{(month_index + 1):02}'

    base = front_month - 1
    for step, code in enumerate(('M', 'M+1', 'M+2')):
        yield (code, month_label(base + step))

    # Months needed to reach the next multiple of three (0 when already on one).
    to_quarter = -front_month % 3
    for step, code in enumerate(('Q+1', 'Q+2', 'Q+3'), start=1):
        yield (code, month_label(base + step * 3 + to_quarter))

# for i in range(365):
    # print(list(_get_expiration_code_map(datetime.date(2021, 1, 1) + datetime.timedelta(days=i))))

In [48]:
def get_expiration_code_map(date:datetime.date):
    """Group the pairs from ``_get_expiration_code_map`` by expiry name.

    Args:
        date: Day for which the mapping is built.

    Returns:
        dict mapping each expiry name to the list of expiration codes that
        refer to it, e.g. ``{'202202': ['W', 'M'], ...}``. Keys appear in the
        order the generator first yields them, and each list keeps the
        generator's order, so the result is the same on every run.
    """
    codes_by_expiry: dict[str, list[str]] = {}
    # One pass over the pairs; the dict preserves first-seen key order.
    for code, expiry in _get_expiration_code_map(date):
        codes_by_expiry.setdefault(expiry, []).append(code)
    return codes_by_expiry

# Example on a third Wednesday: the monthly expiry is listed under both 'W' and 'M'.
get_expiration_code_map(datetime.date(2022, 2, 16))

{'202206': ['Q+1'],
 '202202W4': ['W'],
 '202212': ['Q+3'],
 '202204': ['M+2'],
 '202209': ['Q+2'],
 '202202': ['W', 'M'],
 '202203': ['M+1']}

In [None]:
# total_df = total_df[~total_df['expire'].str.contains('W')]

In [None]:
# Distinct expiry names that the code map has to cover.
total_df.expire.unique()

In [None]:
expiration_code_map = get_expiration_code_map(date)

# The map's values are lists of codes (one expiry can be both 'W' and 'M').
# Lists are unhashable, so a list-valued column cannot be used as a groupby
# key. Explode to one row per code so 'expire' holds a single scalar code;
# a trade in a doubly-coded expiry then appears once under each code.
# Expiries missing from the map become NaN.
total_df = total_df.assign(expire=total_df['expire'].map(expiration_code_map)).explode('expire')
total_df

In [None]:
# Show the frame again after the expiry mapping.
total_df

In [None]:
# Price was parsed as text; convert it to numbers so it can be aggregated.
total_df['price'] = pd.to_numeric(total_df['price'])

total_df.dtypes

In [None]:
# def roundup_to_minutes(dt:datetime.datetime, minutes:int = 1):
#     ts = dt.timestamp()
#     return datetime.datetime.utcfromtimestamp(ts + (-ts % (minutes*60)))
def roundup_to_minutes(dt:datetime.datetime, minutes:int = 1):
    """Round ``dt`` up to the next ``minutes``-sized boundary and return it in UTC.

    Boundaries are multiples of ``minutes * 60`` seconds since the epoch; a
    value already on a boundary is returned unchanged (apart from the
    conversion to UTC).

    Args:
        dt: Moment to round.
        minutes: Size of the rounding step in minutes.

    Returns:
        The rounded moment, converted with ``astimezone(datetime.timezone.utc)``.
    """
    step_seconds = minutes * 60
    # Seconds missing to the next boundary; 0 when dt already sits on one.
    seconds_to_boundary = -dt.timestamp() % step_seconds
    rounded = dt + datetime.timedelta(seconds=seconds_to_boundary)
    return rounded.astimezone(datetime.timezone.utc)

# Example with a naive datetime: timestamp() and astimezone() treat it as the
# machine's local time, so the displayed UTC value depends on where this runs.
roundup_to_minutes(datetime.datetime(2020,2,22,16,9,58), 10)

In [None]:
# Group trades by product and expiration code.
gp = total_df.groupby(by=['contract', 'expire'])

# Work on a copy of the ('TX', 'M') group so columns can be added freely.
single = gp.get_group(('TX', 'M')).copy()
single

In [None]:
# Label each trade with the end of its 30-minute bucket (roundup_to_minutes returns it in UTC).
single['scale'] = single.datetime.apply(roundup_to_minutes, minutes=30)

In [None]:
# Per-bucket price statistics and total volume.
# 'first' and 'last' take rows in frame order, and the frame is in concat
# order (plain trades first, then the appended spread legs), not time order.
# Sort by timestamp first so they are the earliest and latest trade of each
# bucket; mergesort is stable, so trades sharing a timestamp keep their order.
single.sort_values('datetime', kind='mergesort').groupby('scale').agg({'price': ['first', 'max', 'min', 'last', 'mean', 'std'], 'volume': 'sum'})