## Import Necessary Modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm

from time import time, sleep
from datetime import date, datetime, timedelta
from glob import glob
from itertools import product

## Loading past data

In [2]:
nce_start = '2019-06-09'
nce_recent = '2019-12-08'
prence_start = '2017-06-10'

def load_data(csvname, startdate, enddate):
    df = pd.read_csv(csvname + '.csv', header=0)
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date')
    df = df.loc[startdate:enddate].apply(pd.to_numeric, errors='coerce')
    
    return df

nce_hsi = load_data('^HSI', nce_start, nce_recent)
nce_hsi.info()
nce_spx = load_data('^SPX', nce_start, nce_recent)
print(nce_spx.info())
pre_hsi = load_data('^HSI', prence_start, nce_start)
print(pre_hsi.info())
pre_spx = load_data('^SPX', prence_start, nce_start)
print(pre_spx.info())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 127 entries, 2019-06-10 to 2019-12-06
Data columns (total 5 columns):
open      127 non-null float64
high      127 non-null float64
low       127 non-null float64
close     127 non-null float64
volume    127 non-null float64
dtypes: float64(5)
memory usage: 6.0 KB
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 127 entries, 2019-06-10 to 2019-12-06
Data columns (total 5 columns):
open      127 non-null float64
high      127 non-null float64
low       127 non-null float64
close     127 non-null float64
volume    127 non-null float64
dtypes: float64(5)
memory usage: 6.0 KB
None
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 490 entries, 2017-06-12 to 2019-06-06
Data columns (total 5 columns):
open      490 non-null float64
high      490 non-null float64
low       490 non-null float64
close     490 non-null float64
volume    490 non-null float64
dtypes: float64(5)
memory usage: 23.0 KB
None
<class 'pandas.core.frame.DataFrame

## Weekday statistics functions

In [3]:
def get_basicstat(symbol, startdate, enddate):
    """Compute basic OHLC statistics for each data bar."""
    df = load_data(symbol, startdate, enddate)
    df['weekday'] = df.index.weekday + 1
    df['pct'] = 100 * np.log(df['close'] / df['close'].shift(1))
    df['barpct'] = 100 * np.log(df['close'] / df['open'])
    df['abs_pct'] = np.absolute(df['pct'])
    df['abs_barpct'] = np.absolute(df['barpct'])
    df['bar'] = 100 * (df['close'] - df['open']) / (df['high'] - df['low'])
    df['range'] = 100 * np.log(df['high'] / df['low'])
    return df

nce_hsibasic = get_basicstat('^HSI', nce_start, nce_recent)
nce_hsibasic.head(15)

Unnamed: 0_level_0,open,high,low,close,volume,weekday,pct,barpct,abs_pct,abs_barpct,bar,range
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-06-10,27206.61,27604.44,27155.3,27578.64,2342608000.0,1,,1.35816,,1.35816,82.831634,1.640439
2019-06-11,27677.06,27857.85,27577.91,27789.34,1881134000.0,2,0.761093,0.404858,0.761093,0.404858,40.108595,1.00997
2019-06-12,27603.12,27603.12,27222.4,27308.46,1924623000.0,3,-1.745595,-1.073227,1.745595,1.073227,-77.395461,1.388865
2019-06-13,27217.72,27294.71,26827.69,27294.71,1903405000.0,4,-0.050363,0.282468,0.050363,0.282468,16.485375,1.725835
2019-06-14,27180.74,27274.5,27051.75,27118.35,2093901000.0,5,-0.648229,-0.229801,0.648229,0.229801,-28.008979,0.82005
2019-06-17,27252.45,27501.03,27227.16,27227.16,1533265000.0,1,0.400438,-0.092842,0.400438,0.092842,-9.234308,1.000845
2019-06-18,27289.7,27582.53,27201.13,27498.77,1598572000.0,2,0.992627,0.763193,0.992627,0.763193,54.816466,1.392408
2019-06-19,28224.87,28224.87,28023.1,28202.14,2140268000.0,3,2.525659,-0.080564,2.525659,0.080564,-11.265302,0.717433
2019-06-20,28308.85,28561.55,28268.57,28550.43,1876479000.0,4,1.227414,0.849752,1.227414,0.849752,82.45614,1.031082
2019-06-21,28581.51,28581.51,28359.25,28473.71,2239069000.0,5,-0.269079,-0.37788,0.269079,0.37788,-48.501755,0.780675


In [4]:
wkday_dict = {1: 'Mon', 2: 'Tue', 3: 'Wed', 4: 'Thu', 5: 'Fri'}
kpi_column = ['pct', 'range', 'abs_pct']
mean_column = ['pct_mean', 'range_mean', 'abspct_mean']
std_column = ['pct_std', 'range_std', 'abspct_std']

In [5]:
def each_weekdaystat(symbol, startdate, enddate):
    """Get 5 dataframes regarding each weekday statistics."""
    df_symbol = get_basicstat(symbol, startdate, enddate)
    list_wkday = [df_symbol[df_symbol['weekday'] == wkday] for wkday in range(1,6)]
    list_else = [df_symbol[df_symbol['weekday'] != wkday] for wkday in range(1,6)]

    dict_kpidf = {wkday: None for wkday in range(1,6)}
    for wkday in range(1,6):
        # weekday stats
        df_wkday = list_wkday[wkday-1]
        df_else = list_else[wkday-1]
        df_wkdaymean = pd.DataFrame(df_wkday[kpi_column].mean()).transpose()
        df_wkdaymean.columns = mean_column
        df_wkdaystd = pd.DataFrame(df_wkday[kpi_column].std()).transpose()
        df_wkdaystd.columns = std_column
        df_wkdaystat = pd.concat([df_wkdaymean, df_wkdaystd], axis=1)
        # elseday stats
        df_elsemean = pd.DataFrame(df_else[kpi_column].mean()).transpose()
        df_elsemean.columns = mean_column
        df_elsestd = pd.DataFrame(df_else[kpi_column].std()).transpose()
        df_elsestd.columns = std_column   
        df_elsestat = pd.concat([df_elsemean, df_elsestd], axis=1)

        # p-value test
        df_hypo = pd.DataFrame(columns=['rangediff_zscore', 'rangediff_pvalue', 'abspctdiff_zscore', 'abspctdiff_pvalue'])
        range_var = pow(df_wkdaystat.loc[0, 'range_std'], 2)/df_wkday.shape[0]+pow(df_elsestat.loc[0,'range_std'], 2)/df_else.shape[0]
        rangediff_zscore = (df_wkdaystat.loc[0,'range_mean']-df_elsestat.loc[0, 'range_mean']) / np.sqrt(range_var)
        df_hypo['rangediff_zscore'] = [rangediff_zscore]
        df_hypo['rangediff_pvalue'] = [min(1 - norm.cdf(rangediff_zscore),  1 - norm.cdf(-rangediff_zscore))]
        abspct_var = pow(df_wkdaystat.loc[0, 'abspct_std'], 2)/df_wkday.shape[0]+pow(df_elsestat.loc[0,'abspct_std'], 2)/df_else.shape[0]
        abspctdiff_zscore = (df_wkdaystat.loc[0,'abspct_mean']-df_elsestat.loc[0, 'abspct_mean']) / np.sqrt(abspct_var)
        df_hypo['abspctdiff_zscore'] = [abspctdiff_zscore]
        df_hypo['abspctdiff_pvalue'] = [min(1 - norm.cdf(abspctdiff_zscore),  1 - norm.cdf(-abspctdiff_zscore))]

        dict_kpidf[wkday] = df_wkdaystat, df_elsestat, df_hypo, df_wkday, df_else
        
    return dict_kpidf

## Hang Seng Index: NCE vs pre-NCE

In [6]:
code = '^HSI'
dict_nce_hsi = each_weekdaystat(code, nce_start, nce_recent)
for wkday in range(1,6):
    print('Volatility of %s on %s vs other weekdays during Movement period: \n'%(code, wkday_dict[wkday]))
    print(dict_nce_hsi[wkday][0])
    print(dict_nce_hsi[wkday][1], '\n')
    print(dict_nce_hsi[wkday][2], '\n')
    print('Number of the weekday: %s vs Number of else days: %s'%(dict_nce_hsi[wkday][3].shape[0], dict_nce_hsi[wkday][4].shape[0]), '\n')

Volatility of ^HSI on Mon vs other weekdays during Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.171489    1.130712     1.040878  1.328226   0.391551    0.813792
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.000501    1.037666     0.685421  0.950901   0.521813    0.655595 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0          0.979071          0.163772           1.994342           0.023057 

Number of the weekday: 24 vs Number of else days: 103 

Volatility of ^HSI on Tue vs other weekdays during Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.027594    1.025233     0.579359  0.791388   0.462731    0.526731
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.032732    1.062606      0.79262  1.079629   0.510302    0.729488 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0         

In [7]:
dict_prence_hsi = each_weekdaystat(code, prence_start, nce_start)
for wkday in range(1,6):
    print('Volatility of %s on %s vs other weekdays during pre-Movement period: \n'%(code, wkday_dict[wkday]))
    print(dict_prence_hsi[wkday][0])
    print(dict_prence_hsi[wkday][1], '\n')
    print(dict_prence_hsi[wkday][2], '\n')
    print('Number of the weekday: %s vs Number of else days: %s'%(dict_prence_hsi[wkday][3].shape[0], dict_prence_hsi[wkday][4].shape[0]), '\n')

Volatility of ^HSI on Mon vs other weekdays during pre-Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0  0.178041    1.114401     0.883732  1.093541   0.432528    0.662121
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.029755    1.164021      0.78902  1.064381   0.552566    0.713909 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0         -0.944297          0.172509           1.227768           0.109767 

Number of the weekday: 94 vs Number of else days: 396 

Volatility of ^HSI on Tue vs other weekdays during pre-Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0  0.009133    1.186482     0.815979  1.163162   0.516086    0.824786
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0   0.00992    1.146609     0.804819  1.049764   0.535709     0.67284 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0 

## S&P 500: NCE vs pre-NCE

In [8]:
code = '^SPX'

dict_nce_spx = each_weekdaystat(code, nce_start, nce_recent)
for wkday in range(1,6):
    print('Volatility of %s on %s vs other weekdays during Movement period: \n'%(code, wkday_dict[wkday]))
    print(dict_nce_spx[wkday][0])
    print(dict_nce_spx[wkday][1], '\n')
    print(dict_nce_spx[wkday][2], '\n')
    print('Number of the weekday: %s vs Number of else days: %s'%(dict_nce_spx[wkday][3].shape[0], dict_nce_spx[wkday][4].shape[0]), '\n')

Volatility of ^SPX on Mon vs other weekdays during Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.027662     0.63455     0.558159  0.860057   0.484634    0.644522
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0  0.090802    0.864183     0.580013  0.782997   0.510508    0.530721 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0         -2.100643          0.017836          -0.156989           0.437627 

Number of the weekday: 25 vs Number of else days: 102 

Volatility of ^SPX on Tue vs other weekdays during Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.071047    0.875178     0.569548  0.745958   0.482248    0.473648
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0  0.104451    0.804512     0.577489  0.808167   0.520585    0.572095 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0         

In [9]:
dict_prence_spx = each_weekdaystat(code, prence_start, nce_start)
for wkday in range(1,6):
    print('Volatility of %s on %s vs other weekdays during pre-Movement period: \n'%(code, wkday_dict[wkday]))
    print(dict_prence_spx[wkday][0])
    print(dict_prence_spx[wkday][1], '\n')
    print(dict_prence_spx[wkday][2], '\n')
    print('Number of the weekday: %s vs Number of else days: %s'%(dict_prence_spx[wkday][3].shape[0], dict_prence_spx[wkday][4].shape[0]), '\n')

Volatility of ^SPX on Mon vs other weekdays during pre-Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0 -0.026292    0.926073     0.628919  0.961635    0.79132    0.724984
   pct_mean  range_mean  abspct_mean  pct_std  range_std  abspct_std
0  0.047245    0.938498     0.576152  0.86505    0.73576    0.646359 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0         -0.138985          0.444731           0.648631           0.258289 

Number of the weekday: 94 vs Number of else days: 407 

Volatility of ^SPX on Tue vs other weekdays during pre-Movement period: 

   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0   0.06455    0.887952     0.560068  0.811528   0.665866    0.588195
   pct_mean  range_mean  abspct_mean   pct_std  range_std  abspct_std
0  0.025724    0.948341     0.592523  0.901312   0.764829    0.679015 

   rangediff_zscore  rangediff_pvalue  abspctdiff_zscore  abspctdiff_pvalue
0   