# Candlestick body & pattern & return relationship

In [148]:
import numpy as np
import pandas as pd
import numpy as np
import seaborn as sns
import os
from pathlib import Path
import pandas_ta as ta
import pspriceaction.price_action as pa
import warnings
warnings.filterwarnings('ignore')

### Load Price Data

In [149]:
data = []
notebook_path = os.getcwd()
current_dir = Path(notebook_path)
algo_dir = current_dir.parent.parent
transform_csv_file = str(current_dir) + '/VN30F1M_5minutes_pattern.csv'
if os.path.isfile(transform_csv_file):
    data = pd.read_csv(transform_csv_file, index_col='Date', parse_dates=True)
    data['model'] = data['model'].fillna(value='')
else:
    csv_file = str(algo_dir) + '/vn-stock-data/VN30ps/VN30F1M_5minutes.csv'
    is_file = os.path.isfile(csv_file)
    if is_file:
        dataset = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
    else:
        dataset = pd.read_csv("https://raw.githubusercontent.com/zuongthaotn/vn-stock-data/main/VN30ps/VN30F1M_5minutes.csv", index_col='Date', parse_dates=True)
    data = dataset.copy()
    data = pa.pattern_modeling(data)
    data.to_csv(transform_csv_file)

In [150]:
### Ignore this year data => tranh over fitting
data = data[data.index < '2024-01-01 00:00:00']

### Calculating difference between Current close vs High & Low before

In [151]:
%%time
clone_data = data[['High', 'Low', 'Close']]
diff_high_before = []
diff_low_before = []
for i, row in clone_data.iterrows():
    if 100*row.name.hour + row.name.minute == 900:
        diff_high_before.append(0)
        diff_low_before.append(0)
    else:
        current_date = row.name.strftime('%Y-%m-%d ').format()
        current_time = row.name
        data_from_start_day = clone_data[(clone_data.index < current_time) & (clone_data.index > current_date+' 08:30:00')]
        max_high = data_from_start_day['High'].max()
        min_low = data_from_start_day['Low'].min()
        current_close = row['Close']
        dhb = 1000 * (max_high - current_close) / current_close
        dlb = 1000 * (current_close - min_low) / current_close
        diff_high_before.append(dhb)
        diff_low_before.append(dlb)
data["diff_high"] = diff_high_before
data["diff_low"] = diff_low_before
data["diff_high"] = data["diff_high"].round(1)
data["diff_low"] = data["diff_low"].round(1)

CPU times: user 49.9 s, sys: 661 μs, total: 49.9 s
Wall time: 49.9 s


### Calculating return

In [152]:
def has_bullish_pattern(model):
    if "bullish" in model or "rising" in model:
        return True
    return False

def has_bearish_pattern(model):
    if "bearish" in model or "falling" in model:
        return True
    return False

In [153]:
%%time
data['return'] = ''
data['signal'] = ''
data['exit_time'] = ''
# Stoploss at x0 pips
sl_step = 3
# Takeprofit at y0 pips(R/R = 1/3)
tp_step = 9
for i, row in data.iterrows():
    if has_bullish_pattern(row['model']):
        # Long signal
        data.at[i, 'signal'] = 'long'
        current_date = row.name.strftime('%Y-%m-%d ').format()
        current_time = row.name
        entry_price = row['Close']
        data_to_end_day = data[(data.index > current_time) & (data.index < current_date+' 14:30:00')]
        max_price = 0
        exit_time = ''
        for k, wrow in data_to_end_day.iterrows():
            if wrow['Low'] < entry_price and wrow['Low'] < entry_price - sl_step:
                # Stop loss
                momentum = -sl_step
                exit_time = wrow.name
                break
            else:
                if wrow['High'] > entry_price + tp_step:
                    # Take profit
                    momentum = tp_step
                    exit_time = wrow.name
                    break
                else:
                    # Close at 02:25PM
                    momentum = wrow['Close'] - entry_price
                    exit_time = wrow.name
        data.at[i, 'return'] = momentum
        data.at[i, 'exit_time'] = exit_time
    elif has_bearish_pattern(row['model']):
        # Short signal
        data.at[i, 'signal'] = 'short'
        current_date = row.name.strftime('%Y-%m-%d ').format()
        current_time = row.name
        entry_price = row['Close']
        data_to_end_day = data[(data.index > current_time) & (data.index < current_date+' 14:30:00')]
        min_price = 10000
        exit_time = ''
        for k, wrow in data_to_end_day.iterrows():
            if wrow['High'] > entry_price and wrow['High'] > entry_price + sl_step:
                # Stop loss
                momentum = -sl_step
                exit_time = wrow.name
                break
            else:
                if wrow['Low'] < entry_price - tp_step:
                    # Take profit
                    momentum = tp_step
                    exit_time = wrow.name
                    break
                else:
                    # Close at 02:25PM
                    momentum = entry_price - wrow['Close']
                    exit_time = wrow.name
        data.at[i, 'return'] = momentum
        data.at[i, 'exit_time'] = exit_time

CPU times: user 17.7 s, sys: 3.23 ms, total: 17.7 s
Wall time: 17.7 s


### Return Analytics

In [154]:
has_return = data[data['return'] != '']
#----
long_return = has_return[has_return['signal'] == 'long']
short_return = has_return[has_return['signal'] == 'short']
#----
negative_return = has_return[has_return['return'] < 0]
positive_return = has_return[has_return['return'] > 0]
#----
short_negative_return = short_return[short_return['return'] < 0]
short_positive_return = short_return[short_return['return'] > 0]
long_negative_return = long_return[long_return['return'] < 0]
long_positive_return = long_return[long_return['return'] > 0]

## 1.Pattern model & max high-low & SHORT return relationship

### diff LOW distributions

In [145]:
# k = long_return.groupby(['diff_high'])[['diff_high']].count()
# with pd.option_context('display.max_rows', None,
#                        'display.max_columns', None,
#                        'display.precision', 3,
#                        ):
#     print(k)
# sns.displot(k, x="diff_high", kind="kde", bw_adjust=.1)
# k.describe()
short_return['diff_low'].describe()

count    5090.000000
mean        2.160059
std         5.004095
min       -17.800000
25%        -0.200000
50%         0.900000
75%         3.000000
max        87.500000
Name: diff_low, dtype: float64

### min low & SHORT return relationship

In [155]:
short_return['diff_low_group'] = pd.cut(short_return['diff_low'], 25)
s1 = short_return.groupby(['diff_low_group'])[['return']].count()
f1 = s1[['return']].rename(columns={'return': 'count'})
short_return.drop(columns=['diff_low_group'], inplace=True)
f1

Unnamed: 0_level_0,count
diff_low_group,Unnamed: 1_level_1
"(-17.905, -13.588]",3
"(-13.588, -9.376]",9
"(-9.376, -5.164]",46
"(-5.164, -0.952]",596
"(-0.952, 3.26]",3240
"(3.26, 7.472]",703
"(7.472, 11.684]",263
"(11.684, 15.896]",120
"(15.896, 20.108]",46
"(20.108, 24.32]",24


In [158]:
working_short_has_return = short_return[(short_return['diff_low'] > -9.4) & (short_return['diff_low'] < 20.1)]
working_short_has_return['diff_low_group'] = pd.qcut(working_short_has_return['diff_low'], 35)
a = working_short_has_return.groupby(['diff_low_group'])[['return']].mean()
b = a[['return']].rename(columns={'return': 'mean'})
c = working_short_has_return.groupby(['diff_low_group'])[['return']].count()
d = c[['return']].rename(columns={'return': 'count'})
e = pd.merge(b, d, left_index=True, right_index=True, how="left")
e

Unnamed: 0_level_0,mean,count
diff_low_group,Unnamed: 1_level_1,Unnamed: 2_level_1
"(-9.301, -3.2]",1.03931,145
"(-3.2, -2.1]",1.138667,150
"(-2.1, -1.5]",1.070988,162
"(-1.5, -1.1]",0.238732,142
"(-1.1, -0.8]",1.298857,175
"(-0.8, -0.6]",1.190588,170
"(-0.6, -0.5]",1.261728,81
"(-0.5, -0.3]",0.226519,181
"(-0.3, -0.2]",0.8775,120
"(-0.2, -0.1]",0.515504,129


In [175]:
#should_short_dk1 = (-9.3 < signal_data['diff_low'] < 2.8)

### max High & SHORT return relationship

### diff HIGH distributions

In [160]:
short_return['diff_high'].describe()

count    5090.000000
mean        7.915383
std         8.213432
min         0.000000
25%         3.000000
50%         5.300000
75%         9.600000
max        74.300000
Name: diff_high, dtype: float64

In [161]:
short_return['diff_high_group'] = pd.cut(short_return['diff_high'], 25)
s2 = short_return.groupby(['diff_high_group'])[['return']].count()
s22 = s2[['return']].rename(columns={'return': 'count'})
short_return.drop(columns=['diff_high_group'], inplace=True)
s22

Unnamed: 0_level_0,count
diff_high_group,Unnamed: 1_level_1
"(-0.0743, 2.972]",1260
"(2.972, 5.944]",1544
"(5.944, 8.916]",894
"(8.916, 11.888]",471
"(11.888, 14.86]",277
"(14.86, 17.832]",180
"(17.832, 20.804]",126
"(20.804, 23.776]",77
"(23.776, 26.748]",59
"(26.748, 29.72]",44


In [162]:
working_short_has_return = short_return[short_return['diff_high'] < 38]
working_short_has_return['diff_high_group'] = pd.qcut(working_short_has_return['diff_high'], 19)
f = working_short_has_return.groupby(['diff_high_group'])[['return']].mean()
g = f[['return']].rename(columns={'return': 'mean'})
i = working_short_has_return.groupby(['diff_high_group'])[['return']].count()
j = i[['return']].rename(columns={'return': 'count'})
k = pd.merge(g, j, left_index=True, right_index=True, how="left")
k

Unnamed: 0_level_0,mean,count
diff_high_group,Unnamed: 1_level_1,Unnamed: 2_level_1
"(-0.001, 1.4]",-0.134323,303
"(1.4, 1.9]",0.839236,288
"(1.9, 2.3]",0.774615,260
"(2.3, 2.7]",0.645255,274
"(2.7, 3.0]",0.189767,215
"(3.0, 3.4]",0.398795,249
"(3.4, 3.9]",1.028571,280
"(3.9, 4.4]",0.564062,256
"(4.4, 4.9]",0.74403,268
"(4.9, 5.5]",0.877992,259


In [174]:
#should_short_dk2 = (1.4 < signal_data['diff_high'] < 2.71 or 3.4 < signal_data['diff_high'] < 7.0 or signal_data['diff_high'] > 7.8)

## 2. Pattern model & max high-low & LONG return relationship

### min low & LONG return relationship

In [164]:
long_return['diff_low_group'] = pd.cut(long_return['diff_low'], 20)
l1 = long_return.groupby(['diff_low_group'])[['return']].count()
l11 = l1[['return']].rename(columns={'return': 'count'})
long_return.drop(columns=['diff_low_group'], inplace=True)
l11

Unnamed: 0_level_0,count
diff_low_group,Unnamed: 1_level_1
"(-0.1, 5.015]",2710
"(5.015, 10.03]",1640
"(10.03, 15.045]",729
"(15.045, 20.06]",320
"(20.06, 25.075]",151
"(25.075, 30.09]",75
"(30.09, 35.105]",49
"(35.105, 40.12]",31
"(40.12, 45.135]",15
"(45.135, 50.15]",5


In [165]:
working_long_has_return = long_return[long_return['diff_low']<35.01]
# wlht_group = working_long_has_return[['return']].groupby([working_long_has_return.model, working_long_has_return.diff_low])

In [166]:
working_long_has_return['diff_low_group'] = pd.qcut(working_long_has_return['diff_low'], 35)
m = working_long_has_return.groupby(['diff_low_group'])[['return']].mean()
n = m[['return']].rename(columns={'return': 'mean'})
o = working_long_has_return.groupby(['diff_low_group'])[['return']].count()
p = o[['return']].rename(columns={'return': 'count'})
q = pd.merge(n, p, left_index=True, right_index=True, how="left")
# y.sort_values(by='count')
q

Unnamed: 0_level_0,mean,count
diff_low_group,Unnamed: 1_level_1,Unnamed: 2_level_1
"(-0.001, 1.0]",-0.401515,198
"(1.0, 1.3]",0.014286,147
"(1.3, 1.6]",-0.384103,195
"(1.6, 1.8]",0.234615,130
"(1.8, 2.0]",-0.069014,142
"(2.0, 2.3]",0.234842,221
"(2.3, 2.5]",0.58255,149
"(2.5, 2.7]",0.704444,135
"(2.7, 3.0]",-0.046429,196
"(3.0, 3.2]",0.212698,126


In [173]:
#should_long_dk1 = (3.7 < signal_data['diff_low'] < 7.1 or signal_data['diff_low'] > 7.6)

### max High & LONG return relationship

In [168]:
long_return['diff_high_group'] = pd.cut(long_return['diff_high'], 22)
l2 = long_return.groupby(['diff_high_group'])[['return']].count()
l22 = l2[['return']].rename(columns={'return': 'count'})
long_return.drop(columns=['diff_high_group'], inplace=True)
l22

Unnamed: 0_level_0,count
diff_high_group,Unnamed: 1_level_1
"(-48.408, -43.373]",1
"(-43.373, -38.445]",0
"(-38.445, -33.518]",0
"(-33.518, -28.591]",1
"(-28.591, -23.664]",0
"(-23.664, -18.736]",1
"(-18.736, -13.809]",2
"(-13.809, -8.882]",7
"(-8.882, -3.955]",48
"(-3.955, 0.973]",3070


In [169]:
working_long_has_return_2 = long_return[(long_return['diff_high']<21) & (long_return['diff_high'] > -9)]

In [170]:
working_long_has_return_2['diff_high_group'] = pd.qcut(working_long_has_return_2['diff_high'], 30)
r = working_long_has_return_2.groupby(['diff_high_group'])[['return']].mean()
s = r[['return']].rename(columns={'return': 'mean'})
t = working_long_has_return_2.groupby(['diff_high_group'])[['return']].count()
u = t[['return']].rename(columns={'return': 'count'})
v = pd.merge(s, u, left_index=True, right_index=True, how="left")
v

Unnamed: 0_level_0,mean,count
diff_high_group,Unnamed: 1_level_1,Unnamed: 2_level_1
"(-8.801, -2.1]",1.2535,200
"(-2.1, -1.5]",1.741304,184
"(-1.5, -1.1]",1.34604,202
"(-1.1, -0.8]",1.154098,244
"(-0.8, -0.6]",0.883696,184
"(-0.6, -0.5]",0.6576,125
"(-0.5, -0.3]",0.82,240
"(-0.3, -0.2]",1.477181,149
"(-0.2, -0.1]",0.873797,187
"(-0.1, 0.0]",1.435263,190


In [172]:
#should_long_dk2 = (-8.8 < signal_data['diff_high'] < 0.31 or 0.4 < signal_data['diff_high'] < 0.71 or 1.1 < signal_data['diff_high'] < 1.61)