# Couple candlesticks trap - Detection(Predict, Classification)

### Import Library

In [39]:
import numpy as np
import pandas as pd
import numpy as np
import pandas_ta as ta
import seaborn as sns

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['figure.dpi'] = 120
import warnings
warnings.filterwarnings('ignore')

In [40]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

### Load Price Data

In [41]:
import os
from pathlib import Path
notebook_path = os.getcwd()
current_dir = Path(notebook_path)
csv_file = str(current_dir) + '/VN30F1M_5minutes.csv'
is_file = os.path.isfile(csv_file)
if is_file:
    dataset = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
else:
    print('remote')
    dataset = pd.read_csv("https://raw.githubusercontent.com/zuongthaotn/vn-stock-data/main/VN30ps/VN30F1M_5minutes.csv", index_col='Date', parse_dates=True)

In [42]:
data = dataset.copy()

In [43]:
len(data)

82630

In [44]:
data = data[data.index > '2020-11-01 00:00:00']

In [45]:
def set_condition_1(r):
    cond = ''
    if r['Open'] > r['Close'] >= r['Low'] + 0.1:
        # Do va co bong nen duoi
        cond = 'short'
    elif r['Open'] < r['Close'] <= r['High'] - 0.1:
        # Xanh va co bong nen tren
        cond = 'long'
    return cond


def set_condition_2(r):
    cond = ''
    if r['Open'] > r['Close'] == r['Low'] and r['Low'] < r['low_s1']:
        # Do va khong co bong nen duoi
        cond = 'short'
    elif r['Open'] < r['Close'] == r['High'] and r['High'] > r['high_s1']:
        # Xanh va khong co bong nen tren
        cond = 'long'
    return cond
    

def get_couple_candleticks_signal(r):
    signal = ''
    if r['condition_1'] == 'short' and r['condition_2'] == 'short':
        signal = 'short'
    elif r['condition_1'] == 'long' and r['condition_2'] == 'long':
        signal = 'long'
    return signal

In [46]:
# Couple candlesticks signal
data['low_s1'] = data['Low'].shift(1)
data['high_s1'] = data['High'].shift(1)
data['max_5'] = data['High'].rolling(5).max()
data['min_5'] = data['Low'].rolling(5).min()
data['condition_1'] = data.apply(lambda r: set_condition_1(r), axis=1)
data['condition_1'] = data['condition_1'].shift(1)
data['condition_2'] = data.apply(lambda r: set_condition_2(r), axis=1)
data['signal'] = data.apply(lambda r: get_couple_candleticks_signal(r), axis=1)

## Trap labeling

In [47]:
traps = []
for i, row in data.iterrows():
    if row['signal']:
        current_date = row.name.strftime('%Y-%m-%d ').format()
        current_time = row.name
        data_to_end_day = data[(data.index > current_time) & (data.index < current_date + ' 14:30:00')]
        if not len(data_to_end_day):
            traps.append(1)
            continue
        #
        if row['signal'] == 'short':
            if len(data_to_end_day[data_to_end_day.High > row['Close'] + 3.5]) > 0:
                traps.append(1)
            else:
                traps.append(0)
        else:
            if len(data_to_end_day[data_to_end_day.Low < row['Close'] - 3.5]) > 0:
                traps.append(1)
            else:
                traps.append(0)
    else:
        traps.append(0)

In [48]:
data['trap'] = traps

In [49]:
f"{len(data[(data.signal != '')])} - {len(data[(data.signal != '') & (data.trap == 1)])} - {len(data[(data.signal != '') & (data.trap == 0)])}"

'2341 - 1378 - 963'

## RSI & Price move

In [50]:
df1 = data[['Open', 'Close', 'High', 'Low', 'signal', 'trap']][data.signal != ''].copy()
df1['RSI'] = ta.rsi(df1["Close"], length=14)
df1['RSI'] = round(df1['RSI'])
df1["ATR_14"] = ta.atr(df1['High'], df1['Low'], df1['Close'], length=14)
df1['Close_s1'] = df1['Close'].shift(1)
df1['price_move'] = df1.apply(lambda r: (r['Close'] - r['Close_s1']) - 0.5 * r['ATR_14'], axis=1)
df1['dpo'] = df1['Close'] - df1['Open'].shift(1)

In [51]:
# Define Features and Target Variable
features = ["price_move", "RSI", "dpo"]
X = df1[features]
y = df1["trap"]

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [52]:
# Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [53]:
# Predictions
y_pred = model.predict(X_test)

In [54]:
# Evaluate Performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.579957356076759
              precision    recall  f1-score   support

           0       0.44      0.38      0.40       178
           1       0.65      0.70      0.68       291

    accuracy                           0.58       469
   macro avg       0.54      0.54      0.54       469
weighted avg       0.57      0.58      0.57       469

