# Detecting Couple candlestick traps by using NEAT

### Import Library

In [1]:
import numpy as np
import pandas as pd
import numpy as np
import pandas_ta as ta
import seaborn as sns
import os

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['figure.dpi'] = 120
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import neat

### Load Price Data

In [3]:
import os
from pathlib import Path
notebook_path = os.getcwd()
current_dir = Path(notebook_path)
csv_file = str(current_dir.parent) + '/VN30F1M_5minutes.csv'
is_file = os.path.isfile(csv_file)
if is_file:
    dataset = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
else:
    print(csv_file)
    print('remote')
    dataset = pd.read_csv("https://raw.githubusercontent.com/zuongthaotn/vn-stock-data/main/VN30ps/VN30F1M_5minutes.csv", index_col='Date', parse_dates=True)

In [4]:
data = dataset.copy()

In [5]:
data = data[data.index > '2020-11-01 00:00:00']

In [6]:
len(data[(data.index > '2025-02-14 00:00:00') & (data.index < '2025-02-14 23:00:00')])

51

In [7]:
def set_condition_1(r):
    cond = ''
    if r['Open'] < r['Close'] <= r['High'] - 0.1:
        # Xanh va co bong nen tren
        cond = 'long'
    return cond


def set_condition_2(r):
    cond = ''
    if r['Open'] < r['Close'] == r['High'] and r['High'] > r['high_s1']:
        # Xanh va khong co bong nen tren
        cond = 'long'
    return cond
    

def get_couple_candleticks_signal(r):
    signal = ''
    if 100 * r.name.hour + r.name.minute >= 1425:
        return signal
    if r['condition_1'] == 'long' and r['condition_2'] == 'long':
        signal = 'long'
    return signal

In [8]:
# Couple candlesticks signal
data['low_s1'] = data['Low'].shift(1)
data['high_s1'] = data['High'].shift(1)
data['condition_1'] = data.apply(lambda r: set_condition_1(r), axis=1)
data['condition_1'] = data['condition_1'].shift(1)
data['condition_2'] = data.apply(lambda r: set_condition_2(r), axis=1)
data['signal'] = data.apply(lambda r: get_couple_candleticks_signal(r), axis=1)

In [9]:
data["ATR"] = ta.atr(data["High"], data["Low"], data["Close"], length=14)  # Volatility
data["RSI"] = ta.rsi(data["Close"], length=14)  # Momentum indicator
data['min_5'] = data['Low'].rolling(5).min()
data['min_51'] = data['Low'].shift(1).rolling(50).min()
data['max_51'] = data['Low'].shift(1).rolling(50).max()
data['close_s1'] = data['Close'].shift(1)
data['price_move'] = data.apply(lambda r: r['Close'] - r['close_s1'], axis=1)

## TRAP labeling

In [10]:
traps = []
for i, row in data.iterrows():
    if row['signal']:
        current_date = row.name.strftime('%Y-%m-%d ').format()
        current_time = row.name
        data_to_end_day = data[(data.index > current_time) & (data.index < current_date + ' 14:30:00')]
        #
        data_at_end_day = data[data.index == current_date + ' 14:25:00']
        last_close = data_at_end_day.iloc[0]['Close']
        #
        if len(data_to_end_day[data_to_end_day.Low < row['Close'] - 3.5]) > 0 or row['Close'] > last_close:
            traps.append(1)
        else:
            traps.append(0)
    else:
        traps.append('')

In [11]:
data['trap'] = traps

In [12]:
signal_data = data[data.signal != '']
signal_data.dropna(inplace=True)

In [13]:
len(signal_data)

1102

## Features

In [14]:
X = signal_data[['ATR', 'RSI', 'min_5', 'min_51', 'max_51', 'price_move', "trap"]]
# Train-Test Split
X_train = X[X.index < '2024-07-01 00:00:00']
X_test = X[X.index > '2024-07-01 00:00:00']
X_train['trap'] = X_train['trap'].astype(int)

In [15]:
len(X_train)

959

In [16]:
len(X_train[X_train.trap == 0])

354

In [17]:
X_none_trap = X_train[X_train.trap == 0]
X2_none_trap = pd.concat([X_none_trap, X_none_trap], ignore_index=True)
balanced_X_train = pd.concat([X_train, X2_none_trap], ignore_index=True)

In [18]:
len(balanced_X_train)

1667

In [19]:
len(X_test)

143

In [20]:
def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        genome.fitness = 4.0
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        for move_index, row in balanced_X_train.iterrows():
            inputs = [row['min_51'], row['max_51'], row['ATR'], row['RSI'], row['min_5'], row['price_move']]
            expected_output = row['trap']
            output = net.activate(inputs)
            genome.fitness -= (output[0] - expected_output) ** 2


def run(config_file):
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    # p.add_reporter(neat.StdOutReporter(True))
    # stats = neat.StatisticsReporter()
    # p.add_reporter(stats)

    # Run for up to 100 generations.
    winner = p.run(eval_genomes, 30)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))
    return neat.nn.FeedForwardNetwork.create(winner, config)

In [21]:
%%time
config_path = os.path.join(current_dir, 'style-mix-1.cfg')
best_brain = run(config_path)


Best genome:
Key: 4299
Fitness: -370.49039294858073
Nodes:
	0 DefaultNodeGene(key=0, bias=-0.2933344431370661, response=1.0, activation=sigmoid, aggregation=sum)
	427 DefaultNodeGene(key=427, bias=-2.1620524073148104, response=1.0, activation=sigmoid, aggregation=sum)
Connections:
	DefaultConnectionGene(key=(-5, 427), weight=0.04596314193154761, enabled=True)
	DefaultConnectionGene(key=(-4, 427), weight=-1.320451917628233, enabled=True)
	DefaultConnectionGene(key=(-3, 0), weight=0.061953786786126175, enabled=True)
	DefaultConnectionGene(key=(-2, 427), weight=-1.781822776648491, enabled=True)
	DefaultConnectionGene(key=(427, 0), weight=-0.629243348734653, enabled=False)
CPU times: user 4min 53s, sys: 39.2 ms, total: 4min 53s
Wall time: 4min 54s


In [22]:
best_brain

<neat.nn.feed_forward.FeedForwardNetwork at 0x70f51e535cd0>

In [23]:
# Show output of the most fit genome against training data.
outputs = []
for i, row in X_test.iterrows():
    inputs = [row['min_51'], row['max_51'], row['ATR'], row['RSI'], row['min_5'], row['price_move']]
    expected_output = row['trap']
    output = best_brain.activate(inputs)
    outputs.append(round(output[0]))
    # print("input {!r}, expected output {!r}, got {!r}".format(inputs, expected_output, output))

In [24]:
expected_outputs = X_test['trap'].to_list()
# Evaluate Performance
print("Accuracy:", accuracy_score(expected_outputs, outputs))

Accuracy: 0.3986013986013986
