# GP for AT Strategy Discovery

## Concept

I need to define a function

$$
F(x) \rightarrow (B, C, S) =\prod_{k=1}^{N}\phi^k({OHLC}, \tau_{k}, \sigma_{k})
$$

$$
\phi^k(OHLC, \tau, \sigma) = \begin{cases}
    1, & \text{if } \Iota(OHLC, \tau) \lesseqqgtr \sigma \text{ is true}\\
    0, & \text{otherwise}
  \end{cases}
$$

$$
\Iota(OHLC, \tau) \rightarrow \real
$$

$$
\sigma \in \begin{cases}
    \Iota(OHLC, \tau)& \text{Technical Indicator}\\
    f, & \text{Some float value}
  \end{cases}
$$

- $N$ is the number of terms in the equation
- $(B, C, S)$ is the tuple of boolean (`buy`, `close_pos`, `sell`) signals
- $\Iota^k(OHLC, \tau)$ is a technical indicator computed on `open`, `high`, `low`, `close` over the time period $\tau$
- $\sigma$ is the value for comparison.

The value $\sigma$ can be either a `float` constant or the output $\Iota^k(OHLC, \tau)$ of a technical indicator computed on the OHLC data.


In [1]:
import os
import random
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from backtesting import Backtest
from deap import algorithms
from deap import base
from deap import creator
from deap import gp
from deap import tools
from sqlalchemy import create_engine
from tqdm import tqdm

from backtester import GPStrategy
from gp_pset import get_pset, generate_samples
from models import Open, High, Low, Close

# SQLAlchemy engine used to persist backtest results.
# The connection URL (which embeds credentials) can be overridden through the
# GP_DATABASE_URL environment variable; when it is unset, the original local
# development URL is used so existing setups keep working unchanged.
engine = create_engine(
    os.environ.get(
        'GP_DATABASE_URL',
        'postgresql://postgres:postgres@localhost:4004/postgres',
    )
)


# if 'FitnessMin' not in dir(creator):
#     creator.create("FitnessMin", base.Fitness, weights=(-1.0,))

# Create the DEAP classes only when they are not already present on the
# `creator` module, so this cell can be re-run in a live kernel.
if not hasattr(creator, 'FitnessMax'):
    # Single-objective fitness with one positive weight.
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))

if not hasattr(creator, 'Individual'):
    # Individuals are GP expression trees carrying a FitnessMax fitness.
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)


def load_nifty_data(pattern='/workspace/nifty/NIFTY/202403*.csv'):
    """Load price CSV files into a single DataFrame indexed by timestamp.

    Args:
        pattern: Glob pattern selecting the CSV files to read. Every file
            must contain a ``datetime`` column. Defaults to the path that
            was previously hard-coded in this function.

    Returns:
        pandas.DataFrame: the rows of all matched files, with an added
        ``date`` column (the calendar date of each timestamp), indexed by
        ``datetime`` and sorted by that index.

    Raises:
        ValueError: if ``pattern`` matches no files.
    """
    # Sort the matches so the concatenation order does not depend on the
    # arbitrary order in which the filesystem lists the files.
    csv_paths = sorted(glob(pattern))
    if not csv_paths:
        raise ValueError(f'No CSV files match pattern: {pattern!r}')

    nifty_df = pd.concat(
        [pd.read_csv(path, parse_dates=True) for path in csv_paths]
    )
    nifty_df['datetime'] = pd.to_datetime(nifty_df['datetime'])
    nifty_df['date'] = nifty_df['datetime'].dt.date
    nifty_df = nifty_df.set_index('datetime').sort_index()
    return nifty_df


# Build the GP primitive set and load the price data.
pset = get_pset()
nifty_df = load_nifty_data()
nifty_df.head()

# Wrap the raw price arrays in the project's Open/High/Low/Close types.
o = Open(nifty_df['open'].values)
h = High(nifty_df['high'].values)
l = Low(nifty_df['low'].values)
c = Close(nifty_df['close'].values)

In [2]:

# eqs = generate_samples(pset, 50, min_depth=1, max_depth=6)

# records = []
# all_stats = []

# for eq in tqdm(eqs):
#     try:
#         run = Backtest(
#             nifty_df.rename(
#                 columns={
#                     'open': 'Open',
#                     'high': 'High',
#                     'low': 'Low',
#                     'close': 'Close',
#                 }
#             ),
#             GPStrategy,
#             cash=30000,
#             commission=.002,
#             trade_on_close=True,
#         )

#         stats = run.run(signal_func=gp.compile(eq, pset))
#         all_stats.append((eq, stats, stats))

#         q_vals = np.nan_to_num(stats._trades.PnL.quantile(
#             [.05, .25, .5, .75, .95]).values, 0)

#         pd.DataFrame(
#             [
#                 {
#                     'eq': str(eq),
#                     'trades': stats['# Trades'],
#                     'ret_pct': stats['Return [%]'],
#                     'equity_final': stats['Equity Final [$]'],
#                     # 'avg_trade_duration': stats['Avg. Trade Duration'],
#                     # 'max_drawdown_duration': stats['Max. Drawdown Duration'],
#                     'profit_factor': stats['Profit Factor'],
#                     'expectancy': stats['Expectancy [%]'],
#                     'sqn': stats['SQN'],
#                     'q05': q_vals[0],
#                     'q25': q_vals[1],
#                     'q50': q_vals[2],
#                     'q75': q_vals[3],
#                     'q95': q_vals[4],
#                     'metric': stats['# Trades'] * q_vals[1]
#                 }
#             ]
#         ).to_sql('gp_backtest_results', engine, if_exists='append', index=False)
#     except Exception as e:
#         print(eq, e)
#         break

# rdf = pd.read_sql('gp_backtest_results', engine)
# print(rdf.shape)
# # stats = all_stats[26][1]
# # fig, ax = plt.subplots(figsize=(15, 5))
# # # ax.vlines(stats._trades.index, 0, stats._equity_curve.values[stats._trades.index])
# # ax.axhline(stats._equity_curve.Equity.iloc[0], color='k', linestyle='--')
# # ax.axhline(stats._equity_curve.Equity.iloc[-1], color='g', linestyle='--')
# # stats._equity_curve.reset_index().Equity.plot(ax=ax)
# # print(stats)
# rdf[rdf['trades'] > 0].sort_values(['q50'], ascending=False)

In [3]:
import warnings

# Suppress every warning raised from this point on.
warnings.filterwarnings('ignore')

# Toolbox holding the generators used to build the initial population.
toolbox = base.Toolbox()

# Random expression trees with depth between 1 and 8.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=8)

# An individual is a creator.Individual filled from one generated expression.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)

# A population is a plain list of individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# toolbox.register("compile", gp.compile, primitive_set=pset)


def evalFitness(individual):
    """Backtest one GP individual and return its fitness.

    The individual is compiled with ``pset`` into a signal function and run
    through ``GPStrategy`` on ``nifty_df``. A one-row summary of the run is
    appended to the ``gp_backtest_results`` table through ``engine``.

    Args:
        individual: The GP expression tree to evaluate.

    Returns:
        tuple: A one-element tuple holding the backtest's ``Return [%]``, or
        ``(-9999,)`` if any step (compile, backtest or database write)
        raises. The length matches the single weight of ``FitnessMax``.
    """
    # Penalty fitness for individuals that cannot be evaluated. It must have
    # exactly one element because the fitness class has a single weight.
    failure_fitness = (-9999,)

    try:
        run = Backtest(
            nifty_df.rename(
                columns={
                    'open': 'Open',
                    'high': 'High',
                    'low': 'Low',
                    'close': 'Close',
                }
            ),
            GPStrategy,
            cash=30000,
            commission=.002,
            trade_on_close=True,
        )

        stats = run.run(signal_func=gp.compile(individual, pset))

        # PnL quantiles of the trades, with NaN replaced by 0. `nan` is passed
        # by keyword: the second positional argument of np.nan_to_num is
        # `copy`, so a positional 0 would request an in-place update instead.
        q_vals = np.nan_to_num(
            stats._trades.PnL.quantile([.05, .25, .5, .75, .95]).values,
            nan=0.0,
        )

        pd.DataFrame(
            [
                {
                    'eq': str(individual),
                    'trades': stats['# Trades'],
                    'ret_pct': stats['Return [%]'],
                    'equity_final': stats['Equity Final [$]'],
                    # 'avg_trade_duration': stats['Avg. Trade Duration'],
                    # 'max_drawdown_duration': stats['Max. Drawdown Duration'],
                    'profit_factor': stats['Profit Factor'],
                    'expectancy': stats['Expectancy [%]'],
                    'sqn': stats['SQN'],
                    'q05': q_vals[0],
                    'q25': q_vals[1],
                    'q50': q_vals[2],
                    'q75': q_vals[3],
                    'q95': q_vals[4],
                    'metric': stats['# Trades'] * q_vals[1]
                }
            ]
        ).to_sql('gp_backtest_results', engine, if_exists='append', index=False)

        return (stats['Return [%]'],)

    except Exception as e:
        # Best effort: report the failing individual and give it the penalty
        # fitness so the evolution can continue.
        print(individual, e)
        return failure_fitness


# Evolution operators. 'expr' is registered before 'mutate' because the
# mutation operator is given toolbox.expr as its subtree generator.
# NOTE(review): this re-registers 'expr', which was registered earlier with
# gp.genHalfAndHalf; confirm the overwrite is intended rather than using a
# separate name for the mutation generator.
toolbox.register('expr', gp.genFull, min_=1, max_=10)
toolbox.register('mutate', gp.mutUniform, expr=toolbox.expr, pset=pset)
toolbox.register('mate', gp.cxOnePoint)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('evaluate', evalFitness)

In [4]:


def main():
    """Run the evolutionary search and print the best individual found.

    Seeds ``random`` with 1024, builds a population of 100 individuals from
    ``toolbox`` and runs ``algorithms.eaSimple`` for 50 generations with
    crossover probability 0.5 and mutation probability 0.3. A hall of fame of
    size 1 tracks the best individual, and avg/std/min/max of the fitness
    values are reported per generation.

    Returns:
        tuple: ``(pop, hof, stats)`` - the population list that was passed to
        ``eaSimple``, the ``HallOfFame`` and the ``Statistics`` object.
    """
    random.seed(1024)

    pop = toolbox.population(n=100)
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    # algorithms.eaMuPlusLambda(
    algorithms.eaSimple(
        population=pop,
        toolbox=toolbox,
        # mu=0.5,
        # lambda_=0.2,
        ngen=50,
        stats=stats,
        cxpb=0.5,
        mutpb=0.3,
        halloffame=hof,
        verbose=True
    )
    # hof[0] is the best individual (the whole expression tree); indexing it
    # again with [0] would print only the tree's first node.
    print('Best individual : ', hof[0], hof[0].fitness)

    return pop, hof, stats


# Run the evolutionary search and bind the population, hall of fame and
# statistics object returned by main() at notebook scope.
pop, hof, stats = main()

gen	nevals	avg     	std    	min     	max
0  	100   	-8.26694	11.5279	-27.8967	0  
1  	63    	-1.21631	4.69597	-27.7842	0  
2  	64    	-0.0871258	0.866891	-8.71258	0  
3  	66    	0         	0       	0       	0  
4  	66    	-0.0772853	0.768979	-7.72853	0  
5  	62    	0         	0       	0       	0  
6  	72    	0         	0       	0       	0  
7  	69    	0         	0       	0       	0  
8  	54    	-0.267402 	2.66061 	-26.7402	0  
9  	64    	0         	0       	0       	0  
10 	57    	0         	0       	0       	0  
11 	71    	-0.0538029	0.535332	-5.38029	0  
12 	69    	0         	0       	0       	0  
13 	61    	0         	0       	0       	0  
14 	68    	0         	0       	0       	0  
15 	67    	0         	0       	0       	0  
16 	59    	0         	0       	0       	0  
17 	61    	-0.27886  	2.77462 	-27.886 	0  
18 	61    	0         	0       	0       	0  
19 	65    	0         	0       	0       	0  
20 	65    	0         	0       	0       	0  
21 	63    	-0.00754608	0.0750825	-0.75460