In [10]:
# Export database connection settings as environment variables for this kernel.
# NOTE(review): presumably these are read by the pynoahdb / pyfunc packages used
# in later cells when they open their database connection — confirm.
# NOTE(review): the password is hardcoded in the notebook; if these are not
# throw-away local defaults, load them from outside the notebook before sharing.
# Keep comments on their own lines: text after the `=` may become part of the value.
%env PGE_DB_NAME = pgequity
%env PGE_DB_USER = pgequity
%env PGE_DB_PASSWORD = pgequity
%env PGE_DB_HOST = localhost
%env PGE_DB_PORT = 5432

env: PGE_DB_NAME=pgequity
env: PGE_DB_USER=pgequity
env: PGE_DB_PASSWORD=pgequity
env: PGE_DB_HOST=localhost
env: PGE_DB_PORT=5432


In [None]:
import random

from pynoahdb.symbolseries import SymbolSeries

# Candidate window lengths. generate_functions() draws every "w" argument
# from this list.
PARAM1_WINDOW_SIZES = [
    3, 5, 8, 13, 21,
    34, 55, 89, 144, 233,
]

# NOTE(review): not referenced by the code visible in this notebook; possibly
# intended for the second window argument of the three-argument functions —
# confirm before relying on it.
PARAM2_WINDOW_SIZES = [
    5, 8, 13, 21, 34,
    55, 89, 144, 233, 377,
]

# NOTE(review): not referenced by the code visible in this notebook.
PARAM1_RATIO = [0.10, 0.25, 0.5, 0.75, 0.90]

# Templates that wrap a symbol name into a series term; {0} is the symbol.
OUTER_VARS = [
    "C({0})",
    "H({0})",
    "L({0})",
    "O({0})",
    "V({0})",
]

# Catalogue of function templates used by generate_functions().
# Every entry is [template, argument_kinds, needs_full_symbol] where:
#   * template          - str.format template, one placeholder per argument
#   * argument_kinds    - one code per placeholder:
#                           "t" -> a series term such as C(SYMBOL)
#                           "s" -> the bare symbol name
#                           "w" -> a window size
#   * needs_full_symbol - when True, the entry is skipped for symbols that
#                         are flagged as having only close data
# The order of the entries is kept stable on purpose (random.choice indexes
# into this list).
FUNCTIONS = [
    # series term + one window
    ["RATIO_TO_RAVG({0},{1})", ["t", "w"], False],
    ["RATIO_TO_RLINEAR({0},{1})", ["t", "w"], False],
    ["QUANTILE_RATIO({0},{1},0.25,0.75)", ["t", "w"], False],
    ["QUANTILE_RATIO({0},{1},0.10,0.90)", ["t", "w"], False],
    # series term only
    ["MACDFAST({0})", ["t"], False],
    ["MACDSLOW({0})", ["t"], False],
    ["RSI({0},{1})", ["t", "w"], False],
    # bare symbol + one window
    ["STOCH_OSC({0},{1})", ["s", "w"], True],
    ["RENTROPY({0},{1})", ["t", "w"], False],
    ["RKURT({0},{1})", ["t", "w"], False],
    ["RSKEW({0},{1})", ["t", "w"], False],
    ["RAVG_MEDIAN({0},{1})", ["t", "w"], False],
    # series term + two windows
    ["RAVGS_RATIO({0},{1},{2})", ["t", "w", "w"], False],
    ["RSEMS_RATIO({0},{1},{2})", ["t", "w", "w"], False],
    # bare symbol + two windows
    ["STOCH_OSC_RAVG({0},{1},{2})", ["s", "w", "w"], True],
    ["W_VOL_AVG({0},{1},{2})", ["s", "w", "w"], True],
]

def generate_functions(f_count):
    """Assemble a random, '|'-separated list of function expressions.

    Repeatedly picks a random entry from FUNCTIONS and a random symbol from
    ``SymbolSeries().symbol_list()``, fills the entry's template with
    arguments, and stops once ``f_count`` expressions have been collected.

    Each symbol entry is read by position: ``entry[0]`` is the name inserted
    into the templates and ``entry[1]`` is a truthy flag meaning the symbol
    has only close data. Picks that pair a "needs full symbol" function with
    such a symbol are discarded and redrawn.

    Args:
        f_count: number of expressions to generate.

    Returns:
        str: the generated expressions joined with "|".
    """
    available_symbols = SymbolSeries().symbol_list()
    expressions = []

    while len(expressions) < f_count:
        # Draw the function first, then the symbol (same draw order as always).
        spec = random.choice(FUNCTIONS)
        candidate = random.choice(available_symbols)

        # Redraw when the function needs more than close data and the symbol
        # is close-only.
        if spec[2] and candidate[1]:
            continue

        name = candidate[0]
        args = []
        for kind in spec[1]:
            if kind == "t":
                # Series term: close-only symbols can only use C(...);
                # others get a random one of the OUTER_VARS wrappers.
                if candidate[1]:
                    args.append(f"C({name})")
                else:
                    wrapper = random.choice(OUTER_VARS)
                    args.append(wrapper.format(name))
            elif kind == "w":
                args.append(random.choice(PARAM1_WINDOW_SIZES))
            elif kind == "s":
                args.append(name)

        expressions.append(spec[0].format(*args))

    return "|".join(expressions)


class GeneticAlgo:
    """Minimal select-and-replace evolutionary search.

    Each generation scores every individual, keeps the lowest-scoring half
    and refills the population with freshly generated individuals. There is
    no crossover or mutation: new individuals always come from ``f_generator``.

    NOTE(review): individuals are ranked in ascending score order, so the
    evaluator is treated as a cost (lower is better). If the evaluator returns
    something where higher is better (e.g. an accuracy), negate it.
    """

    def __init__ (self, f_generator, f_evaluator):
        """Store the callbacks.

        Args:
            f_generator: zero-argument callable returning a new individual.
            f_evaluator: callable taking an individual and returning a
                sortable score.
        """
        self.f_generator = f_generator
        self.f_evaluator = f_evaluator

    def run(self, population_size, generations):
        """Run the search and return the top-ranked individual.

        Args:
            population_size: number of individuals kept per generation
                (must be at least 1).
            generations: number of score/select/refill rounds.

        Returns:
            The first individual of the final population: the lowest-scoring
            one of the last evaluated generation, or simply the first
            generated individual when ``generations`` is 0.

        Raises:
            ValueError: if ``population_size`` is smaller than 1.
        """
        if population_size < 1:
            raise ValueError("population_size must be at least 1")

        population = [self.f_generator() for _ in range(population_size)]

        # Always keep at least one survivor so a population of one does not
        # collapse to an empty list.
        survivor_count = max(1, population_size // 2)
        # Refill back to the full size; for odd sizes this is one more than
        # the survivor count, so the population no longer shrinks.
        newcomer_count = population_size - survivor_count

        for _ in range(generations):
            scored = []
            for individual in population:
                score = self.f_evaluator(individual)
                # Keep exposing the score on the individual for callers that
                # read `.score`, but do not require it: str, tuple, int and
                # similar individuals cannot take new attributes.
                try:
                    individual.score = score
                except (AttributeError, TypeError):
                    pass
                scored.append((score, individual))

            # Stable ascending sort on the score only (individuals themselves
            # are never compared).
            scored.sort(key=lambda pair: pair[0])

            survivors = [individual for _, individual in scored[:survivor_count]]
            newcomers = [self.f_generator() for _ in range(newcomer_count)]
            population = survivors + newcomers

        return population[0]

In [None]:
from pyfunc.processor import Processor

import seaborn as sns
import matplotlib.pyplot as plt

import keras
import tensorflow as tf 

#import logistic regression models
from sklearn.linear_model import LogisticRegression

import sklearn.metrics as metrics


import pandas as pd
import numpy as np

from sklearn.decomposition import PCA

tf.keras.config.disable_interactive_logging()

# --- Search configuration ---------------------------------------------------
target_func = "FUTURE_PERCENT_PROFIT(TQQQ,2)"
FUNCTION_COUNT = 20   # number of generated features per candidate set
N_TRIALS = 10000      # number of random candidate sets to evaluate
MIN_ROWS = 200        # candidates with fewer merged, NaN-free rows are skipped

# --- Target -----------------------------------------------------------------
df_target = Processor().process(target_func)

# Binarise the target: 1 when the value is strictly positive, otherwise 0.
# NOTE(review): missing values compare as "not > 0" and therefore become 0 —
# confirm that the target series has no NaN rows that should be dropped instead.
df_target[target_func] = (df_target[target_func] > 0).astype(int)

print(df_target.head())

# --- Random search ----------------------------------------------------------
# NOTE(review): the score below is the accuracy on the same rows the model was
# fitted on, and the row count differs between candidates, so scores are
# optimistic and not strictly comparable — consider a hold-out split.
# NOTE(review): no random seed is set, so every run explores different
# candidates.
best_score = 0
best_functions = None

# Failures are expected for some random candidates, so they do not abort the
# search, but they are counted and reported instead of being silently dropped.
failed_trials = 0
last_error = None

for i in range(N_TRIALS):

    # Generate a random '|'-separated feature specification.
    functions = generate_functions(FUNCTION_COUNT)

    try:
        df_X = Processor().process(functions, autoscale=True)

        # Align features with the target on the date and drop incomplete rows.
        df = pd.merge(df_X, df_target, on='index_date', how='inner').dropna()

        # Not enough usable rows for this candidate: skip it.
        if df.shape[0] < MIN_ROWS:
            continue

        Y = df[target_func]
        X = df.drop(columns=[target_func])

        model = LogisticRegression()
        model.fit(X, Y)
        score = model.score(X, Y)

    except Exception as e:
        failed_trials += 1
        last_error = e
        continue

    if score > best_score:
        best_score = score
        best_functions = functions
        print(f"Best Score: {best_score}")
        print(f"Best Functions: {best_functions}")
        print()

if failed_trials:
    print(f"{failed_trials} of {N_TRIALS} trials raised an exception and were skipped; "
          f"last error: {last_error!r}")

            FUTURE_PERCENT_PROFIT(TQQQ,2)
index_date                               
2010-02-11                              1
2010-02-12                              1
2010-02-16                              1
2010-02-17                              1
2010-02-18                              1
Best Score: 0.5972850678733032
Best Functions: RSEMS_RATIO(C(IHLIDXUSTPINSU),21,5)|RSI(C(RRPTSYD),21)|RAVG_MEDIAN(C(NONFIN1020A2P2VOL),8)|W_VOL_AVG(IAU,144,13)|RATIO_TO_RAVG(C(RPONMBSD),34)|MACDFAST(C(IHLIDXFRTPLOST))|RENTROPY(L(NEM),21)|RATIO_TO_RAVG(C(IHLIDXAUTPMEDITECH),21)|QUANTILE_RATIO(C(IHLIDXUSTPINDUENGI),34,0.25,0.75)|RAVG_MEDIAN(C(IHLIDXGBTPPRMA),55)|QUANTILE_RATIO(C(DTP30F42),55,0.10,0.90)|RAVG_MEDIAN(L(PLL),3)|RATIO_TO_RLINEAR(C(NONFIN14A2P2VOL),34)|RATIO_TO_RAVG(C(OBMMIC15YF),3)|RAVG_MEDIAN(C(OBFRVOL),5)|STOCH_OSC_RAVG(DE,233,3)|QUANTILE_RATIO(O(OXY),8,0.10,0.90)|RAVGS_RATIO(C(BAMLEMEBCRPIESYTW),21,8)|MACDFAST(C(RIFSPPNA2P2D60NB))|RENTROPY(C(IHLIDXFRTPSOFTDEVE),3)





Best Score: 0.617741935483871
Best Functions: MACDSLOW(O(HRL))|QUANTILE_RATIO(C(BAMLEM2RBBBLCRPIUSSYTW),89,0.10,0.90)|STOCH_OSC_RAVG(ROP,34,13)|RSEMS_RATIO(C(IHLIDXCATPSCREDE),21,5)|QUANTILE_RATIO(C(THREEFF1),5,0.25,0.75)|RSI(L(EFX),89)|RKURT(V(LH),8)|RSEMS_RATIO(O(XRX),13,34)|RAVG_MEDIAN(C(NONFIN14AAVOL),5)|RATIO_TO_RAVG(L(CVX),34)|QUANTILE_RATIO(O(T),55,0.25,0.75)|RATIO_TO_RLINEAR(C(IHLIDXDETPHOTO),21)|RSEMS_RATIO(C(DEXSFUS),144,89)|RAVG_MEDIAN(C(BAMLC1A0C13Y),13)|RSI(C(BAMLEM1BRRAAA2ACRPITRIV),55)|MACDSLOW(C(IHLIDXUSIA))|MACDSLOW(C(MUR))|RATIO_TO_RAVG(C(OBMMIC30YFNA),21)|MACDFAST(C(IHLIDXAUTPARCH))|MACDSLOW(O(DVN))





Best Score: 0.6187845303867403
Best Functions: RSKEW(C(NONFIN1020A2P2AMT),144)|RSI(C(IHLIDXCATPCONS),55)|STOCH_OSC_RAVG(CL,89,3)|W_VOL_AVG(PPG,89,233)|MACDFAST(C(BAMLEM3RBBLCRPIUSTRIV))|QUANTILE_RATIO(C(DJTA),5,0.10,0.90)|RATIO_TO_RLINEAR(O(SLM),5)|MACDFAST(H(APD))|RATIO_TO_RLINEAR(C(VXGSCLS),3)|RKURT(L(FDX),5)|RATIO_TO_RLINEAR(O(L),55)|QUANTILE_RATIO(C(IHLIDXAUTPNURS),13,0.25,0.75)|RATIO_TO_RLINEAR(C(IHLIDX39340),89)|RAVG_MEDIAN(L(AFL),13)|MACDFAST(C(IHLIDX31080))|RENTROPY(O(HES),55)|RATIO_TO_RLINEAR(O(IDXRUT),21)|RSI(C(IHLIDXUSTPMEDITECH),144)|MACDSLOW(C(IHLIDXGBTPSEPUSA))|RAVGS_RATIO(C(RPTSYD),5,3)





Best Score: 0.6200607902735562
Best Functions: RAVGS_RATIO(O(GD),55,55)|QUANTILE_RATIO(O(MCD),233,0.25,0.75)|QUANTILE_RATIO(C(DEXSDUS),144,0.10,0.90)|RAVGS_RATIO(C(RIFSPPAAAD60NB),8,89)|RSEMS_RATIO(C(IHLIDXNEWNSAUS),34,55)|MACDSLOW(C(RIFSPPNAAD15NB))|RSKEW(C(BAMLEMPBPUBSICRPITRIV),144)|MACDFAST(C(IHLIDXUSTPSALE))|RSI(O(AIZ),8)|RSEMS_RATIO(C(BAMLEMHYHYLCRPIUSEY),89,5)|RSI(O(MDT),8)|RATIO_TO_RLINEAR(V(UNP),21)|RKURT(L(PEG),5)|RATIO_TO_RLINEAR(O(CNYX),8)|RSEMS_RATIO(H(MKC),233,144)|RSEMS_RATIO(C(IHLIDXUSCT),8,144)|RATIO_TO_RAVG(C(THREEFFTP3),34)|RSKEW(C(BAMLC0A0CM),34)|RSI(O(EL),3)|RSKEW(C(AGGDG),8)





Best Score: 0.6201550387596899
Best Functions: QUANTILE_RATIO(C(IHLIDXCATPINSU),3,0.25,0.75)|RAVGS_RATIO(L(D),233,233)|RKURT(C(IHLIDXUSTN),21)|QUANTILE_RATIO(C(DEXDNUS),144,0.10,0.90)|RKURT(C(IHLIDX18140),21)|RATIO_TO_RLINEAR(C(PCAR),89)|RSI(C(IHLIDXUSMT),5)|RATIO_TO_RLINEAR(C(TRINTDEXR),89)|RENTROPY(O(ORCL),3)|MACDFAST(C(THREEFYTP8))|RSKEW(C(DTP3HA32),89)|STOCH_OSC_RAVG(LOW,233,233)|MACDSLOW(C(CHK))|RSEMS_RATIO(C(DCOILWTICO),89,34)|RSKEW(C(IHLIDXGBTPRETA),89)|RENTROPY(C(USEPUINDXD),21)|RATIO_TO_RAVG(C(IHLIDXUSCO),8)|RSEMS_RATIO(C(OBFR1),13,55)|RSI(C(IHLIDXUSAL),89)|MACDSLOW(L(GNW))





Best Score: 0.6328671328671329
Best Functions: RSI(C(MKT4180MKTVOL),5)|RATIO_TO_RLINEAR(C(IHLIDXDETPREALESTA),34)|MACDSLOW(C(GVZCLS))|RKURT(C(ITW),5)|RAVG_MEDIAN(V(BIG),3)|RATIO_TO_RAVG(V(RUBX),8)|QUANTILE_RATIO(C(BAMLEMHBHYCRPISYTW),13,0.25,0.75)|QUANTILE_RATIO(C(IHLIDXNEWAU),21,0.25,0.75)|MACDFAST(C(OBFR75))|RAVG_MEDIAN(C(OBMMIC30YFLVGT80FB720A739),144)|RATIO_TO_RLINEAR(C(RIFSPPNA2P2D30NB),13)|RAVGS_RATIO(C(T10YIE),233,13)|RAVG_MEDIAN(C(THREEFFTP1),144)|QUANTILE_RATIO(C(BAMLEMELLCRPIEMEAUSOAS),89,0.25,0.75)|RATIO_TO_RLINEAR(L(CAH),233)|RSKEW(C(DTP30F47),34)|RSKEW(C(IHLIDXGBTPCIVIENGI),34)|RKURT(C(IHLIDXUSTPLOGISUPP),233)|RATIO_TO_RAVG(C(DJTA),21)|RSEMS_RATIO(C(IHLIDXDETPCONS),8,21)



