In [1]:
%env PGE_DB_NAME = pgequity
%env PGE_DB_USER = pgequity
%env PGE_DB_PASSWORD = pgequity
%env PGE_DB_HOST = localhost
%env PGE_DB_PORT = 5432

env: PGE_DB_NAME=pgequity
env: PGE_DB_USER=pgequity
env: PGE_DB_PASSWORD=pgequity
env: PGE_DB_HOST=localhost
env: PGE_DB_PORT=5432


In [2]:
import random

from pynoahdb.symbolseries import SymbolSeries

PARAM1_WINDOW_SIZES = [3,5,8,13,21,34,55,89,144,233]
PARAM2_WINDOW_SIZES = [5,8,13,21,34,55,89,144,233,377]

PARAM1_RATIO = [0.10,0.25,0.5,0.75,0.90]

OUTER_VARS = ["C({0})","H({0})","L({0})","O({0})","V({0})"]

FUNCTIONS = [
    ["RATIO_TO_RAVG({0},{1})",["t","w"],False],
    ["RATIO_TO_RLINEAR({0},{1})",["t","w"],False],
    ["QUANTILE_RATIO({0},{1},0.25,0.75)",["t","w"],False],
    ["QUANTILE_RATIO({0},{1},0.10,0.90)",["t","w"],False],
    ["MACDFAST({0})",["t"],False],
    ["MACDSLOW({0})",["t"],False],
    ["RSI({0},{1})",["t","w"],False],
    ["STOCH_OSC({0},{1})",["s","w"],True],
    ["RENTROPY({0},{1})",["t","w"],False],
    ["RKURT({0},{1})",["t","w"],False],
    ["RSKEW({0},{1})",["t","w"],False],
    ["RAVG_MEDIAN({0},{1})",["t","w"],False],
    ["RAVGS_RATIO({0},{1},{2})",["t","w","w"],False],
    ["RSEMS_RATIO({0},{1},{2})",["t","w","w"],False],
    ["STOCH_OSC_RAVG({0},{1},{2})",["s","w","w"],True],
    ["W_VOL_AVG({0},{1},{2})",["s","w","w"],True],
]

def generate_functions(f_count):
    #Get the symbols
    symbols = SymbolSeries().symbol_list()

    ret_list = []

    selected_functions = 0

    while selected_functions < f_count:
        #Get the function
        function = random.choice(FUNCTIONS)

        #Get the symbol
        symbol = random.choice(symbols)

        ##if the function requires a symbol and the symbol has only close data
        if function[2] and symbol[1]:
            continue #skip this iteration

        #Get the parameters
        params = []
        for p in function[1]:
            if p == "s":
                params.append(symbol[0])
            elif p == "w":
                params.append(random.choice(PARAM1_WINDOW_SIZES))
            elif p == "t":
                if symbol[1]:
                    params.append(f"C({symbol[0]})")
                else:
                    metric = random.choice(OUTER_VARS)
                    params.append(metric.format(symbol[0]))

        #Add the function to the list
        ret_list.append(function[0].format(*params))

        selected_functions += 1

    return "|".join(ret_list)
                


In [None]:
# from pynoahdb.series import Series
from pyfunc.processor import Processor

import seaborn as sns
import matplotlib.pyplot as plt

import keras
import tensorflow as tf 

import sklearn.metrics as metrics


import pandas as pd
import numpy as np

from sklearn.decomposition import PCA

tf.keras.config.disable_interactive_logging()


target_func = "FUTURE_PERCENT_PROFIT(TQQQ,2)"

df_target = Processor().process(target_func)

print(df_target.head())

best_model = None
best_accuracy = 0
best_functions = None

#in range 1 to 10

FUNCTION_COUNT = 30

for i in range(0,10000):

    #generate the functions
    functions = generate_functions(FUNCTION_COUNT)

    try:
        df_X = Processor().process(functions, autoscale=True)

        #Merge the dataframes
        df = pd.merge(df_X, df_target, on='index_date', how='inner')

        df.dropna(inplace=True)

        #if we have less than 1000 rows, skip
        if df.shape[0] < 200:
            continue

        labels=['Worst','Bad','Neutral','Good','Best']

        df['ProfitCategory'] = pd.qcut(df[target_func], 5, labels=labels)

        #Convert the ProfitCategory to a one-hot encoding
        df = pd.get_dummies(df, columns=['ProfitCategory'])


        df_X = df.drop([target_func,'ProfitCategory_Worst','ProfitCategory_Bad','ProfitCategory_Neutral','ProfitCategory_Good','ProfitCategory_Best'], axis=1)
        df_Y = df[['ProfitCategory_Worst','ProfitCategory_Bad','ProfitCategory_Neutral','ProfitCategory_Good','ProfitCategory_Best']]

        #PCA to reduce the number of features to 10
        pca = PCA(n_components=10)
        df_X = pd.DataFrame(pca.fit_transform(df_X))

        predictions = []
        test_values = []

        model = keras.models.Sequential([
            keras.layers.Input(shape=(10,10,1)),
            keras.layers.Conv2D(50, 10, activation='relu', padding='same'),
            keras.layers.MaxPooling2D(2),
            keras.layers.Dropout(0.5),
            keras.layers.Conv2D(100, 5, activation='relu', padding='same'),
            keras.layers.MaxPooling2D(2),
            keras.layers.Dropout(0.5),
            keras.layers.Flatten(),
            keras.layers.Dense(50, activation='sigmoid'),
            keras.layers.Dropout(0.5),
            keras.layers.Dense(5, activation='softmax')
        ])

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        WINDOW_SIZE = 1000
        for i in range(0, len(df_X) - WINDOW_SIZE - 20):
        # for i in range(0, 2):

            df_X_train = []
            df_Y_train = []

            for j in range(0, WINDOW_SIZE):
                df_X_train.append(df_X.iloc[i+j:i+10+j])
                df_Y_train.append(df_Y.iloc[i+10+j])
                
            df_X_train = np.array(df_X_train)
            df_Y_train = np.array(df_Y_train)

            df_X_test = np.array([df_X.iloc[i+WINDOW_SIZE+1:i+WINDOW_SIZE+11]])
            df_Y_test = np.array([df_Y.iloc[i+WINDOW_SIZE+11]])
            df_Y_test = labels[np.argmax(df_Y_test, axis=1)[0]]

            test_values.append(df_Y_test)

            model.fit(df_X_train, df_Y_train, epochs=10, batch_size=10, verbose=1)

            y_pred = model.predict(df_X_test)

            filters, biases = model.layers[0].get_weights()
            fmin, fmax = filters.min(), filters.max()
            filters = (filters - fmin) / (fmax - fmin)


            if i % 100 == 0:
                print(f"{i} of {len(df_X)}")
                print("Predicted: ", labels[np.argmax(y_pred, axis=1)[0]], " Actual: ", df_Y_test)


            #convert the prediction to categorical
            y_pred = labels[np.argmax(y_pred, axis=1)[0]]
            predictions.append(y_pred)          

        test_values = np.array(test_values)
        predictions = np.array(predictions)

        print("*"*80)
        print("Functions")
        print(functions)
        print(metrics.accuracy_score(test_values, predictions))
        print(metrics.confusion_matrix(test_values, predictions))
        print(metrics.classification_report(test_values, predictions))

        if metrics.accuracy_score(test_values, predictions) > best_accuracy:
            best_accuracy = metrics.accuracy_score(test_values, predictions)
            best_model = model
            best_functions = functions

        print("Best Accuracy: ", best_accuracy)
        print("Best Functions: ", best_functions)

    except Exception as e:
        #rethrow the exception
        # raise e
        continue






            FUTURE_PERCENT_PROFIT(TQQQ,2)
index_date                               
2010-02-11                       0.053427
2010-02-12                       0.027685
2010-02-16                       0.002498
2010-02-17                       0.012901
2010-02-18                       0.018388


2024-11-04 11:51:06.218899: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Ultra
2024-11-04 11:51:06.218924: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 128.00 GB
2024-11-04 11:51:06.218927: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 48.00 GB
2024-11-04 11:51:06.218943: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-04 11:51:06.218956: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


********************************************************************************
Functions
QUANTILE_RATIO(C(BAMLEMPVPRIVSLCRPIUSSYTW),8,0.10,0.90)|RSI(C(THREEFF8),55)|QUANTILE_RATIO(C(BAMLEMPVPRIVSLCRPIUSSYTW),34,0.25,0.75)|RKURT(C(NONFIN2140A2P2AMT),34)|QUANTILE_RATIO(H(SJM),13,0.10,0.90)|QUANTILE_RATIO(L(IDXN225),8,0.25,0.75)|QUANTILE_RATIO(C(AB1020AAVOL),5,0.10,0.90)|RENTROPY(L(V),34)|RAVG_MEDIAN(C(AB14AAVOL),13)|RAVGS_RATIO(H(CAG),89,3)|QUANTILE_RATIO(L(NRG),5,0.10,0.90)|QUANTILE_RATIO(C(DEXTAUS),89,0.10,0.90)|W_VOL_AVG(C,55,34)|RSKEW(C(THREEFYTP7),5)|QUANTILE_RATIO(L(OKE),21,0.10,0.90)|RSEMS_RATIO(C(JNJ),34,5)|RKURT(C(THREEFFTP1),55)|STOCH_OSC(TXN,21)|RSI(C(T5YIFR),89)|RATIO_TO_RAVG(C(THREEFF1),21)|RSI(C(BAMLEM4BRRBLCRPISYTW),55)|W_VOL_AVG(EBAY,55,21)|QUANTILE_RATIO(L(HAS),233,0.10,0.90)|MACDFAST(C(DCPF3M))|W_VOL_AVG(ATI,89,21)|RSI(V(OMC),144)|STOCH_OSC_RAVG(DELL,5,89)|RKURT(C(BAMLEM4RBLLCRPIUSEY),8)|MACDSLOW(C(DGS1MO))|RENTROPY(C(UPS),8)
nan
[]


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
2024-11-04 11:51:12.905255: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


0 of 1198
Predicted:  Neutral  Actual:  Good
100 of 1198
Predicted:  Worst  Actual:  Worst
********************************************************************************
Functions
RATIO_TO_RAVG(C(AB1020AAVOL),8)|RSEMS_RATIO(O(CAT),5,34)|STOCH_OSC_RAVG(TROW,8,55)|RENTROPY(C(IDXN225),233)|RATIO_TO_RAVG(C(BAMLEMHYHYLCRPIUSTRIV),233)|QUANTILE_RATIO(H(IDXT100),233,0.25,0.75)|MACDSLOW(C(BAMLCC1A013YTRIV))|RATIO_TO_RAVG(C(THREEFYTP4),3)|QUANTILE_RATIO(C(DFII30),34,0.10,0.90)|RATIO_TO_RLINEAR(V(JPYX),21)|STOCH_OSC(ZARX,34)|STOCH_OSC_RAVG(THBX,89,21)|STOCH_OSC(PEP,21)|MACDFAST(O(STX))|RSKEW(C(THREEFFTP4),8)|RENTROPY(C(BBY),8)|QUANTILE_RATIO(C(T3MFF),144,0.25,0.75)|STOCH_OSC(LUV,13)|RAVGS_RATIO(C(AB1020AAVOL),233,3)|STOCH_OSC(NRG,8)|RSEMS_RATIO(C(BAMLEMEBCRPIEEY),233,8)|MACDFAST(V(MDT))|RAVGS_RATIO(H(PEP),55,13)|RAVG_MEDIAN(O(IAU),233)|RSKEW(O(PG),13)|MACDSLOW(V(IPG))|QUANTILE_RATIO(C(NONFIN14AAVOL),55,0.10,0.90)|MACDSLOW(H(EURCHFX))|RSEMS_RATIO(V(M),3,21)|RSI(C(IDXHSI),5)
0.20786516853932585


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


********************************************************************************
Functions
RKURT(C(RPONTSYD),55)|RAVG_MEDIAN(H(PH),21)|STOCH_OSC(PM,21)|RSEMS_RATIO(L(AIV),21,3)|RAVGS_RATIO(C(RRPTSYD),89,8)|RATIO_TO_RAVG(C(RPONTTLD),34)|MACDSLOW(C(WLEMUINDXD))|RENTROPY(C(BAMLEMXOCOLCRPIUSEY),21)|RATIO_TO_RLINEAR(C(RPMBSD),34)|RKURT(C(BAMLEMUBCRPIUSTRIV),13)|W_VOL_AVG(IDXN225,5,144)|MACDFAST(C(DAAA))|STOCH_OSC_RAVG(EOG,13,13)|RSEMS_RATIO(V(LOW),89,55)|MACDFAST(C(DEXKOUS))|RSEMS_RATIO(C(MKT2140MKTVOL),34,55)|RATIO_TO_RLINEAR(C(RPONTTLD),3)|QUANTILE_RATIO(C(FMC),13,0.25,0.75)|RATIO_TO_RAVG(C(BAA10Y),233)|STOCH_OSC_RAVG(CI,5,5)|QUANTILE_RATIO(C(THREEFYTP3),5,0.10,0.90)|QUANTILE_RATIO(L(HAL),5,0.25,0.75)|RAVG_MEDIAN(O(HRL),13)|RAVGS_RATIO(O(NUE),8,8)|RAVG_MEDIAN(O(CSX),233)|MACDFAST(C(THREEFF4))|RATIO_TO_RAVG(C(BAMLEMPTPRVICRPIEY),233)|RAVGS_RATIO(C(THREEFF9),89,34)|RATIO_TO_RLINEAR(C(DPRIME),233)|RAVG_MEDIAN(C(DTP10L25),21)
nan
[]


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


********************************************************************************
Functions
RAVGS_RATIO(C(FIN59AAAMT),233,5)|RKURT(C(BAMLEM2RBBBLCRPIUSTRIV),21)|STOCH_OSC(HP,34)|RAVG_MEDIAN(C(DGS2),89)|RAVGS_RATIO(L(IDXAORD),3,144)|RAVG_MEDIAN(C(BAMLEMIBHGCRPIEY),89)|RENTROPY(C(OVXCLS),21)|RAVGS_RATIO(C(SP500),21,3)|RSEMS_RATIO(C(NONFINGT80AAAMT),13,233)|QUANTILE_RATIO(L(TGT),233,0.25,0.75)|STOCH_OSC_RAVG(ORLY,8,8)|RKURT(C(BAMLCC1A013YTRIV),89)|RSI(L(CHRW),34)|RSEMS_RATIO(L(NDAQ),13,89)|QUANTILE_RATIO(C(DTB3),144,0.25,0.75)|RSI(C(BAMLEMHBHYCRPISYTW),34)|RATIO_TO_RLINEAR(C(BAMLEMPVPRIVSLCRPIUSTRIV),89)|RAVG_MEDIAN(O(WM),34)|RSKEW(H(CMCSA),34)|QUANTILE_RATIO(C(THREEFYTP10),34,0.25,0.75)|MACDSLOW(O(EBAY))|QUANTILE_RATIO(C(AVY),34,0.10,0.90)|STOCH_OSC_RAVG(WMB,34,21)|RSI(L(SUN),55)|RKURT(C(BAMLEMIBHGCRPIOAS),89)|QUANTILE_RATIO(L(HRL),89,0.25,0.75)|RENTROPY(C(DEXSZUS),21)|RSEMS_RATIO(C(BAMLEMELLCRPIEMEAUSEY),144,13)|RAVGS_RATIO(L(GE),21,21)|RAVGS_RATIO(O(GT),5,89)
nan
[]


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


0 of 2463
Predicted:  Good  Actual:  Good
100 of 2463
Predicted:  Neutral  Actual:  Neutral
200 of 2463
Predicted:  Good  Actual:  Neutral
300 of 2463
Predicted:  Good  Actual:  Best
