In [1]:
from classification import TStatFeatureSelector, BollingerBandDriftDetector, FixedSizeBuffer, BaseModelStockPredictor
from DataLoader import DataLoader
import pandas as pd
from itertools import product
from river import tree

### DATA LOADING

In [2]:
# Symbol to analyse. It is also written into every row of the grid-search
# results further down, so change it here rather than in later cells.
ticker = 'AAPL'
# Fallback when the Yahoo source is unavailable:
#     dataLoader.get_data_locally('AAPL')
dataLoader = DataLoader()
# Run the loader's pipeline for the ticker. Judging by the output displayed in
# the next cell, the result is a table of price columns, derived indicator
# columns and a 0/1 `target` column.
stock_data = dataLoader.pipeline(ticker)

In [3]:
# Show the loaded table (the notebook renders a truncated first/last-rows view).
stock_data

Unnamed: 0,open,close,high,low,volume,max_5,min_5,max_10,min_10,max_20,...,stochastic_fast,stochastic_slow,%r,atr,cmo,cci,mom,bias,wnr,target
0,169.5900,169.5800,170.3900,168.9500,42104830,175.04,167.78,176.55,167.04,176.55,...,38.536927,36.110139,74.343776,4.322500,18.648208,-2.766938,4.5800,-0.491732,-0.406759,0
1,170.2900,168.8200,171.9200,168.8200,53704390,169.67,167.78,176.55,167.78,176.55,...,33.216661,34.126706,84.800000,4.281071,7.646048,0.088039,1.7800,-1.041056,-0.461770,1
2,168.7900,169.6500,170.6800,168.5800,47691720,169.67,168.45,176.55,167.78,176.55,...,39.026951,36.926846,77.422222,4.138214,7.127430,-1.735865,1.6500,-0.650617,-0.464363,0
3,169.0800,168.8400,169.3400,168.2302,49329480,169.65,168.45,176.55,167.78,176.55,...,33.356668,35.200093,84.622222,4.061771,-2.391497,-9.622342,-0.5400,-1.093687,-0.511957,1
4,171.1900,170.0300,171.2500,169.4750,46240500,170.03,168.82,176.55,167.78,176.55,...,29.466554,33.950058,74.044444,4.067843,-13.000978,9.644727,-2.6600,-0.241139,-0.565005,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2261,32.0925,31.7125,32.1425,31.6525,144994480,31.7125,31.1875,32.57,31.1875,33.1625,...,23.710317,21.660053,61.264182,0.776607,-47.001621,-13.724490,-1.4500,-0.426240,-0.735008,1
2262,31.4875,31.7750,31.8025,31.3150,160503040,31.775,31.5425,32.4175,31.1875,33.1625,...,26.190476,22.718254,55.704698,0.733393,-31.115460,-44.938474,-0.7950,0.019673,-0.655577,0
2263,31.4625,31.6400,31.6450,31.1650,128977440,31.775,31.575,32.155,31.1875,33.1625,...,20.833333,23.578042,52.380952,0.661964,-30.640394,-74.687178,-0.7775,-0.160929,-0.653202,0
2264,31.4625,31.4000,31.6000,31.2425,149088360,31.775,31.4,31.9,31.1875,33.1625,...,13.148789,20.057533,72.262774,0.585000,-30.019881,-93.230743,-0.7550,-0.681628,-0.650099,1


### PREDICTION (EXAMPLE USAGE)

In [11]:
# Single example run: one model, one drift detector, one feature selector.
# `num_features` is not passed here, so whatever default the class defines applies.
stock_predictor = BaseModelStockPredictor(stock_data=stock_data, 
                                 model_name='hoeffdingtreeclassifier',
                                 drift_name='adwin',
                                 feature_selector_name='tstat',
                                 learning_threshold = 1000
                                 )
# prediction() returns a pair; the grid-search cell unpacks it as
# (accuracy, metrics_result). Leaving the call as the last expression displays both.
stock_predictor.prediction()

(0.747,
    class  precision  recall     f1
 0      0      0.738   0.802  0.769
 1      1      0.759   0.686  0.721)

In [5]:
# Inspect which features the most recently run predictor kept.
# NOTE(review): `pipeline[1]` is presumably the feature-selection step - confirm
# against BaseModelStockPredictor.
# `stock_predictor` is rebound on every grid-search run, so if this cell is
# executed after the grid search it reports the last grid configuration rather
# than the example run above.
selected = stock_predictor.pipeline[1].selected_features
print("Selected features:", selected)

Selected features: ['day_change', 'downward_pressure', 'upward_pressure', 'open', 'atr', 'cci', 'volume', 'day_variation', 'high', 'max_500', 'low', 'rsi', 'wnr', 'cmo', '2_weeks_return']


In [6]:
def _build_model_grid(model_cls, args, args_values):
    """Instantiate ``model_cls`` once per combination of hyperparameter values.

    Args:
        model_cls: Callable invoked as ``model_cls(**kwargs)`` for each combination.
        args: Hyperparameter names, e.g. ``['grace_period', 'max_depth']``.
        args_values: One iterable of candidate values per name, in the same
            order as ``args``, e.g. ``[[100, 200], [None]]``.

    Returns:
        A pair ``(models, arg_strings)`` of equal-length lists. ``models[i]`` is
        the instance built from the i-th combination and ``arg_strings[i]`` is
        its ``"name=value, name=value"`` description. Combinations follow
        ``itertools.product`` order (the last hyperparameter varies fastest).

    Raises:
        ValueError: If ``args`` and ``args_values`` have different lengths.
    """
    arg_names = list(args)
    value_lists = list(args_values)

    # zip() below would otherwise silently drop the unmatched names or values
    # and build models with fewer hyperparameters than requested.
    if len(arg_names) != len(value_lists):
        raise ValueError(
            "args and args_values must have the same length "
            f"(got {len(arg_names)} names and {len(value_lists)} value lists)"
        )

    models = []
    arg_strings = []
    for combination in product(*value_lists):
        kwargs = dict(zip(arg_names, combination))
        models.append(model_cls(**kwargs))
        # Human-readable label stored alongside the results.
        arg_strings.append(', '.join(f"{key}={value}" for key, value in kwargs.items()))

    return models, arg_strings


def provide_hoeffdingtreeclassifier(args, args_values):
    """Build ``tree.HoeffdingTreeClassifier`` instances for a hyperparameter grid.

    Args:
        args: Hyperparameter names.
        args_values: One iterable of candidate values per name, same order as ``args``.

    Returns:
        ``(models, arg_strings)``: the instances and their matching
        ``"name=value, ..."`` labels, one per combination.

    Raises:
        ValueError: If ``args`` and ``args_values`` have different lengths.
    """
    return _build_model_grid(tree.HoeffdingTreeClassifier, args, args_values)


def provide_extremelyfastdecisiontreeclassifier(args, args_values):
    """Build ``tree.ExtremelyFastDecisionTreeClassifier`` instances for a hyperparameter grid.

    Args:
        args: Hyperparameter names.
        args_values: One iterable of candidate values per name, same order as ``args``.

    Returns:
        ``(models, arg_strings)``: the instances and their matching
        ``"name=value, ..."`` labels, one per combination.

    Raises:
        ValueError: If ``args`` and ``args_values`` have different lengths.
    """
    return _build_model_grid(tree.ExtremelyFastDecisionTreeClassifier, args, args_values)

In [9]:

# Grid search: run the predictor for every combination of model, number of
# selected features, feature selector and drift detector, repeating each
# configuration `iterations` times, and collect one result row per run.
result_rows = []
iterations = 5

# Grid builders keyed by the model name that is passed to BaseModelStockPredictor.
model_factories = {
    'hoeffdingtreeclassifier': provide_hoeffdingtreeclassifier,
    'extremelyfastdecisiontreeclassifier': provide_extremelyfastdecisiontreeclassifier,
}

# Hyperparameter grid shared by both models (currently a single combination).
hyperparameter_names = ['grace_period', 'max_depth', 'delta']
hyperparameter_values = [[200], [None], [1e-7]]

# Column order must match the row built inside the loop.
result_columns = [
    'model_name', 'drift_name', 'feature_selector_name', 'num_features',
    'learning_threshold', 'iteration', 'accuracy', 'drifts_detected',
    'model_args', 'ticker',
]

# Banner printed before each run. The leading/trailing whitespace reproduces the
# layout of the log output already recorded for this notebook.
run_banner = (
    "\n"
    "            model_name = {model_name}\n"
    "            model_args = {model_args}\n"
    "            drift_name = {drift_name}\n"
    "            feature_selector_name = {feature_selector_name}\n"
    "            num_features = {num_features}\n"
    "            learning_threshold = {learning_threshold}\n"
    "            iteration = {iteration}\n"
    + " " * 36
)

for model_name, build_models in model_factories.items():
    models, args_strs = build_models(hyperparameter_names, hyperparameter_values)

    # product() varies the right-most factor fastest, i.e. the same order as
    # nesting the loops with num_features outermost and iteration innermost.
    search_space = product(
        [5, 7, 10, 15, 20],                                   # num_features
        ['selectkbest', 'tstat'],                             # feature_selector_name
        zip(models, args_strs),                               # (model, model_args)
        ['adwin', 'kswin', 'pagehinkley', 'bollingerband'],   # drift_name
        [500],                                                # learning_threshold
        range(iterations),                                    # iteration (0-based)
    )

    for (num_features, feature_selector_name, (model, model_args),
         drift_name, learning_threshold, iteration) in search_space:

        print(run_banner.format(
            model_name=model_name,
            model_args=model_args,
            drift_name=drift_name,
            feature_selector_name=feature_selector_name,
            num_features=num_features,
            learning_threshold=learning_threshold,
            iteration=iteration + 1,
        ))

        # NOTE(review): the configured `model` instance is never handed to the
        # predictor - only `model_name` is - so as far as this cell shows, the
        # `model_args` recorded below may not describe the model actually trained.
        # Confirm how BaseModelStockPredictor builds its model.
        stock_predictor = BaseModelStockPredictor(
            stock_data=stock_data,
            model_name=model_name,
            drift_name=drift_name,
            feature_selector_name=feature_selector_name,
            num_features=num_features,
            learning_threshold=learning_threshold,
        )

        accuracy, metrics_result = stock_predictor.prediction()

        result_rows.append([
            model_name,
            drift_name,
            feature_selector_name,
            num_features,
            learning_threshold,
            iteration + 1,              # reported 1-based, matching the banner
            round(accuracy, 3),
            stock_predictor.drifts_detected,
            model_args,
            ticker,
        ])
        print('accuracy = {:.4f}'.format(accuracy))

result_df = pd.DataFrame(result_rows, columns=result_columns)
result_df.to_csv('results_df_selector_2.csv', index=False)



            model_name = hoeffdingtreeclassifier
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 1
                                    
accuracy = 0.5450

            model_name = hoeffdingtreeclassifier
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 2
                                    
accuracy = 0.5450

            model_name = hoeffdingtreeclassifier
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 3
              