In [1]:
from classification import TStatFeatureSelector, BollingerBandDriftDetector, FixedSizeBuffer, BaseModelStockPredictor
from DataLoader import DataLoader
import pandas as pd
from itertools import product
from river import tree

### DATA LOADING

In [8]:
stock_data = pd.read_csv("AAPL_full.csv")
ticker = 'AAPL'

In [2]:
ticker = 'AAPL'
dataLoader = DataLoader() # if yahoo does not work use "dataLoader.get_data_locally('AAPL')"
stock_data = dataLoader.pipeline(ticker)

In [3]:
stock_data

Unnamed: 0,open,close,high,low,volume,max_5,min_5,max_10,min_10,max_20,...,stochastic_fast,stochastic_slow,%r,atr,cmo,cci,mom,bias,wnr,target
0,0.064731,0.064731,0.065159,0.064731,34272000,0.065588,0.063445,0.065588,0.063016,0.066875,...,69.988126,73.320630,42.871125,0.001194,33.333609,38.811075,0.003001,0.666650,-0.333332,0
1,0.064731,0.064731,0.065159,0.064731,37408000,0.065588,0.064302,0.065588,0.063445,0.066875,...,69.988126,66.655392,50.014527,0.001194,22.222340,38.747826,0.001715,0.398934,-0.388888,0
2,0.064731,0.064302,0.065159,0.064302,76092800,0.065588,0.064302,0.065588,0.063445,0.065588,...,59.989922,66.655392,66.676448,0.001194,-6.666682,31.136188,-0.000429,-0.199567,-0.533333,0
3,0.063016,0.062159,0.063016,0.062159,25244800,0.064731,0.062159,0.065588,0.062159,0.065588,...,9.998639,46.658896,100.000000,0.001133,-22.222077,-88.315492,-0.001715,-3.268648,-0.611110,1
4,0.063873,0.063873,0.064302,0.063873,31315200,0.064731,0.062159,0.065588,0.062159,0.065588,...,44.435310,38.141290,55.564690,0.001255,0.000000,-7.832802,0.000000,-0.600392,-0.500000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10951,200.710007,201.360001,202.750000,199.699997,46742400,211.259995,201.360001,212.929993,198.270004,213.040634,...,39.932454,49.496742,75.516075,5.175482,15.140598,-53.982399,4.128632,-3.011385,-0.424297,0
10952,193.669998,195.270004,197.699997,193.460007,78432900,208.779999,195.270004,212.929993,195.270004,213.040634,...,10.853671,31.401398,91.162119,5.230436,-9.282185,-109.785045,-3.000000,-5.808633,-0.546411,1
10953,198.300003,200.210007,200.740005,197.429993,56288500,206.860001,195.270004,212.929993,195.270004,213.040634,...,34.441412,28.409179,67.041009,5.362205,-42.764716,-61.870892,-10.579987,-2.930364,-0.713824,1
10954,200.589996,200.419998,202.729996,199.899994,45339700,202.089996,195.270004,212.330002,195.270004,213.040634,...,33.984340,26.426474,66.015660,5.122784,-54.844385,-41.233342,-12.509995,-2.235578,-0.774222,0


### PREDICTION (EXAMPLE USAGE)

In [4]:
stock_predictor = BaseModelStockPredictor(stock_data=stock_data, 
                                 model_name='hoeffdingtreeclassifier',
                                 drift_name='adwin',
                                 feature_selector_name='tstat',
                                 learning_threshold = 1000
                                 )
stock_predictor.prediction()

(0.525,
    class  precision  recall     f1
 0      0      0.519   0.537  0.528
 1      1      0.531   0.513  0.522)

In [5]:
selected = stock_predictor.pipeline[1].selected_features
print("Selected features:", selected)

Selected features: ['upward_pressure', 'min_10', 'min_20', 'sma_10', 'min_5', 'open', 'sma_5', 'low', 'sma_20', 'max_5', 'min_40', 'high', 'max_10', 'close', 'sma_50']


In [6]:
def provide_hoeffdingtreeclassifier(args, args_values):
    models = []
    arg_strings = []

    for values in product(*args_values):
        kwargs = dict(zip(args, values))
        model = tree.HoeffdingTreeClassifier(**kwargs)
        models.append(model)

        # Create readable string representation
        args_str = ', '.join(f"{key}={value}" for key, value in kwargs.items())
        arg_strings.append(args_str)

    return models, arg_strings

def provide_extremelyfastdecisiontreeclassifier(args, args_values):
    models = []
    arg_strings = []

    for values in product(*args_values):
        kwargs = dict(zip(args, values))
        model = tree.ExtremelyFastDecisionTreeClassifier(**kwargs)
        models.append(model)

        # Create readable string representation
        args_str = ', '.join(f"{key}={value}" for key, value in kwargs.items())
        arg_strings.append(args_str)

    return models, arg_strings

In [10]:

result_rows = []
iterations = 5

for model_name in ['nochange', 'majorityclass', 'hoeffdingtreeclassifier', 'extremelyfastdecisiontreeclassifier']:

    if model_name == 'hoeffdingtreeclassifier':
        models, args_strs = provide_hoeffdingtreeclassifier(
            ['grace_period', 'max_depth', 'delta'],
            [[200], [None], [1e-7]]
        )
    if model_name == 'extremelyfastdecisiontreeclassifier':
        models, args_strs = provide_extremelyfastdecisiontreeclassifier(
            ['grace_period', 'max_depth', 'delta'],
            [[200], [None], [1e-7]]
        )

    if model_name in ['nochange', 'majorityclass', 'hoeffdingtreeclassifier', 'extremelyfastdecisiontreeclassifier']:

        for num_features in [5, 7, 10, 15, 20]:
        
            for feature_selector_name in ['selectkbest', 'tstat']:
    
                for model, model_args in zip(models, args_strs):
            
                    for drift_name in ['adwin', 'kswin', 'pagehinkley', 'bollingerband']:
            
                        for learning_threshold in [500]:
            
                            for iteration in range(iterations):
            
                                print('''
            model_name = {model_name}
            model_args = {model_args}
            drift_name = {drift_name}
            feature_selector_name = {feature_selector_name}
            num_features = {num_features}
            learning_threshold = {learning_threshold}
            iteration = {iteration}
                                    '''.format(
            model_name=model_name,
            model_args=model_args,
            drift_name=drift_name,
            feature_selector_name=feature_selector_name,
            num_features=num_features,
            learning_threshold=learning_threshold,
            iteration=iteration+1
            ))
                                
                                stock_predictor = BaseModelStockPredictor(stock_data=stock_data,
                                                model_name=model_name,
                                                drift_name=drift_name,
                                                feature_selector_name=feature_selector_name,
                                                num_features=num_features,
                                                learning_threshold = learning_threshold
                                                )
                                
                                accuracy, metrics_result = stock_predictor.prediction()
                                
                                result_rows.append([model_name, drift_name, feature_selector_name, num_features, learning_threshold, iteration+1, round(accuracy, 3), stock_predictor.drifts_detected, model_args, ticker])
                                print('accuracy = {:.4f}'.format(accuracy))

result_df = pd.DataFrame(result_rows, columns=['model_name', 'drift_name', 'feature_selector_name', 'num_features', 'learning_threshold', 'iteration', 'accuracy', 'drifts_detected', 'model_args', 'ticker'])
result_df.to_csv('results_df_selector_synthetic.csv', index=False)



            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 1
                                    
accuracy = 0.5010

            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 2
                                    
accuracy = 0.5010

            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = adwin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 3
                                    
accuracy = 0.5010

   

  res = hypotest_fun_out(*samples, **kwds)


accuracy = 0.5010

            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = kswin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 2
                                    
accuracy = 0.5010

            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = kswin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 3
                                    
accuracy = 0.5010

            model_name = nochange
            model_args = grace_period=200, max_depth=None, delta=1e-07
            drift_name = kswin
            feature_selector_name = selectkbest
            num_features = 5
            learning_threshold = 500
            iteration = 4
                                    
accu