In [None]:
import pandas as pd
from DataLoader import DataLoader
from river import stream
from river import tree
from river import metrics
from river import preprocessing
from river import feature_selection
from river import drift
from river import stats

class StockPredictor:
    """Online (predict-then-learn) classifier over a stock DataFrame with drift monitoring.

    Each row of ``stock_data`` is turned into a ``(features, target)`` pair,
    fed through a scaler -> feature selector -> Hoeffding tree pipeline, and
    the 0/1 prediction error is pushed into a drift detector.
    """

    # Lower-cased detector names accepted by ``get_drift_detector`` mapped to the
    # attribute looked up on the imported ``drift`` module.
    _DETECTOR_CLASSES = {
        "adwin": "ADWIN",
        "kswin": "KSWIN",
        "dummydriftdetector": "DummyDriftDetector",
        "pagehinkley": "PageHinkley",
    }

    def __init__(self, stock_data, drift_name, provided_detector=None):
        """Set up the data stream, the model pipeline, the detector and the metric.

        Args:
            stock_data: DataFrame with a ``'target'`` column; every other column
                is used as a feature.
            drift_name: Detector name (see ``get_drift_detector``). Used to build
                a default-configured detector when no instance is supplied.
            provided_detector: Ready-made detector instance exposing ``update``
                and ``drift_detected``. When it is ``None`` or a string, the
                detector is built from ``drift_name`` instead.

        Raises:
            ValueError: If a detector has to be built and ``drift_name`` is unknown.
        """
        self.stock_data = stock_data
        # NOTE: this is a generator, so it is exhausted after one prediction() run.
        self.data_stream = StockPredictor.ohlc_stream(stock_data)
        self.pipeline = StockPredictor.get_pipeline()
        self.drift_name = drift_name
        # Omitting provided_detector used to store None and crash on the first
        # update(); fall back to the named detector in that case as well.
        if provided_detector is None or isinstance(provided_detector, str):
            self.drift_detector = StockPredictor.get_drift_detector(drift_name)
        else:
            self.drift_detector = provided_detector
        self.metric = metrics.ClassificationReport()

    @staticmethod
    def ohlc_stream(df):
        """Yield ``(features, target)`` for every row of ``df``.

        The ``'target'`` column is removed from the features by label, so the
        label cannot leak into the features regardless of column order.

        Args:
            df: DataFrame containing a ``'target'`` column.

        Yields:
            Tuple of (dict mapping column name to value, value of ``'target'``).

        Raises:
            KeyError: If ``df`` has no ``'target'`` column.
        """
        for _, row in df.iterrows():
            features = row.drop('target').to_dict()
            yield features, row['target']

    @staticmethod
    def get_drift_detector(name: str, provided_detector=None):
        """Build a default-configured drift detector from its name.

        Args:
            name: Case-insensitive detector name: ``adwin``, ``kswin``,
                ``dummydriftdetector`` or ``pagehinkley``.
            provided_detector: Unused; kept so existing callers keep working.

        Returns:
            A new detector instance created with its default arguments.

        Raises:
            ValueError: If ``name`` is not one of the supported names.
        """
        class_name = StockPredictor._DETECTOR_CLASSES.get(name.lower())
        if class_name is None:
            known = ", ".join(sorted(StockPredictor._DETECTOR_CLASSES))
            raise ValueError(f"Unknown detector {name!r}; expected one of: {known}")
        return getattr(drift, class_name)()

    @staticmethod
    def get_pipeline(grace_period=30, max_depth=5, k=7):
        """Build the scaler -> feature selector -> Hoeffding tree pipeline.

        Args:
            grace_period: ``grace_period`` passed to ``HoeffdingTreeClassifier``.
            max_depth: ``max_depth`` passed to ``HoeffdingTreeClassifier``.
            k: Number of features kept by ``SelectKBest``.

        Returns:
            The composed pipeline.
        """
        model = tree.HoeffdingTreeClassifier(grace_period=grace_period, max_depth=max_depth)
        feature_selector = feature_selection.SelectKBest(k=k, similarity=stats.PearsonCorr())
        scaler = preprocessing.StandardScaler()
        return scaler | feature_selector | model

    def prediction(self):
        """Run the predict-then-learn loop over the stream and report the results.

        For every sample the pipeline predicts first and learns afterwards. The
        0/1 error is fed to the drift detector, and a message is printed whenever
        the detector flags drift. Accuracy and the per-class table are printed /
        displayed at the end.
        """
        # Label messages with the detector actually in use; drift_name can differ
        # from it when a detector instance was supplied by the caller.
        detector_label = type(self.drift_detector).__name__

        for i, (x, y) in enumerate(self.data_stream):

            y_pred = self.pipeline.predict_one(x)
            self.pipeline.learn_one(x, y)

            # Without a prediction there is nothing to score: count it as
            # "no error" for the detector and leave the metric untouched.
            error = int(y_pred != y) if y_pred is not None else 0
            self.drift_detector.update(error)

            if y_pred is not None:
                self.metric.update(y, y_pred)

            if self.drift_detector.drift_detected:
                print(f'Drift detected at index {i}! ({detector_label})')

        accuracy, metrics_result = self.get_metrics()
        print(f'accuracy: {accuracy}')
        display(metrics_result)

    def get_metrics(self):
        """Summarise the collected metric.

        Returns:
            Tuple ``(accuracy, metrics_result)``: accuracy rounded to 3 decimals
            and a DataFrame with columns ``class``, ``precision``, ``recall``,
            ``f1`` holding one row per class seen by the metric (rounded to 3
            decimals). The frame is empty when no class has been recorded.
        """
        # NOTE(review): _f1s and _accuracy are private attributes of the report
        # object; confirm they still exist when upgrading river.
        classes = sorted(self.metric.cm.classes)

        for c in classes:
            if c not in self.metric._f1s:
                self.metric._f1s[c] = metrics.F1(cm=self.metric.cm, pos_val=c)

        accuracy = round(self.metric._accuracy.get(), 3)

        # One row per observed class instead of hard-coded -1 / 1, so a class
        # that never occurred no longer raises KeyError.
        rows = []
        for c in classes:
            f1 = self.metric._f1s[c]
            rows.append([c, f1.precision.get(), f1.recall.get(), f1.get()])

        metrics_result = pd.DataFrame(rows, columns=['class', 'precision', 'recall', 'f1'])
        metrics_result = metrics_result.round(3)

        return accuracy, metrics_result
    


### DATA LOADING

In [6]:
# Build the project's DataLoader and run its pipeline for the 'AAPL' ticker;
# the result is the frame handed to StockPredictor further down.
# If Yahoo does not work, use "dataLoader.get_data_locally('AAPL')" instead.
dataLoader = DataLoader()
stock_data = dataLoader.pipeline('AAPL')

### PREDICTION

In [83]:
# Explicitly configured drift detectors, one per algorithm, that can be passed
# to StockPredictor through its `provided_detector` argument.
adwin_detector = drift.ADWIN(
    delta=0.002,
    clock=32,
    max_buckets=5,
    min_window_length=5,
    grace_period=10,
)
kswin_detector = drift.KSWIN(alpha=0.005, window_size=100, stat_size=30)
# The variable keeps its original spelling so existing references keep working.
page_hinley_detector = drift.PageHinkley(
    min_instances=30,
    delta=0.005,
    threshold=50.0,
    alpha=0.9999,
    mode="both",
)

In [None]:
# drift_name labels this run, so it must name the detector that is actually
# passed as provided_detector (a PageHinkley instance, not ADWIN).
stock_predictor = StockPredictor(
    stock_data=stock_data,
    drift_name='pagehinkley',
    provided_detector=drift.PageHinkley(
        min_instances=30,
        delta=0.005,
        threshold=50.0,
        alpha=0.9999,
        mode="both",
    ),
)
stock_predictor.prediction()

Drift detected at index 5752! (adwin)
accuracy: 0.529


Unnamed: 0,class,precision,recall,f1
0,-1,0.525,0.593,0.557
1,1,0.534,0.465,0.497


In [None]:
# TODO: add tree.SGTClassifier (Stochastic Gradient Tree) as a model
# TODO: add other models (MLP, RandomForest, XGBoost)
# TODO: react when drift occurs (e.g. reset or retrain the model)
# TODO: add new drift detectors
# TODO: grid search over different models and drift detectors
