In [40]:
import pandas as pd
from DataLoader import DataLoader
from river import stream
from river import tree
from river import metrics
from river import preprocessing
from river import feature_selection
from river import drift
from river import stats

def ohlc_stream(df, target_col='target'):
    """Yield ``(features, label)`` pairs from a DataFrame, one pair per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns plus the label column.
    target_col : str, default 'target'
        Name of the label column.

    Yields
    ------
    tuple
        ``(features, label)`` where ``features`` is a dict keyed by column
        name with the label column excluded, and ``label`` is the row's
        value in ``target_col``.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``. The generator is lazy,
        so this surfaces on the first iteration.
    """
    for _, row in df.iterrows():
        # Remove the label by name, not by position: slicing off the last
        # column would leak the label into the features (and discard a real
        # feature) whenever the label is not the final column.
        features = row.drop(labels=target_col).to_dict()
        yield features, row[target_col]

def get_drift_detector(name: str):
    """Create a river drift detector from its short name.

    Parameters
    ----------
    name : str
        Detector name, matched case-insensitively. Supported values are
        ``"adwin"``, ``"kswin"``, ``"hddm_a"`` and ``"hddm_w"``.

    Returns
    -------
    A freshly constructed detector from ``river.drift``. KSWIN is built with
    ``alpha=0.0001`` and ``window_size=100``; the others use their defaults.

    Raises
    ------
    ValueError
        If ``name`` does not match a supported detector. The message names
        the rejected value and lists the accepted ones.
    """
    # Factories are wrapped in lambdas so a detector is only constructed
    # once the requested name has been validated.
    factories = {
        "adwin": lambda: drift.ADWIN(),
        "kswin": lambda: drift.KSWIN(alpha=0.0001, window_size=100),
        "hddm_a": lambda: drift.HDDM_A(),
        "hddm_w": lambda: drift.HDDM_W(),
    }
    key = name.lower()
    if key not in factories:
        supported = ", ".join(sorted(factories))
        raise ValueError(f"Unknown detector {name!r}; expected one of: {supported}")
    return factories[key]()

def get_pipeline(k=7, grace_period=30):
    """Build the online learning pipeline used by ``main``.

    The pipeline chains, in order: ``preprocessing.StandardScaler`` ->
    ``feature_selection.SelectKBest`` (scored with ``stats.PearsonCorr``) ->
    ``tree.HoeffdingTreeClassifier``.

    Parameters
    ----------
    k : int, default 7
        Passed to ``SelectKBest`` as ``k`` (number of features to keep).
    grace_period : int, default 30
        Passed to ``HoeffdingTreeClassifier`` as ``grace_period``.

    Returns
    -------
    The composed river pipeline ``scaler | feature_selector | model``.
    """
    scaler = preprocessing.StandardScaler()
    feature_selector = feature_selection.SelectKBest(
        k=k,
        similarity=stats.PearsonCorr(),
    )
    model = tree.HoeffdingTreeClassifier(grace_period=grace_period)
    return scaler | feature_selector | model

def main(df, drift_name="adwin"):
    """Run a predict-then-learn evaluation over ``df`` with drift monitoring.

    For every ``(features, label)`` pair produced by ``ohlc_stream(df)`` the
    pipeline first predicts, then learns from the sample. The 0/1 prediction
    error is fed to the drift detector (0 is used while the pipeline returns
    no prediction) and, when a prediction exists, the classification report
    is updated. A message is printed whenever the detector flags drift, and
    the running report is printed at every index that is a positive multiple
    of 500.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to ``ohlc_stream``.
    drift_name : str, default "adwin"
        Detector name forwarded to ``get_drift_detector``.

    Raises
    ------
    ValueError
        If ``get_pipeline()`` returns ``None``; ``get_drift_detector`` also
        raises ``ValueError`` for an unknown ``drift_name``.
    """
    pipeline = get_pipeline()
    if pipeline is None:
        raise ValueError("Pipeline could not be created. Check get_pipeline().")

    drift_detector = get_drift_detector(drift_name)
    metric = metrics.ClassificationReport()

    for idx, (features, label) in enumerate(ohlc_stream(df)):
        # Test-then-train: score the sample before the model sees its label.
        prediction = pipeline.predict_one(features)
        pipeline.learn_one(features, label)

        has_prediction = prediction is not None

        # The detector consumes a 0/1 error signal; no prediction counts as 0.
        if has_prediction:
            miss = int(prediction != label)
        else:
            miss = 0
        drift_detector.update(miss)

        if has_prediction:
            metric.update(label, prediction)

        if drift_detector.drift_detected:
            print(f'Drift detected at index {idx}! ({drift_name})')

        # Periodic progress report (index 0 is skipped).
        if idx > 0 and idx % 500 == 0:
            print(f'[{idx} sample] \n{metric}')


In [4]:
# Build the modelling frame for AAPL through the project-local DataLoader helper.
# NOTE(review): DataLoader's implementation is not visible here; the per-line
# notes below are inferred from the method names and should be confirmed.
dataLoader = DataLoader()
dataLoader.get_data_locally('AAPL')  # presumably loads stored AAPL data from local files — TODO confirm
dataLoader.add_features_and_target()  # presumably adds feature columns plus the 'target' label read by ohlc_stream — TODO confirm
stock_data = dataLoader.data_with_target  # frame that is later passed to main()

In [41]:
# Example usage: run the predict-then-learn evaluation on stock_data with the
# ADWIN drift detector. get_drift_detector also accepts "kswin", "hddm_a"
# and "hddm_w" as drift_name.
main(stock_data, drift_name="adwin")

[500 sample] 
           Precision   Recall   F1       Support  
                                                  
      -1      51.44%   63.39%   56.79%       254  
       1      50.27%   38.21%   43.42%       246  
                                                  
   Macro      50.85%   50.80%   50.10%            
   Micro      51.00%   51.00%   51.00%            
Weighted      50.86%   51.00%   50.21%            

                 51.00% accuracy                  
[1000 sample] 
           Precision   Recall   F1       Support  
                                                  
      -1      53.89%   68.97%   60.50%       522  
       1      51.20%   35.56%   41.98%       478  
                                                  
   Macro      52.55%   52.27%   51.24%            
   Micro      53.00%   53.00%   53.00%            
Weighted      52.61%   53.00%   51.65%            

                 53.00% accuracy                  
[1500 sample] 
           Precision   Recall   F1  