In [1]:
import pandas as pd
from DataLoader import DataLoader
from river import stream
from river import tree
from river import metrics
from river import preprocessing
from river import feature_selection
from river import drift
from river import stats

def ohlc_stream(df, target_col='target'):
    """Yield ``(features, label)`` pairs from a DataFrame, one row at a time.

    Args:
        df: DataFrame containing the feature columns and the label column.
        target_col: Name of the label column. Defaults to ``'target'``.

    Yields:
        tuple: ``(features, label)`` where ``features`` is a dict mapping every
        column name except ``target_col`` to the row's value, and ``label`` is
        the row's ``target_col`` value.

    Raises:
        KeyError: If ``target_col`` is not a column of ``df``. Because this is
            a generator, the error surfaces when the first row is consumed.
    """
    for _, row in df.iterrows():
        # Remove the label by name instead of slicing off the last position:
        # a positional slice leaks the label into the features (and drops a
        # real feature) whenever the label is not the final column.
        features = row.drop(labels=target_col).to_dict()
        yield features, row[target_col]

def get_drift_detector(name: str):
    """Create a drift detector from its short, case-insensitive name.

    Supported names: ``"adwin"``, ``"kswin"``, ``"hddm_a"``, ``"hddm_w"``.

    Args:
        name: Detector name; compared after lower-casing.

    Returns:
        A freshly constructed detector instance from ``river.drift``.

    Raises:
        ValueError: If ``name`` is not one of the supported detectors. The
            message includes the rejected value and the accepted names.
    """
    key = name.lower()
    if key == "adwin":
        return drift.ADWIN()
    if key == "kswin":
        return drift.KSWIN(alpha=0.0001, window_size=100)
    if key in ("hddm_a", "hddm_w"):
        cls_name = key.upper()
        # Prefer the top-level attribute, exactly as before.
        # NOTE(review): recent river releases appear to expose the HDDM
        # detectors under `drift.binary` instead — confirm against the
        # installed version. The fallback keeps both layouts working.
        detector_cls = getattr(drift, cls_name, None)
        if detector_cls is None:
            detector_cls = getattr(drift.binary, cls_name)
        return detector_cls()
    raise ValueError(
        f"Unknown detector {name!r}; expected one of: adwin, kswin, hddm_a, hddm_w"
    )

def get_pipeline():
    """Assemble the online-learning pipeline used by ``main``.

    The steps are chained with ``|`` in this order:

    1. ``preprocessing.StandardScaler()``
    2. ``feature_selection.SelectKBest`` keeping ``k=7`` features, ranked with
       ``stats.PearsonCorr()`` as the similarity measure
    3. ``tree.HoeffdingTreeClassifier`` with ``grace_period=30``

    Returns:
        The object produced by composing the three steps with ``|``.
    """
    return (
        preprocessing.StandardScaler()
        | feature_selection.SelectKBest(k=7, similarity=stats.PearsonCorr())
        | tree.HoeffdingTreeClassifier(grace_period=30)
    )

def main(df, drift_name="adwin"):
    """Run a predict-then-learn pass over ``df`` while monitoring for drift.

    For each row the pipeline first predicts, then learns from the true label.
    When a prediction is available, the 0/1 prediction error is fed to the
    drift detector and the classification report is updated. A message is
    printed whenever the detector flags drift, and the running report is
    printed every 500 rows (at indices 500, 1000, ...).

    Args:
        df: DataFrame consumed through ``ohlc_stream``; it must contain a
            ``'target'`` column holding the label.
        drift_name: Name passed to ``get_drift_detector``.

    Raises:
        ValueError: If ``drift_name`` is not recognised by
            ``get_drift_detector``.
    """
    data_stream = ohlc_stream(df)

    # get_pipeline() always returns a pipeline object, so no None guard is needed.
    pipeline = get_pipeline()
    drift_detector = get_drift_detector(drift_name)
    metric = metrics.ClassificationReport()

    for i, (x, y) in enumerate(data_stream):
        # Predict before learning so every sample is scored while still unseen.
        y_pred = pipeline.predict_one(x)
        pipeline.learn_one(x, y)

        # Only real predictions carry an error signal. Feeding a made-up
        # "correct" (0) for a missing prediction would dilute the error rate
        # the detector monitors, so detector and metric are skipped together.
        if y_pred is not None:
            drift_detector.update(int(y_pred != y))
            metric.update(y, y_pred)

            # Checked right after the update so a flag is reported once, for
            # the sample that triggered it.
            if drift_detector.drift_detected:
                print(f'Drift detected at index {i}! ({drift_name})')

        if i % 500 == 0 and i > 0:
            print(f'[{i} sample] \n{metric}')


In [11]:
# Create the project's loader; later cells reuse this `dataLoader` instance.
dataLoader = DataLoader()

# Disabled alternative data source, kept for reference.
# dataLoader.get_data_locally('AAPL')

# Fetch AAPL data ('10y' is presumably a ten-year history window — confirm in DataLoader).
dataLoader.get_data('AAPL', '10y')
# Remove the 'date' column from the raw frame in place.
dataLoader.raw_data.drop(columns='date', inplace=True)


In [12]:
# Ask the loader to build its feature/label dataset, then keep a reference to
# the result for the streaming run below.
# NOTE(review): add_features_and_target() presumably derives `data_with_target`
# from `raw_data` — confirm in DataLoader.
dataLoader.add_features_and_target()
stock_data = dataLoader.data_with_target

In [13]:
# Example usage: stream `stock_data` through the pipeline, monitoring drift
# with the "adwin" detector. Progress reports are printed by main() itself.
main(stock_data, drift_name="adwin")

[500 sample] 
           Precision   Recall   F1       Support  
                                                  
      -1      38.98%    9.66%   15.49%       238  
       1      51.25%   86.26%   64.30%       262  
                                                  
   Macro      45.12%   47.96%   39.89%            
   Micro      49.80%   49.80%   49.80%            
Weighted      45.41%   49.80%   41.06%            

                 49.80% accuracy                  
[1000 sample] 
           Precision   Recall   F1       Support  
                                                  
      -1      38.98%    4.97%    8.81%       463  
       1      53.24%   93.30%   67.79%       537  
                                                  
   Macro      46.11%   49.13%   38.30%            
   Micro      52.40%   52.40%   52.40%            
Weighted      46.64%   52.40%   40.49%            

                 52.40% accuracy                  
[1500 sample] 
           Precision   Recall   F1  