In [4]:
import pandas as pd
import numpy as np
import joblib

class B3nellyDataFrameHandler:
    """Hold a pandas DataFrame and apply cleaning / target-building steps to it.

    The working frame is kept in ``self.df``. Every method updates
    ``self.df`` and returns ``None``.
    """

    def __init__(self, df=None):
        """Store *df* (by reference, no copy) or an empty DataFrame if omitted."""
        if df is None:
            df = pd.DataFrame()
        self.df = df

    def clean_df(self,
                 remove_cols_by_names=None,
                 drop_na=True,
                 to_lowercase=True,
                 rename_cols=None
                 ):
        """Run the cleaning steps in a fixed order.

        Order: drop columns, drop rows containing NaN, rename columns,
        lowercase column names. Because lowercasing runs last,
        ``remove_cols_by_names`` and the keys of ``rename_cols`` must match
        the column names as they are *before* lowercasing.

        Args:
            remove_cols_by_names: Column label(s) to drop, or ``None`` to skip.
            drop_na: When true, drop every row that contains a missing value.
            to_lowercase: When true, lowercase all column names.
            rename_cols: Mapping ``{old_name: new_name}``, or ``None`` to skip.

        Raises:
            ValueError: If a key of ``rename_cols`` is not an existing column.
        """
        self.remove_cols(remove_cols_by_names)
        self.drop_na(drop_na)
        self.rename_cols(rename_cols)
        self.set_col_names_to_lowercase(to_lowercase)

    def remove_cols(self, remove_col_names=None):
        """Drop the given column label(s); do nothing when ``None``."""
        if remove_col_names is None:
            return
        self.df = self.df.drop(remove_col_names, axis=1)

    def set_col_names_to_lowercase(self, to_lowercase: bool = True) -> None:
        """Lowercase every column name when *to_lowercase* is true.

        The column index of the currently held DataFrame object is
        reassigned in place.
        """
        if to_lowercase:
            self.df.columns = self.df.columns.str.lower()

    def drop_na(self, drop_na: bool = True) -> None:
        """Drop rows containing any missing value when *drop_na* is true."""
        if drop_na:
            self.df = self.df.dropna()

    def rename_cols(self, new_col_names=None):
        """Rename columns using an ``{old_name: new_name}`` mapping.

        Args:
            new_col_names: Mapping of existing column names to new names,
                or ``None`` to skip.

        Raises:
            ValueError: If any key of the mapping is not an existing column.
        """
        if new_col_names is None:
            return

        # Validate up front: DataFrame.rename silently ignores unknown keys.
        for old_col_name in new_col_names:
            if old_col_name not in self.df.columns:
                raise ValueError(f"Column '{old_col_name}' was not found in DataFrame")

        self.df = self.df.rename(columns=new_col_names)

    def prepare_df(self,
                   predict_on_col_name: str = "close",
                   prediction_col_name: str = "tomorrow",
                   steps_forward: int = 1,
                   target_col_name: str = "target",
                   drop_na: bool = True) -> None:
        """Add the shifted column and the binary target, then drop NaN rows.

        Args:
            predict_on_col_name: Existing column the target is derived from.
            prediction_col_name: Name of the column that receives the values
                of ``predict_on_col_name`` shifted ``steps_forward`` rows back.
            steps_forward: Number of rows to look ahead.
            target_col_name: Name of the 0/1 target column to create.
            drop_na: When true, drop every row that contains a missing value
                afterwards (this removes the trailing rows whose shifted
                value is NaN).

        Raises:
            ValueError: If ``predict_on_col_name`` is not a column.
        """
        self.set_tomorrow(predict_on_col_name, prediction_col_name, steps_forward)
        # Forward the actual column names; the target must be built from the
        # same columns that set_tomorrow just read and wrote.
        self.set_training_target(
            prediction_col_name,
            predict_on_col_name,
            target_col_name=target_col_name,
        )
        self.drop_na(drop_na)

    def set_tomorrow(self,
                     predict_on_col_name: str = "close",
                     prediction_col_name: str = "tomorrow",
                     steps_forward: int = 1) -> None:
        """Create ``prediction_col_name`` as ``predict_on_col_name`` shifted up.

        Row *i* of the new column holds the value found ``steps_forward``
        rows later in ``predict_on_col_name``; the last ``steps_forward``
        rows become NaN.

        Raises:
            ValueError: If ``predict_on_col_name`` is not a column.
        """
        if predict_on_col_name not in self.df.columns:
            raise ValueError(f"Column '{predict_on_col_name}' not found in DataFrame")

        self.df[prediction_col_name] = self.df[predict_on_col_name].shift(-steps_forward)

    def set_training_target(self, prediction_col_name, predict_on_col_name, target_col_name="target"):
        """Create a 0/1 column: 1 where the future value exceeds the current one.

        Args:
            prediction_col_name: Column holding the shifted (future) values.
            predict_on_col_name: Column holding the current values.
            target_col_name: Name of the integer column to create.

        Rows where the comparison involves NaN evaluate to false and get 0.

        Raises:
            ValueError: If either input column is missing.
        """
        if prediction_col_name not in self.df.columns or predict_on_col_name not in self.df.columns:
            raise ValueError(f"Both prediction_col_name:{prediction_col_name} and predict_on_col_name:{predict_on_col_name} columns must exist in the DataFrame.")

        # Compare the columns that were passed in, not fixed column names.
        is_higher = self.df[prediction_col_name] > self.df[predict_on_col_name]
        self.df[target_col_name] = is_higher.astype(int)