In [7]:

from sklearn.svm import SVC

import sklearn.metrics as metrics
import pandas as pd

from pynoahfunc.processor import Processor


class DataSplitter:
    """Expanding-window (walk-forward) splitter over the rows of a DataFrame.

    Each step yields every row before the current position as the training
    set and the single row at the current position as the test set; the
    position then advances by one row. Iteration stops once the position
    reaches the number of rows.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding both the feature columns and the target column.
    target_col : str
        Name of the column used as the target; all other columns are inputs.
    init_size : int
        Number of leading rows in the first training window (and therefore
        the positional index of the first test row).

    Raises
    ------
    ValueError
        If ``init_size`` is negative.
    KeyError
        If ``target_col`` is not a column of ``df``.
    """

    def __init__(self, df : pd.DataFrame, target_col : str, init_size : int):
        if init_size < 0:
            raise ValueError("init_size must be non-negative")

        self._input_df = df.loc[:, df.columns != target_col]
        self._target_df = df[target_col]
        self._init_size = init_size
        # Set the cursor here as well so next() works without a prior iter().
        self._curr_idx = init_size

    def __iter__(self):
        """Rewind to the initial window and return self as the iterator."""
        self._curr_idx = self._init_size
        return self

    def __next__(self):
        """Return ``(train_input, [test_input], train_target, [test_target])``.

        ``train_input`` is a list of 1-D row arrays and ``train_target`` a list
        of scalars, covering rows ``0 .. position-1``. The test row is wrapped
        in one-element lists so it can be passed as a batch of size one.
        """
        split_at = self._curr_idx
        if split_at >= len(self._input_df):
            raise StopIteration

        # One slice per step instead of a Python-level loop of .iloc[i] calls;
        # list() keeps the original "list of rows" / "list of scalars" shape.
        train_input = list(self._input_df.iloc[:split_at].values)
        train_target = list(self._target_df.iloc[:split_at].values)

        test_input = self._input_df.iloc[split_at].values
        test_target = self._target_df.iloc[split_at]

        self._curr_idx = split_at + 1

        return train_input, [test_input], train_target, [test_target]


In [8]:


# Feature expressions handed to pynoahfunc's Processor. They are joined with
# "|" into a single request further down in this cell, and each expression
# shows up as one column (named after the expression) in the resulting frame.
# NOTE(review): the meaning of the individual functions (RAVG_MEDIAN, RSI, ...)
# and of the O/H/L/C/V wrappers is defined by pynoahfunc, not here -- presumably
# open/high/low/close/volume series; confirm against that library.
FUNCTIONS = [
    "QUANTILE_RATIO(C(DGS5),89,0.10,0.90)",
    "RAVG_MEDIAN(L(EURSEKX),8)",
    "RAVG_MEDIAN(C(THREEFYTP3),55)",
    "RAVGS_RATIO(C(DFII30),3,13)",
    "RSEMS_RATIO(C(DEXHKUS),8,8)",
    "RAVG_MEDIAN(V(IAU),34)",
    "RATIO_TO_RAVG(O(IDXT100),34)",
    "RAVGS_RATIO(H(CPER),34,21)",
    "RATIO_TO_RAVG(C(BAMLC0A0CM),21)",
    "RATIO_TO_RAVG(L(CPER),21)",
    "RENTROPY(V(EURGBPX),21)",
    "RATIO_TO_RLINEAR(O(IAU),5)",
    "RKURT(V(IDXVIX),233)",
    "QUANTILE_RATIO(O(PHPX),34,0.25,0.75)",
    "RSI(O(IDXAX),34)",
    "QUANTILE_RATIO(O(IAU),55,0.25,0.75)",
    "RATIO_TO_RAVG(L(EURJPYX),55)",
    "RSEMS_RATIO(H(IDXRUT),144,55)",
    "RENTROPY(V(EURCADX),144)",
    "RAVG_MEDIAN(O(GBPUSDX),5)",
    "RAVGS_RATIO(H(SLV),89,89)",
    "STOCH_OSC_RAVG(AGGDIDL,233,89)",
    "RATIO_TO_RAVG(C(DFF),5)",
    "RSKEW(C(GVZCLS),8)",
    "RAVGS_RATIO(L(CNYX),3,13)",
    "RSI(H(CPER),21)",
    "RENTROPY(V(IDXFCHI),89)",
    "STOCH_OSC_RAVG(TQQQ,21,89)",
    "RENTROPY(L(SPXL),233)",
    "RAVGS_RATIO(C(THREEFYTP7),8,89)",
    "QUANTILE_RATIO(L(IDXFCHI),8,0.25,0.75)",
    "RENTROPY(H(NZDUSDX),5)",
    "RAVG_MEDIAN(O(AGGDIDG),3)",
    "RSI(L(SGDX),233)",
    "QUANTILE_RATIO(V(NZDUSDX),8,0.10,0.90)",
    "RAVGS_RATIO(C(DEXTAUS),13,3)",
    "STOCH_OSC_RAVG(SLV,5,21)",
    "QUANTILE_RATIO(V(IDXAORD),8,0.25,0.75)",
    "RATIO_TO_RAVG(C(IDXFTSE),8)",
    "RAVG_MEDIAN(C(BAMLHE00EHYITRIV),34)",
    "QUANTILE_RATIO(H(EURHUFX),13,0.10,0.90)",
    "RSKEW(C(IUDSOIA),5)",
    "MACDFAST(O(NZDUSDX))",
    "QUANTILE_RATIO(C(AGGDG),144,0.25,0.75)",
    "STOCH_OSC_RAVG(SLV,55,8)",
    "MACDFAST(L(EURJPYX))",
    "W_VOL_AVG(IDXNYA,13,5)",
    "RATIO_TO_RLINEAR(H(IDXBFX),5)",
    "W_VOL_AVG(AGGDIDL,13,144)",
    "RSKEW(C(THREEFF10),3)",
]


# Processor expression whose sign is turned into the binary class label.
PROFIT_TARGET = "FUTURE_PERCENT_PROFIT(TQQQ,5)"

# Number of leading rows in the first training window; every later row is
# predicted once by a model refit on all rows before it.
INIT_TRAIN_SIZE = 1000

# Evaluate all feature expressions in one request, then the target on its own
# (the target is requested without autoscale).
df = Processor().process("|".join(FUNCTIONS), autoscale = True, scale_type = "robust")
dftarg = Processor().process(PROFIT_TARGET)

# Keep only index values present in both frames.
df = pd.merge(df, dftarg, how="inner", left_index=True, right_index=True)

# Pre-cleaning snapshot: shape, first rows and column names.
print(df.shape)
print(df.head())
print(df.columns)

# Drop every row with a missing value, then binarise the target:
# 1 when PROFIT_TARGET is strictly positive, otherwise 0.
df = df.dropna()
df = df.assign(**{PROFIT_TARGET: (df[PROFIT_TARGET] > 0).astype(int)})

splitter = DataSplitter(df, PROFIT_TARGET, INIT_TRAIN_SIZE)

actuals = []
preds = []

print("Data shape: ", df.shape)

# NOTE(review): the target name suggests it looks 5 rows into the future. If
# so, the most recent training rows of each window carry labels that would not
# yet be known when the test row is predicted (look-ahead leakage); consider
# leaving a gap between the training window and the test row. Confirm against
# the definition of FUTURE_PERCENT_PROFIT in pynoahfunc.
for index, (train_input, test_input, train_target, test_target) in enumerate(splitter, start=1):
    # Inverse-frequency class weights: each class is weighted by the share of
    # the *other* class, so the rarer class counts for more.
    n_train = len(train_target)
    Y_pos = sum(train_target)
    Y_neg = n_train - Y_pos
    weights = {0: Y_pos / n_train, 1: Y_neg / n_train}

    # Refit from scratch on the expanding window, then predict the next row.
    model = SVC(kernel="rbf", class_weight=weights)
    model.fit(train_input, train_target)

    test_pred = model.predict(test_input)

    actuals.append(test_target[0])
    preds.append(test_pred[0])
    print(f"Iteration {index} - actual: {test_target[0]}, pred: {test_pred[0]}")


# Aggregate out-of-sample performance over all walk-forward predictions.
print(metrics.confusion_matrix(actuals, preds))
print(metrics.classification_report(actuals, preds))


(3712, 51)
            QUANTILE_RATIO(C(DGS5),89,0.10,0.90)  ...  FUTURE_PERCENT_PROFIT(TQQQ,5)
index_date                                        ...                               
2010-02-11                                   NaN  ...                       0.093775
2010-02-12                                   NaN  ...                       0.060230
2010-02-16                                   NaN  ...                       0.009702
2010-02-17                                   NaN  ...                      -0.009638
2010-02-18                                   NaN  ...                      -0.043666

[5 rows x 51 columns]
Index(['QUANTILE_RATIO(C(DGS5),89,0.10,0.90)', 'RAVG_MEDIAN(L(EURSEKX),8)',
       'RAVG_MEDIAN(C(THREEFYTP3),55)', 'RAVGS_RATIO(C(DFII30),3,13)',
       'RSEMS_RATIO(C(DEXHKUS),8,8)', 'RAVG_MEDIAN(V(IAU),34)',
       'RATIO_TO_RAVG(O(IDXT100),34)', 'RAVGS_RATIO(H(CPER),34,21)',
       'RATIO_TO_RAVG(C(BAMLC0A0CM),21)', 'RATIO_TO_RAVG(L(CPER),21)',
       'RENTROPY(V(E