# Stock Prediction Experiment

In [39]:
from models.tf.models import LongShortTermMemory
from sklearn.model_selection import GridSearchCV
from joblib import Memory
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from scikeras.wrappers import KerasRegressor
from data.dataset import StockDataGenerator
from joblib import Memory
from sklearn.base import BaseEstimator
from xgboost import XGBRegressor, XGBRFRegressor
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from pipes.steps import ClfSwitcher
from IPython.display import clear_output
import warnings
import os
import absl.logging
from sklearn.metrics import make_scorer

In [40]:
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
absl.logging.set_verbosity(absl.logging.ERROR)

In [41]:
VERBOSITY = 1

In [42]:
feature_combos = [

    { 
        'EMA': {}, '%B': {}, 'CCI': {}, 
        'RSI': {}, 'VIX': {},
        'MAMA': {}, 'ROCR100': {}, 'MACD': {}
    },

    {
        'EMA': {}, '%B': {'timeperiod': 15}, 'CCI': {}, 
        'RSI': {}, 'VIX': {}, 'MIDPRICE': {'timeperiod': 15},
        'MAMA': {}, 'ROCR100': {}, 'AROONOSC': {}
    }
    
]

## Pipeline

In [43]:
def up_or_down_accuracy(y_true, y_pred):
    correct_count = 0
    for i in range(len(y_true)):
        if y_true[i] > 0 and y_pred > 0:
            correct_count += 1
        elif y_true[i] < 0 and y_pred < 0:
            correct_count += 1
        elif y_pred == 0 and y_true[i] == 0:
            correct_count += 1
    return correct_count / len(y_true)

score = make_scorer(up_or_down_accuracy, greater_is_better=True)


In [44]:
for i, features in enumerate(feature_combos):
    print(f'Feature Set {i + 1}/' + str(len(feature_combos)))
    print('Feature: Parameters', end='\n')
    for combo in feature_combos:
        for feat, params in combo.items():
            print(feat + ': ' + (str(params) if len(params) != 0 else 'defaults'), end='\n')
    data = StockDataGenerator(
            'BLK', api='TDA',
            period='TEN_YEAR', period_type='YEAR',
            frequency='DAILY', frequency_type='DAILY',
            features=features, verbose=1, target='close', 
            lookback=0, pc=True 
    )
    lstm = LongShortTermMemory()
    lstm.compile_model(data.X_train, verbose=1, name=f'lstm_{i + 1}')
    lstm_nn = KerasRegressor(model=lstm.model)
    boosted_trees = XGBRegressor()
    boosted_forests = XGBRFRegressor()
    model_pipeline = Pipeline([
        ('clf', ClfSwitcher()),
    ])
    model_params = [

        {
            'clf__estimator': [ lstm_nn ],
            'clf__estimator__epochs': [ 25 ],
            'clf__estimator__verbose': [ 1 ],
        },
    
        {
            'clf__estimator': [ boosted_trees ],
            'clf__estimator__learning_rate': [ 1e-4, 1e-3, 1e-2, 1e-1, 1e0 ],
            'clf__estimator__n_estimators': [ 50, 100, 200, 300 ]
        },

        {
            'clf__estimator': [ boosted_forests ],
            'clf__estimator__learning_rate': [ 1e-4, 1e-3, 1e-2, 1e-1, 1e0 ],
            'clf__estimator__gamma': [ 0.25, 0.50, 0.75, 1.0 ]

        },

    ]
    splitter = TimeSeriesSplit(n_splits=3, gap=0, test_size=len(data.y_test))
    splits = splitter.split(data.X)
    cv = GridSearchCV(
        model_pipeline, model_params, cv=splits, 
        n_jobs=1, verbose=2, scoring=make_scorer(
            mean_squared_error, greater_is_better=False
        ), return_train_score=True
    )
    gs_results = cv.fit(data.X, data.y)
    clear_output(wait=True)

Feature Set 1/2
Feature: Parameters
EMA: defaults
%B: defaults
CCI: defaults
RSI: defaults
VIX: defaults
MAMA: defaults
ROCR100: defaults
MACD: defaults
EMA: defaults
%B: {'timeperiod': 15}
CCI: defaults
RSI: defaults
VIX: defaults
MIDPRICE: {'timeperiod': 15}
MAMA: defaults
ROCR100: defaults
AROONOSC: defaults
Normalizing data...
Scaling data...
Model: "lstm_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_32 (LSTM)              (None, 16, 200)           161600    
                                                                 
 dropout_31 (Dropout)        (None, 16, 200)           0         
                                                                 
 lstm_33 (LSTM)              (None, 16, 100)           120400    
                                                                 
 dropout_32 (Dropout)        (None, 16, 100)           0         
                                         

Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000gn/T/tmph3h082jj/assets
Assets written to: ram:///var/folders/fg/ncsjj6cj3v5935fgygbnt4c80000g

Epoch 1/25
 5/43 [==>...........................] - ETA: 8s - loss: 0.5403 - mae: 0.6836 - rmse: 0.7351 

KeyboardInterrupt: 

In [None]:
print(gs_results)

GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x5460e9e40>,
             estimator=Pipeline(steps=[('clf', ClfSwitcher())]), n_jobs=1,
             param_grid=[{'clf__estimator': [KerasRegressor(epochs=25, model=<keras.engine.sequential.Sequential object at 0x47241cd90>)],
                          'clf__estimator__epochs': [25],
                          'clf__estimator__verbose': [1]},
                         {'clf__estimator': [XGBRegressor(base_score...
                                                            num_parallel_tree=None,
                                                            objective='reg:squarederror',
                                                            predictor=None,
                                                            random_state=None,
                                                            reg_alpha=None,
                                                            sampling_method=None,
                                     

In [None]:
(gs_results.cv_results_['mean_train_score'])

array([-8.05607293e-03, -1.27020347e+00, -1.25766146e+00, -1.23294865e+00,
       -1.20872245e+00, -1.16164080e+00, -1.05189281e+00, -8.62576691e-01,
       -7.07366574e-01, -4.73648130e-01, -1.75091626e-01, -2.40572488e-02,
       -3.36863113e-03, -6.98160278e-05, -1.20285857e-05, -3.67238349e-06,
       -1.69259714e-06, -1.26068719e-06, -3.49475531e-07, -3.49475472e-07,
       -3.49475493e-07, -1.28261464e+00, -1.28031003e+00, -1.25737611e+00,
       -1.03944656e+00, -1.05791144e-03, -1.28261471e+00, -1.28031165e+00,
       -1.25739194e+00, -1.03959643e+00, -1.73907911e-03, -1.28261468e+00,
       -1.28031348e+00, -1.25740980e+00, -1.03976590e+00, -2.49747662e-03,
       -1.28261473e+00, -1.28031441e+00, -1.25741904e+00, -1.03985417e+00,
       -3.00368119e-03])