# Stock Prediction Experiment

In [None]:
from models.tf.models import LongShortTermMemory
from sklearn.model_selection import GridSearchCV
from joblib import Memory
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from scikeras.wrappers import KerasRegressor
from data.dataset import StockDataGenerator
from joblib import Memory
from sklearn.base import BaseEstimator
from xgboost import XGBRegressor, XGBRFRegressor
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from pipes.steps import ClfSwitcher
from IPython.display import clear_output
import warnings
import os
import absl.logging

In [None]:
# Quiet the notebook: keep library chatter out of the cell output.

# Ask TensorFlow's native logging to filter low-severity messages
# (level '2' — presumably hides INFO and WARNING; confirm against TF docs).
# NOTE(review): this variable is normally read when TensorFlow is first
# loaded; the import cell above may already have triggered that — confirm
# the setting still takes effect from here.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')

# Only let absl report messages at ERROR severity or above.
absl.logging.set_verbosity(absl.logging.ERROR)

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

In [None]:
# Verbosity level for the experiment's console output.
# NOTE(review): presumably meant to be forwarded to the `verbose=` arguments
# used throughout this notebook — confirm against the cells that consume it.
VERBOSITY = 1

In [None]:
# Candidate feature sets, evaluated one after another by the pipeline cell.
# Each set maps a feature/indicator name to its keyword overrides; an empty
# override dict is reported as 'defaults' when the set is printed.
feature_combos = [
    # Set 1: no named features at all.
    {},
    # Set 2: nine indicators, every one without overrides.
    {
        'EMA': {},
        '%B': {},
        'CCI': {},
        'RSI': {},
        'VIX': {},
        'MIDPRICE': {},
        'MAMA': {},
        'ROCR100': {},
        'MACD': {},
    },
    # Set 3: like set 2, but with 'AROONOSC' in place of 'MACD' and a
    # 15-step `timeperiod` override for '%B' and 'MIDPRICE'.
    {
        'EMA': {},
        '%B': {'timeperiod': 15},
        'CCI': {},
        'RSI': {},
        'VIX': {},
        'MIDPRICE': {'timeperiod': 15},
        'MAMA': {},
        'ROCR100': {},
        'AROONOSC': {},
    },
]

## Pipeline

In [None]:
# Run one grid search per feature set in `feature_combos`.
#
# For every feature set the cell:
#   1. prints the features (and their parameter overrides) being evaluated,
#   2. builds the 'BLK' dataset with those features,
#   3. compiles an LSTM and wraps it as a scikit-learn regressor,
#   4. grid-searches the LSTM, XGBRegressor and XGBRFRegressor through a
#      single-step pipeline whose `ClfSwitcher` estimator is swapped by the
#      parameter grid,
#   5. scores candidates with negated mean squared error on time-ordered folds.
#
# Outputs left in the notebook namespace:
#   all_gs_results -- list with the fitted search of every feature set, in the
#                     same order as `feature_combos`.
#   gs_results     -- the fitted search of the most recent feature set.
all_gs_results = []
for i, features in enumerate(feature_combos):
    print(f'Feature Set {i + 1}/{len(feature_combos)}')
    print('Feature: Parameters')
    # Report only the feature set under evaluation (previously every set in
    # `feature_combos` was printed on each iteration).
    for feat, params in features.items():
        print(f"{feat}: {params if params else 'defaults'}")

    data = StockDataGenerator(
        'BLK', api='TDA',
        period='TEN_YEAR', period_type='YEAR',
        frequency='DAILY', frequency_type='DAILY',
        features=features, verbose=VERBOSITY, target='close',
        lookback=0, pc=True,
    )

    # The LSTM is compiled against the training inputs, then handed to
    # scikeras so it can be tuned like any scikit-learn estimator.
    lstm = LongShortTermMemory()
    lstm.compile_model(data.X_train, verbose=VERBOSITY, name=f'lstm_{i + 1}')
    lstm_nn = KerasRegressor(model=lstm.model)
    boosted_trees = XGBRegressor()
    boosted_forests = XGBRFRegressor()

    model_pipeline = Pipeline([
        ('clf', ClfSwitcher()),
    ])

    # One grid per candidate model; 'clf__estimator' selects the model and
    # the remaining keys are forwarded to that model.
    learning_rates = [1e-4, 1e-3, 1e-2, 1e-1, 1e0]
    model_params = [
        {
            'clf__estimator': [lstm_nn],
            'clf__estimator__epochs': [50],
            'clf__estimator__verbose': [VERBOSITY],
        },
        {
            'clf__estimator': [boosted_trees],
            'clf__estimator__learning_rate': learning_rates,
        },
        {
            'clf__estimator': [boosted_forests],
            'clf__estimator__learning_rate': learning_rates,
        },
    ]

    # Three time-ordered folds; each validation fold is as long as the
    # dataset's own test split.
    splitter = TimeSeriesSplit(n_splits=3, gap=0, test_size=len(data.y_test))

    # Hand the splitter itself to GridSearchCV rather than the one-shot
    # generator from `splitter.split(...)`: the folds are identical, but the
    # search object stays reusable.
    cv = GridSearchCV(
        model_pipeline, model_params, cv=splitter,
        n_jobs=1, verbose=VERBOSITY,
        # MSE is an error, so it is negated for GridSearchCV's maximisation.
        scoring=make_scorer(mean_squared_error, greater_is_better=False),
    )
    gs_results = cv.fit(data.X, data.y)
    # Keep every search; `gs_results` alone is overwritten each iteration.
    all_gs_results.append(gs_results)

    # Drop this iteration's training logs once the next output arrives.
    clear_output(wait=True)

In [None]:
# Display the object returned by `cv.fit(...)` in the pipeline loop above.
# `gs_results` is reassigned on every iteration, so this reflects only the
# last feature set that was searched.
# NOTE(review): printing the search object shows its repr; attributes such as
# `best_params_` / `best_score_` are probably what should be inspected —
# confirm the intended output.
print(gs_results)