In [24]:
import math
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
# HalvingGridSearchCV is still experimental in scikit-learn: this enabling import
# must run before the class can be imported from sklearn.model_selection.
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.linear_model import LinearRegression, SGDRegressor, RidgeCV, SGDClassifier, RidgeClassifier, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_log_error, accuracy_score, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split, HalvingGridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

from database.market import Market
from database.strategy import Strategy
from modeler.modeler import Modeler as sp
from preprocessor.model_preprocessor import ModelPreprocessor
from preprocessor.predictor_preprocessor import PredictorPreprocessor
from transformer.column_transformer import ColumnTransformer
from transformer.date_transformer import DateTransformer
from transformer.model_transformer import ModelTransformer
from transformer.predictor_transformer import PredictorTransformer
from transformer.product_transformer import ProductTransformer
from utils.date_utils import DateUtils

In [2]:
## Loading Constants
# Date window (ISO "YYYY-MM-DD" strings) used by the cells below.
start = "2008-01-01"
end = datetime(2021,1,7).strftime("%Y-%m-%d")

# Loading Databases
strat_db = Strategy("unity")
market = Market()

# Fetch the ticker table ordered by symbol. The connection is released in
# `finally` so that a failed query does not leave it open.
market.connect()
try:
    tickers = market.retrieve_data("sp500").sort_values("Symbol")
finally:
    market.close()

# Run parameters read by later cells.
reload = True
quarterly_range = range(1,2)      # quarters to simulate (currently just Q1)
yearly_range = range(2018,2019)   # years to simulate (currently just 2018)
dataset = "pdr"                   # prefix of the price table name queried later

In [3]:
def drop_sentinel_columns(frame, sentinel=-99999):
    """Return a copy of `frame` without the columns whose minimum equals `sentinel`.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to filter; it is not modified.
    sentinel : number, default -99999
        Marker value; any column whose minimum is exactly this value is dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the columns that were not flagged.
    """
    flagged = [col for col in frame.columns if frame[col].min() == sentinel]
    return frame.drop(columns=flagged)


# Load both weekly datasets; the connection is closed even if a query fails.
market.connect()
try:
    classification = market.retrieve_data("dataset_pdr_week_classification")
    regression = market.retrieve_data("dataset_pdr_week_regression")
finally:
    market.close()

# Discard every column that carries the -99999 sentinel as its minimum.
regression = drop_sentinel_columns(regression)
classification = drop_sentinel_columns(classification)

In [4]:
def _first_row_of_quarter(frame, year, quarter):
    """Return the index label of the first row of `frame` matching `year`/`quarter`.

    Raises
    ------
    IndexError
        When no row matches (same exception type as indexing an empty list,
        but with a message that says which period is missing).
    """
    matches = frame.index[(frame["year"] == year) & (frame["quarter"] == quarter)]
    if len(matches) == 0:
        raise IndexError("no rows for year {} quarter {}".format(year, quarter))
    return matches[0]


# Weekly modeling run: for each (year, quarter) in the configured ranges, build a
# trailing training window per ticker and fit the regression models on it.
gap = 5
week_gap = gap // 5          # number of rows the target is shifted ahead
training_years = 7           # length of the trailing training window, in years
timeline = DateUtils.create_timeline(start,end)
sims = []

strat_db.connect()
market.connect()
try:
    # Destructive: removes the stored results table before the run.
    market.drop_table("pdr_weekly_sim")
    for year in tqdm(yearly_range):
        for quarter in tqdm(quarterly_range):
            try:
                # Only the first ticker (positionally) is processed for now.
                for ticker in tickers["Symbol"].iloc[0:1]:
                    try:
                        if ticker in regression.columns:
                            # NOTE(review): `price` and `prot` are not used further in this
                            # cell; they are kept because the calls may have side effects.
                            price = market.retrieve_price_data("{}_prices".format(dataset),ticker)
                            separator = " " if dataset == "pdr" else ""
                            price = ColumnTransformer.rename_columns(price,separator)
                            price = DateTransformer.convert_to_date(dataset,price,"date")
                            mr = ModelPreprocessor(ticker)
                            prot = ProductTransformer(ticker,start,end)
                            ticker_regression = regression

                            ## regression_model
                            first = _first_row_of_quarter(ticker_regression, year - training_years, quarter)
                            last = _first_row_of_quarter(ticker_regression, year, quarter)
                            # NOTE(review): index labels are used as positions here, which
                            # assumes a default 0..n-1 index; the window also stops at
                            # last-1 (exclusive) — confirm both are intended.
                            # .copy() so the new "y" column is written to an independent frame.
                            rqpd = ticker_regression.iloc[first:last-1].copy()
                            # Target = the ticker's own column, `week_gap` rows ahead.
                            rqpd["y"] = rqpd[ticker].shift(-week_gap)
                            # Drop the trailing rows whose shifted target is undefined.
                            # Trimming by explicit length keeps every row when week_gap == 0
                            # (a plain [:-0] slice would return an empty frame).
                            rqpd = rqpd.iloc[:max(len(rqpd) - week_gap, 0)]
                            qpd = mr.day_trade_preprocess_regression(rqpd.copy(),ticker,True)
                            rpr = sp.regression(qpd,ranked=False,tf=True,deep=True)

                            # TODO: classification model, currently disabled.
                            # ticker_classification = classification
                            # first = ticker_classification[(ticker_classification["year"] == year - training_years) & (ticker_classification["quarter"] == quarter)].index.values.tolist()[0]
                            # last = ticker_classification[(ticker_classification["year"] == year) & (ticker_classification["quarter"] == quarter)].index.values.tolist()[0]
                            # cqpd = ticker_classification.iloc[first:last-1]
                            # cqpd["y"] = cqpd[ticker]
                            # cqpd["y"] = cqpd["y"].shift(-week_gap)
                            # cqpd = cqpd[:-week_gap]
                            # qpd = mr.day_trade_preprocess_classify(cqpd.copy(),ticker)
                            # q2c = qpd["X"].columns
                            # cpr = sp.classification(qpd,tf=False,deep=False)
                            # price_results = pd.DataFrame([cpr,rpr])
                            # product_qpds = []
                            # current_sets = []
                    except Exception as e:
                        # Best-effort per ticker: report the failure and move on.
                        message = {"status":"weekly modeling","ticker":ticker,"year":str(year),"quarter":str(quarter),"message":str(e)}
                        print(message)
            except Exception as e:
                # Was print(year,week,...): `week` is undefined in this notebook and
                # raised NameError inside the handler, hiding the original error.
                print(year,quarter,str(e))
finally:
    # Always release both connections, even if the run is interrupted.
    try:
        market.close()
    finally:
        strat_db.close()

  0%|                                                                                                                                                | 0/1 [00:00<?, ?it/s]
  0%|                                                                                                                                                | 0/1 [00:00<?, ?it/s][A

sgd
r
Invalid parameter alpha for estimator RidgeCV(alphas=array([ 0.1,  1. , 10. ])). Check the list of available parameters with `estimator.get_params().keys()`.
lr
logr
Unknown label type: 'continuous'


To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.87s/it][A
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.87s/it]


In [56]:
# Candidate regressors and the hyper-parameter grid searched for each one.
# Keys are short labels; each value holds the estimator ("model") and its
# grid ("params"); an empty grid means the estimator is evaluated as configured.

# Regularisation strengths shared by the SGD grid and RidgeCV's built-in search.
_alpha_grid = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.5, 1]

# scikit-learn renamed the squared loss from "squared_loss" to "squared_error"
# and later removed the old spelling; pick whichever name the installed version
# accepts so the grid does not contain an invalid candidate.
_sgd_losses = getattr(SGDRegressor, "loss_functions", {"squared_error": None})
_squared_loss_name = "squared_error" if "squared_error" in _sgd_losses else "squared_loss"

stuff = {
    "sgd": {
        # Seeded so repeated runs of the notebook give the same fit.
        "model": SGDRegressor(random_state=42),
        "params": {
            "loss": [_squared_loss_name, "huber"],
            "learning_rate": ["constant", "optimal", "adaptive"],
            "alpha": list(_alpha_grid),
        },
    },
    "r": {"model": RidgeCV(alphas=list(_alpha_grid)), "params": {}},
    "lr": {"model": LinearRegression(), "params": {"fit_intercept": [True, False]}},
}

In [57]:
# Benchmark every candidate in `stuff` on a 75/25 split of the preprocessed data
# and collect one result dict per candidate in `results`.
data = qpd
# NOTE(review): train_test_split shuffles rows by default. If `qpd` is time-ordered
# this lets later rows leak into training and can inflate the score — consider
# shuffle=False; confirm against how `qpd` is built.
X_train, X_test, y_train, y_test = train_test_split(
    data["X"], data["y"], train_size=0.75, test_size=0.25, random_state=42
)
deep = True   # True: tune with a halving grid search; False: fit the estimator as configured
results = []
for regressor, spec in stuff.items():
    print(regressor)
    try:
        model = spec["model"]
        if deep:
            # The search clones the estimator and refits the best candidate itself,
            # so no separate fit is needed beforehand. Seeded for reproducibility.
            gs = HalvingGridSearchCV(
                model,
                spec["params"],
                cv=10,
                scoring="neg_mean_squared_error",
                random_state=42,
            )
            gs.fit(X_train, y_train)
            model = gs.best_estimator_
        else:
            # Fit exactly once (previously the estimator was fitted twice on this path).
            model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        score = r2_score(y_test, y_pred)   # R^2 on the held-out quarter of the data
        results.append({"api": "skl", "model": model, "score": score})
    except Exception as e:
        # Best-effort: record the failure with the -99999 sentinel score and
        # the error text in place of the model, then try the next candidate.
        print(str(e))
        results.append({"api": "skl", "model": str(e), "score": -99999})

sgd
r
lr


In [55]:
# Display the list of per-model result dicts ("api", "model", "score").
# NOTE(review): this cell's execution count (55) is lower than the cells above it
# (56, 57), so the saved output may predate them — re-run top to bottom to refresh.
results

[{'api': 'skl',
  'model': RidgeCV(alphas=array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 2.e-01, 5.e-01, 1.e+00])),
  'score': 0.9940353269711533},
 {'api': 'skl', 'model': LinearRegression(), 'score': 0.9792617895065339}]