In [1]:
from database.strategy import Strategy
from database.sec import SEC
from database.market import Market
from transformer.date_transformer import DateTransformer
from transformer.column_transformer import ColumnTransformer
from transformer.model_transformer import ModelTransformer
from transformer.product_transformer import ProductTransformer
from transformer.predictor_transformer import PredictorTransformer
from preprocessor.model_preprocessor import ModelPreprocessor
from preprocessor.predictor_preprocessor import PredictorPreprocessor
from modeler.modeler import Modeler as sp
from utils.date_utils import DateUtils
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, timezone
from tqdm import tqdm
import math
import numpy as np

In [2]:
## Loading Constants
start = "2008-01-01"
end = datetime(2021,1,7).strftime("%Y-%m-%d")
# Loading Databases
strat_db = Strategy("unity")
market = Market()
sec = SEC("sec")
market.connect()
tickers = market.retrieve_data("sp500").sort_values("Symbol")
correlations = market.retrieve_data("coint")
market.close()
reload = True
quarterly_range = range(4,5)
yearly_range = range(2020,2021)
dataset = "pdr"

In [3]:
market.connect()
classification = market.retrieve_data("dataset_pdr_week_classification")
regression = market.retrieve_data("dataset_pdr_week_regression")
quarter_classification = market.retrieve_data("dataset_pdr_quarter_classification")
quarter_regression = market.retrieve_data("dataset_pdr_quarter_regression")
accurate = market.retrieve_data("accurate")
relevant_tickers = list(accurate["ticker"])
relevant_tickers.extend(["date","year","quarter","week"])
market.close()

In [4]:
for col in regression.columns:
    if -99999 == regression[col].min() or col not in relevant_tickers:
        regression.drop(col,axis=1,inplace=True)
for col in classification.columns:
    if -99999 == classification[col].min() or col not in relevant_tickers:
        classification.drop(col,axis=1,inplace=True)
for col in regression.columns:
    if -99999 == quarter_regression[col].min() or col not in relevant_tickers:
        quarter_regression.drop(col,axis=1,inplace=True)
for col in quarter_classification.columns:
    if -99999 == quarter_classification[col].min() or col not in relevant_tickers:
        quarter_classification.drop(col,axis=1,inplace=True)

In [5]:
percent_changes = quarter_regression.copy()

In [6]:
for col in percent_changes.columns:
    if col not in ["year","quarter","week","_id"]:
        percent_changes[col] = percent_changes[col].pct_change()

In [7]:
pc = percent_changes.dropna()

In [8]:
quarterlies = []
for row in pc.iterrows():
    analysis = []
    values = row[1]
    for col in pc.columns:
        if col not in ["year","quarter","week","_id"]:
            analysis.append({"year":row[1]["year"],"quarter":row[1]["quarter"],"ticker":col,"gain":row[1][col]})
    quarterlies.append(pd.DataFrame(analysis).sort_values("gain",ascending=False).head(20))

In [9]:
tickers.columns

Index(['_id', 'Symbol', 'Security', 'SEC filings', 'GICS Sector',
       'GICS Sub-Industry', 'Headquarters Location', 'Date first added', 'CIK',
       'Founded'],
      dtype='object')

In [10]:
sp5 = tickers.copy()
sp5.rename(columns={"Symbol":"ticker"},inplace=True)
q = pd.concat(quarterlies).merge(sp5[["ticker","Security","GICS Sector",'GICS Sub-Industry']],how="left",on="ticker")

In [28]:
industry_analysis = q.groupby(["year","GICS Sector"]).mean().reset_index()

In [30]:
industry_analysis[industry_analysis["year"] > 2017]

Unnamed: 0,year,GICS Sector,quarter,gain
99,2018,Communication Services,1.333333,0.285406
100,2018,Consumer Discretionary,2.263158,0.243331
101,2018,Consumer Staples,3.875,0.135557
102,2018,Energy,2.0,0.244233
103,2018,Financials,3.0,0.138957
104,2018,Health Care,2.823529,0.250636
105,2018,Industrials,1.333333,0.229988
106,2018,Information Technology,2.076923,0.271761
107,2018,Materials,3.0,0.160316
108,2018,Real Estate,4.0,0.100314


In [12]:
sims = []
gap = 5
week_gap = int(gap/5)
training_years = 7
timeline = DateUtils.create_timeline(start,end)
sims = []
sec.connect()
strat_db.connect()
market.connect()
market.drop_table("pdr_weekly_sim")
for year in tqdm(yearly_range):
    for quarter in tqdm(quarterly_range):
        try:
            for ticker in list(accurate["ticker"][0:1]):
                try:
                    if ticker in regression.columns:
                        price = market.retrieve_price_data("{}_prices".format(dataset),ticker)
                        if dataset == "pdr":
                            price = ColumnTransformer.rename_columns(price," ")
                        else:
                            price = ColumnTransformer.rename_columns(price,"")
                        price = DateTransformer.convert_to_date(dataset,price,"date")
                        mr = ModelPreprocessor(ticker)
                        prot = ProductTransformer(ticker,start,end)
                        ticker_regression = regression
                        ## regression_model
                        first = ticker_regression[(ticker_regression["year"] == year - training_years) & (ticker_regression["quarter"] == quarter)].index.values.tolist()[0]
                        last = ticker_regression[(ticker_regression["year"] == year) & (ticker_regression["quarter"] == quarter)].index.values.tolist()[0]
                        rqpd = ticker_regression.iloc[first:last-1]
                        rqpd["y"] = rqpd[ticker]
                        rqpd["y"] = rqpd["y"].shift(-week_gap)
                        rqpd = rqpd[:-week_gap]
                        qpd = mr.day_trade_preprocess_regression(rqpd.copy(),ticker,True)
                        rpr = sp.regression(qpd,ranked=False,tf=True,deep=False)
                        ## classification_model
                        ticker_classification = classification
                        first = ticker_classification[(ticker_classification["year"] == year - training_years) & (ticker_classification["quarter"] == quarter)].index.values.tolist()[0]
                        last = ticker_classification[(ticker_classification["year"] == year) & (ticker_classification["quarter"] == quarter)].index.values.tolist()[0]
                        cqpd = ticker_classification.iloc[first:last-1]
                        cqpd["y"] = cqpd[ticker]
                        cqpd["y"] = cqpd["y"].shift(-week_gap)
                        cqpd = cqpd[:-week_gap]
                        qpd = mr.day_trade_preprocess_classify(cqpd.copy(),ticker)
                        q2c = qpd["X"].columns
                        cpr = sp.classification(qpd,tf=True,deep=False)
#                         price_results = pd.DataFrame([cpr,rpr])
#                         product_qpds = []
#                         current_sets = []
#                         for j in range(len(price_results)):
#                             price_result = price_results.iloc[j]
#                             if price_result["model_type"] == "regression":
#                                 weekly_price_data = ticker_regression
#                                 weekly_price_data = weekly_price_data[(weekly_price_data["year"] == year) & (weekly_price_data["quarter"] == quarter)]
#                                 weekly_price_data["y"] = weekly_price_data[ticker]
#                                 product_qpd = mr.day_trade_preprocess_regression(weekly_price_data.copy(),ticker,False)
#                             else:
#                                 weekly_price_data = ticker_classification
#                                 weekly_price_data = weekly_price_data[(weekly_price_data["year"] == year) & (weekly_price_data["quarter"] == quarter)]
#                                 weekly_price_data["y"] = weekly_price_data[ticker]
#                                 product_qpd = mr.day_trade_preprocess_classify(weekly_price_data.copy(),ticker)
#                             price_dict = []
#                             for week in product_qpd["X"]["week"]:
#                                 price_dict.append({"year":year,"quarter":quarter,"week":week})
#                             price = pd.DataFrame(price_dict)
#                             price_model = price_result["model"]
#                             prediction = price_model.predict(product_qpd["X"])
#                             current_set = price.copy()
#                             col_name = price_result["model_type"]
#                             current_set["weekly_{}_{}_prediction".format("price",col_name)] = prediction
#                             current_set["weekly_{}_{}_score".format("price",col_name)] = price_result["score"]
#                             current_sets.append(current_set)
#                         base = current_sets[0]
#                         for cs in current_sets[1:]:
#                             base = base.merge(cs,on=["year","quarter","week"],how="left")
#                         base["ticker"] = ticker
#                         if len(base) > 0:
#                             strat_db.store_data("{}_weekly_sim".format(dataset),base)
                except Exception as e:
                    message = {"status":"weekly modeling","ticker":ticker,"year":str(year),"quarter":str(quarter),"message":str(e)}
                    print(message)
        except Exception as e:
            print(year,week,str(e))
sec.close()
market.close()
strat_db.close()

  0%|                                                                                                                                                                                              | 0/1 [00:00<?, ?it/s]
  0%|                                                                                                                                                                                              | 0/1 [00:00<?, ?it/s][A



To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float32 by default, call `tf.keras.backend.set_floatx('float32')`. To change just this layer, pass dtype='float32' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.22s/it][A
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00,  5.22s/it]


In [15]:
rpr.iloc[1:2]["model"]

1    Unknown label type: 'continuous'
Name: model, dtype: object

In [22]:
rqpd.describe()

Unnamed: 0,year,quarter,week,ADBE,ANSS,AZO,BIO,BKNG,BRK-B,BSX,...,LKQ,MTD,NVR,ORLY,SBAC,SNPS,TDY,TYL,WAT,y
count,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,...,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0,379.0
mean,2016.712401,2.506596,26.242744,171.08756,138.549811,763.264115,222.933393,1553.853555,167.895483,25.757646,...,30.390617,492.961773,2230.103736,272.166855,151.682939,80.139351,169.895888,180.698375,162.114244,168.140581
std,2.059724,1.120673,15.110849,109.403587,65.977,207.689944,103.935946,335.665102,33.668578,10.167831,...,4.185519,196.841132,931.217919,90.189403,62.463783,42.650808,87.807338,71.775677,43.19725,33.644687
min,2013.0,1.0,1.0,50.642,72.734,419.237988,108.064,1010.532019,109.934,11.47,...,17.926,228.052502,905.721997,123.194,77.294748,36.160001,77.272501,77.622498,95.816002,109.934
25%,2015.0,2.0,13.0,79.03675,87.173,607.869012,135.664001,1232.195007,140.134001,17.2795,...,27.152,313.467001,1363.042004,209.292001,105.159183,46.208,97.580999,124.235,123.038,140.384
50%,2017.0,3.0,26.0,120.146001,105.851999,735.027991,196.184003,1525.567535,165.657996,24.644,...,30.324,478.017502,2005.426001,262.547501,120.536488,71.17,126.295999,170.263998,157.738,165.802002
75%,2018.0,4.0,39.5,257.013,176.770999,825.3,296.534998,1861.588013,199.641,35.921,...,32.902999,645.653666,3069.257007,342.541251,169.4501,96.496,231.381999,220.51525,197.578753,199.972
max,2020.0,4.0,53.0,514.976001,337.368005,1228.647998,528.695996,2184.85,228.779999,45.553999,...,43.009999,981.762,4235.965918,472.882007,314.755377,219.953998,390.1175,373.333997,250.973999,228.779999
