In [1]:
from datetime import datetime, timedelta
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
import numpy as np
import pickle

In [2]:
# Calendar year at execution time (local clock). The training cell iterates
# range(start_year, start_year + 1), i.e. exactly this one year.
start_year = datetime.today().year

In [3]:
# Name of the database that receives the pickled models in the training cell.
SPECULATION_DB_NAME = "speculation"

# Handles only; connections are opened and closed explicitly by the cells
# that use them.
market = Market()
speculation_db = ADatabase(SPECULATION_DB_NAME)

In [4]:
# Load the S&P 500 constituent table. Later cells iterate sp5["Symbol"], so the
# result is expected to expose a "Symbol" column.
market.connect()
try:
    sp5 = market.retrieve("sp500")
finally:
    # Release the connection even if the retrieval raises.
    market.disconnect()

In [5]:
def _weekly_feature_frame(prices, ticker):
    """Aggregate one ticker's price rows to weekly means and derive model features.

    Parameters
    ----------
    prices : pandas.DataFrame
        Rows for a single ticker with a datetime-like "date" column and an
        "adjclose" column convertible to float. The frame is modified in place
        (columns "year", "week" and a float "adjclose" are written to it).
    ticker : str
        Symbol written to the "ticker" column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per (year, week) with:
        - "0".."13": weekly mean adjclose lagged by 0..13 weeks,
        - "d1": 7-week percentage change of adjclose,
        - "d2"/"d3": 1-week percentage change of "d1" / "d2",
        - "y": next week's mean adjclose (the regression target),
        - "ticker".
        Rows containing any NaN (warm-up lags, final week without a target)
        are dropped.
    """
    # Take year AND week from the same ISO calendar. Pairing the calendar year
    # with the ISO week mislabels days around New Year (2021-01-01 would become
    # (2021, 53) and sort after December 2021), which breaks the chronological
    # order that every shift()/pct_change() below relies on.
    iso_parts = [x.isocalendar() for x in prices["date"]]
    prices["year"] = [part[0] for part in iso_parts]
    prices["week"] = [part[1] for part in iso_parts]
    prices["adjclose"] = [float(x) for x in prices["adjclose"]]

    # groupby sorts by its keys, so the weekly frame comes out in chronological
    # (ISO year, ISO week) order; no separate sort by date is needed.
    weekly = prices.groupby(["year", "week"]).mean().reset_index()

    for i in range(14):
        weekly[str(i)] = weekly["adjclose"].shift(i)
    weekly["d1"] = weekly["adjclose"].pct_change(periods=7)
    weekly["d2"] = weekly["d1"].pct_change(periods=1)
    weekly["d3"] = weekly["d2"].pct_change(periods=1)
    weekly["y"] = weekly["adjclose"].shift(-1)
    weekly.dropna(inplace=True)
    weekly["ticker"] = ticker
    return weekly


market.connect()
data = []
try:
    for ticker in tqdm(sp5["Symbol"]):
        try:
            ticker_data = market.retrieve_ticker_prices("prices", ticker)
            ticker_data = p.column_date_processing(ticker_data)
            data.append(_weekly_feature_frame(ticker_data, ticker))
        except Exception as e:
            # Best effort: skip tickers whose data cannot be processed, but say
            # which ticker failed and with what kind of error (a bare str(e)
            # prints only something like 'date').
            print(f"{ticker}: {e!r}")
finally:
    # Always release the connection, even if the loop is interrupted.
    market.disconnect()
final_data = pd.concat(data)

  9%|████▊                                                | 46/504 [00:03<00:35, 13.04it/s]

'date'


 13%|███████▏                                             | 68/504 [00:05<00:32, 13.38it/s]

'date'


 16%|████████▋                                            | 83/504 [00:06<00:28, 14.95it/s]

'date'


100%|████████████████████████████████████████████████████| 504/504 [00:38<00:00, 13.02it/s]


'date'
'date'
'date'


In [6]:
# Model inputs: the 14 lag columns "0".."13", the three change features, and
# the current weekly adjclose.
factors = [str(lag) for lag in range(14)] + ["d1", "d2", "d3", "adjclose"]

In [7]:
# Treat +/-inf as missing (pct_change yields inf when the previous value is 0),
# then discard every row that has any missing value.
final_data = final_data.replace([np.inf, -np.inf], np.nan)
final_data = final_data.dropna()

In [8]:
# Fit regression models on the trailing `training_year` years of weekly data
# and store them, pickled, in the "models" table of the speculation database.
sim = []
errors = []  # rows: [year, n_training_rows, n_prediction_rows, message]
training_year = 4  # size of the look-back window, in years

speculation_db.connect()
try:
    for year in tqdm(range(start_year, start_year + 1)):
        # Stays None if the failure happens before the training set exists.
        training_size = None
        try:
            in_window = (final_data["year"] < year) & (final_data["year"] >= year - training_year)
            # Chain instead of inplace=True: the filtered frame is a slice of
            # final_data, and in-place edits on it trigger SettingWithCopyWarning.
            training_data = final_data[in_window].dropna().reset_index(drop=True)
            training_size = training_data.index.size
            if training_size > 100:
                refined_data = {"X": training_data[factors], "y": training_data[["y"]]}
                # presumably returns a DataFrame with a "model" column of
                # fitted estimators — TODO confirm against Modeler.regression
                models = m.regression(refined_data)
                models["year"] = year
                models["training_year"] = training_year
                models["model"] = [pickle.dumps(x) for x in models["model"]]
                speculation_db.store("models", models)
            else:
                # This cell builds no prediction set (the old reference to
                # `prediction_data` raised NameError); keep the slot as None so
                # the row layout is unchanged.
                errors.append([year, training_size, None, "not_enough_stuff"])
        except Exception as e:
            errors.append([year, training_size, None, str(e)])
finally:
    # Close the connection even if the loop aborts.
    speculation_db.disconnect()

100%|████████████████████████████████████████████████████████| 1/1 [00:22<00:00, 22.62s/it]
