In [1]:
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [2]:
market = Market()
speculation_db = ADatabase("spec")

In [3]:
market.connect()
prices = market.retrieve("prices")
sp5 = market.retrieve("sp500")
market.disconnect()

In [4]:
prices = p.column_date_processing(prices)

In [5]:
prices["year"] = [x.year for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]
prices["quarter"] = [x.quarter for x in prices["date"]]

In [6]:
data = []
for ticker in tqdm(sp5["Symbol"]):
    ticker_data = prices[prices["ticker"]==ticker]
    ticker_data.sort_values("date",ascending=True)
    ticker_data = ticker_data.groupby(["year","week"]).mean().reset_index()
    for i in range(14):
        ticker_data[str(i)] = ticker_data["adjclose"].shift(i)
    ticker_data["d1"] = ticker_data["adjclose"].pct_change(periods=7)
    ticker_data["d2"] = ticker_data["d1"].pct_change(periods=1)
    ticker_data["d3"] = ticker_data["d2"].pct_change(periods=1)
    ticker_data.dropna(inplace=True)
    ticker_data["ticker"] = ticker
    data.append(ticker_data)
    

100%|████████████████████████████████████████████████████████████████████████| 497/497 [00:53<00:00,  9.28it/s]


In [7]:
final_data = pd.concat(data)

In [8]:
final_data.head()

Unnamed: 0,year,week,close,high,low,open,volume,adjclose,adjhigh,adjlow,...,8,9,10,11,12,13,d1,d2,d3,ticker
13,2012,14,88.2825,88.6,87.595,88.345,1490621.5,65.410132,65.645373,64.90075,...,64.552681,64.255366,63.740216,62.691886,61.791478,63.318207,0.009971,-0.520761,1.488143,MMM
14,2012,15,85.762,86.476,85.2227,85.998,2639420.0,63.542647,64.071662,63.143069,...,64.764344,64.552681,64.255366,63.740216,62.691886,61.791478,-0.023934,-3.400322,5.529528,MMM
15,2012,16,87.052,87.516,86.462,86.764,2587640.0,64.498431,64.842218,64.06129,...,65.100798,64.764344,64.552681,64.255366,63.740216,62.691886,-0.007276,-0.696018,-0.795308,MMM
16,2012,17,88.62,89.07,88.032,88.568,3360580.0,65.660192,65.993605,65.224531,...,64.971137,65.100798,64.764344,64.552681,64.255366,63.740216,0.028098,-4.861947,5.985371,MMM
17,2012,18,89.308,89.618,88.752,89.242,2326940.0,66.169943,66.399628,65.757993,...,63.865687,64.971137,65.100798,64.764344,64.552681,64.255366,0.004092,-0.854351,-0.824278,MMM


In [None]:
start_year = 2023
end_year = 2024
training_years = 4
speculation_db.connect()
for year in range(start_year,end_year):
    for ticker in tqdm(sp5["Symbol"]):
        try:
            ticker_data = final_data[final_data["ticker"]==ticker]
            factors = [str(i) for i in range(14)]
            training_data = ticker_data[(ticker_data["year"]<year) & (ticker_data["year"]>=year-training_years)].reset_index(drop=True)
            prediction_data = ticker_data[ticker_data["year"]==year]
            if training_data.index.size > 100:
                refined_data = {"X":training_data[factors],"y":training_data[["adjclose"]].rename(columns={"adjclose":"y"})}
                models = m.regression(refined_data)
                models["model"] = [pickle.dumps(x) for x in models["model"]]
                models["ticker"] = ticker
                models["year"] = year
                models["training_years"] = training_years
                speculation_db.store("models",models)
            else:
                continue
        except Exception as e:
            print(year,ticker,str(e))
speculation_db.disconnect()

 56%|████████████████████████████████████████▏                               | 277/497 [26:09<23:09,  6.32s/it]

In [10]:
speculation_db.connect()
models = speculation_db.retrieve("models")
models["model"] = [pickle.loads(x) for x in models["model"]]
for year in range(start_year,end_year):
    for ticker in tqdm(sp5["Symbol"]):
        try:
            ticker_data = final_data[final_data["ticker"]==ticker]
            factors = [str(i) for i in range(14)]
            prediction_data = ticker_data[ticker_data["year"]==year]
            ticker_model_data = models[(models["ticker"]==ticker) & (models["year"]==year) & (models["training_years"]==training_years)]
            for row in ticker_model_data.iterrows():
                model = row[1]["model"]
                api = row[1]["api"]
                score = row[1]["score"]
                prediction_data[f"{api}_prediction"] = model.predict(prediction_data[factors])
                prediction_data[f"{api}_score"] = score
            included_columns = ["year","week","ticker"]
            included_columns.extend([x for x in prediction_data.columns if "score" in x or "prediction" in x])
            speculation_db.store("predictions",prediction_data[included_columns].tail(1))
        except Exception as e:
            print(year,ticker,str(e))
speculation_db.disconnect()

 13%|██████████████████████▊                                                                                                                                                  | 68/505 [00:03<00:24, 17.69it/s]

strategy_speculation predictions documents must be a non-empty list


 17%|████████████████████████████▍                                                                                                                                            | 85/505 [00:04<00:23, 18.00it/s]

strategy_speculation predictions documents must be a non-empty list


 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋  | 498/505 [00:29<00:00, 16.97it/s]

2022 WLTW Found array with 0 sample(s) (shape=(0, 14)) while a minimum of 1 is required.


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [00:29<00:00, 17.01it/s]

strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list
strategy_speculation predictions documents must be a non-empty list





In [11]:
speculation_db.connect()
predictions = speculation_db.retrieve("predictions")
speculation_db.disconnect()

In [None]:
snapshot = prices.merge(predictions,on=["year","week","ticker"])
todays = snapshot[snapshot["date"]==snapshot["date"].max()]
todays["prediction"] = (todays["skl_prediction"] + todays["xgb_prediction"] + todays["cat_prediction"]) / 3
todays["delta"] = (todays["prediction"] - todays["adjclose"]) / todays["adjclose"]

In [None]:
todays[["date","delta","adjclose","ticker"]].sort_values("delta",ascending=False).head(20).dropna()