In [17]:
import pandas as pd
from database.market import Market
from database.adatabase import ADatabase
from processor.processor import Processor as p
from datetime import datetime
from tqdm import tqdm
import numpy as np
from modeler_strats.universal_modeler import UniversalModeler
import pickle

In [2]:
# Shared handles used by the cells below: `market` serves the sp500 table and
# per-ticker prices, `spec` is the "spec_classification" database the trained
# models are stored in, and `umod` recommends the classification model.
market = Market()
spec = ADatabase("spec_classification")
umod = UniversalModeler()

In [3]:
# Target year: the training cell fits on the years immediately preceding it.
current_year = 2023

In [4]:
# Load the "sp500" table (presumably the index constituents) from the market store.
market.connect()
try:
    sp500 = market.retrieve("sp500")
finally:
    # Release the connection even when the retrieve call raises.
    market.disconnect()

# Later cells look the symbols up under the column name "ticker".
sp500 = sp500.rename(columns={"Symbol": "ticker"})

In [5]:
# Feature columns fed to the model: three chained percentage-change terms
# and the 14-period rolling mean of the weekly adjusted close.
factors = ["d1", "d2", "d3", "rolling14"]

In [6]:
# Key, price and label columns kept in every per-ticker training frame;
# the factor columns are appended in the next cell.
included_columns = ["year", "week", "ticker", "adjclose", "y"]

In [7]:
# Append the factor columns exactly once. The membership check keeps this cell
# idempotent: a plain extend would add the factors again on every re-run and
# produce duplicate columns when the list is used to select from a DataFrame.
included_columns.extend(
    [column for column in factors if column not in included_columns]
)

In [8]:
# Accumulator for the per-ticker training frames built by the loop below.
training_sets = list()

In [9]:
# Build one weekly feature/label frame per ticker and append it to training_sets.
#
# Tickers whose processing fails are recorded in `skipped_tickers`
# (ticker -> error repr) instead of being discarded silently, so an empty
# result can be diagnosed.
skipped_tickers = {}

# `market` was disconnected after loading sp500, so open a connection for the
# per-ticker price queries and make sure it is released afterwards.
market.connect()
try:
    for ticker in sp500["ticker"].unique():
        try:
            prices = market.retrieve_ticker_prices("prices", ticker)
            prices = p.column_date_processing(prices)
            prices["year"] = [x.year for x in prices["date"]]
            prices["quarter"] = [x.quarter for x in prices["date"]]

            # Sort into a new frame rather than mutating a filtered slice in
            # place, which triggers pandas' SettingWithCopy warning.
            ticker_data = (
                prices[prices["ticker"] == ticker]
                .sort_values("date", ascending=True)
                .copy()
            )
            ticker_data["adjclose"] = ticker_data["adjclose"].map(float)

            # Collapse to one row per (year, week). numeric_only=True keeps
            # non-numeric columns (e.g. the ticker string) out of the mean,
            # which otherwise raises on recent pandas versions.
            # NOTE(review): assumes "week" is supplied by the price data or by
            # column_date_processing - it is not created in this cell.
            ticker_data = (
                ticker_data.groupby(["year", "week"])
                .mean(numeric_only=True)
                .reset_index()
            )

            # Features: chained percentage changes and a 14-row rolling mean.
            ticker_data["d1"] = ticker_data["adjclose"].pct_change(periods=1)
            ticker_data["d2"] = ticker_data["d1"].pct_change(periods=1)
            ticker_data["d3"] = ticker_data["d2"].pct_change(periods=1)
            ticker_data["rolling14"] = ticker_data["adjclose"].rolling(window=14).mean()
            ticker_data.dropna(inplace=True)
            ticker_data["ticker"] = ticker

            # Label: does the next row's mean adjusted close exceed this row's?
            ticker_data["future"] = ticker_data["adjclose"].shift(-1)
            ticker_data["delta"] = (
                ticker_data["future"] - ticker_data["adjclose"]
            ) / ticker_data["adjclose"]
            # NaN > 0 is False; the final row (no future value) is removed by
            # the dropna below, so it never reaches the training data.
            ticker_data["y"] = ticker_data["delta"] > 0

            # pct_change yields +/-inf when the previous value is 0; treat
            # those rows as missing and drop them.
            ticker_data = ticker_data.replace([np.inf, -np.inf], np.nan).dropna()
            training_sets.append(ticker_data[included_columns])
        except Exception as error:
            # Best effort: keep going with the remaining tickers, but remember
            # what failed. Unlike a bare `except:`, this does not swallow
            # KeyboardInterrupt or SystemExit.
            skipped_tickers[ticker] = repr(error)
            continue
finally:
    market.disconnect()

if skipped_tickers:
    print(f"skipped {len(skipped_tickers)} tickers; see skipped_tickers for details")

In [10]:
# Stack the per-ticker frames into a single table. ignore_index is not set,
# so each row keeps the index it had in its per-ticker frame.
data = pd.concat(training_sets)

In [11]:
# Drop any rows that still contain a missing value in any column.
data = data.dropna()

In [12]:
# Display the combined training table.
data

Unnamed: 0,year,week,ticker,adjclose,y,d1,d2,d3,rolling14
13,2000,14,MMM,25.077294,True,0.041247,12.190637,-13.811506,24.512802
14,2000,15,MMM,25.480959,False,0.016097,-0.609744,-1.050017,24.464388
15,2000,16,MMM,24.647652,True,-0.032703,-3.031650,3.972003,24.279800
16,2000,17,MMM,24.709535,False,0.002511,-1.076772,-0.644823,24.181488
17,2000,18,MMM,23.492011,False,-0.049273,-20.625452,18.154890,24.048622
...,...,...,...,...,...,...,...,...,...
524,2023,6,ZTS,162.342000,True,-0.029983,-5.767340,6.172354,152.808105
525,2023,7,ZTS,171.204000,False,0.054588,-2.820631,-0.510930,155.082868
526,2023,8,ZTS,168.025000,False,-0.018568,-1.340154,-0.524874,156.626157
527,2023,9,ZTS,167.836000,False,-0.001125,-0.939423,-0.299019,157.986730


In [15]:
# Fit the recommended classification model for each target year, training on
# the `years` calendar years immediately before it.
#
# `stuff` is reassigned on every iteration, so only the result of the last
# (modeler, year) pair is kept; with one modeler and one year that is the
# single trained result.
years = 4
spec.connect()
try:
    for modeler in [umod]:
        for year in tqdm(range(current_year, current_year + 1)):
            # Training window: year - years <= row year < year.
            training_slice = data[
                (data["year"] < year) & (data["year"] >= year - years)
            ].reset_index(drop=True)
            # prediction_set and set_name are not used later in this cell;
            # they are kept so the notebook's globals stay unchanged.
            prediction_set = data[data["year"] == year].reset_index(drop=True)
            set_name = "sim"
            stuff = modeler.recommend_classification_model(
                training_slice, factors, multioutput=False
            )
            stuff["training_years"] = years
finally:
    # Release the connection even when training raises part-way through.
    spec.disconnect()

  0%|                                                                                                                                                              | 0/1 [00:00<?, ?it/s]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [04:46<00:00, 286.12s/it]


In [18]:
# Serialise each fitted model to bytes so it can be written to the database.
# Values that are already bytes are left alone, so re-running this cell does
# not pickle an already-pickled model.
stuff["model"] = [
    model if isinstance(model, (bytes, bytearray)) else pickle.dumps(model)
    for model in stuff["model"]
]

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dropout
......vars
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........2
.........3
.........4
.........5
.........6
.........7
.........8
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-03-15 09:46:40         2141
metadata.json                                  2023-03-15 09:46:40           64
variables.h5                                   2023-03-15 09:46:40        23208


In [19]:
# Persist the trained model record(s) in the "models" table of `spec`.
spec.connect()
try:
    models = spec.store("models", stuff)
finally:
    # Release the connection even when the store call raises.
    spec.disconnect()