In [1]:
import pandas as pd
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
from processor.processor import Processor as p
from datetime import datetime
from tqdm import tqdm
import numpy as np
from modeler_strats.universal_modeler import UniversalModeler
import pickle

In [2]:
current_year = 2023

In [3]:
market = Market()
spec = ADatabase("dividends")
umod = UniversalModeler()
sec = SEC()

In [4]:
market.connect()
sp500 = market.retrieve("sp500")
market.disconnect()
sp500 = sp500.rename(columns={"Symbol":"ticker"})

In [5]:
factors = ['assets',
 'liabilitiesandstockholdersequity',
 'incometaxexpensebenefit',
 'retainedearningsaccumulateddeficit',
 'accumulatedothercomprehensiveincomelossnetoftax',
 'earningspersharebasic',
 'earningspersharediluted',
 'propertyplantandequipmentnet',
 'cashandcashequivalentsatcarryingvalue',
 'entitycommonstocksharesoutstanding',
 'weightedaveragenumberofdilutedsharesoutstanding',
 'weightedaveragenumberofsharesoutstandingbasic',
 'stockholdersequity',
"commonstockdividendspersharecashpaid"]

In [6]:
included_columns=[
    "year",
    "week",
    "ticker",
    "adjclose",
    "y"
]

In [7]:
included_columns.extend(factors)

In [8]:
training_sets = []

In [9]:
sec.connect()
market.connect()
for ticker in sp500["ticker"].unique():
    try:
        cik = int(sp500[sp500["ticker"]==ticker]["CIK"])
        prices = market.retrieve_ticker_prices("prices",ticker)
        prices = p.column_date_processing(prices)
        prices["year"] = [x.year for x in prices["date"]]
        prices["quarter"] = [x.quarter for x in prices["date"]]
        filing = sec.retrieve_filing_data(cik)
        filing = p.column_date_processing(filing)
        filing = filing.groupby(["year","quarter"]).mean().reset_index()
        ticker_data = prices.copy()
        ticker_data.sort_values("date",ascending=True,inplace=True)
        ticker_data["adjclose"] = [float(x) for x in ticker_data["adjclose"]]
        ticker_data = ticker_data.groupby(["year","quarter"]).mean().reset_index()
        ticker_data.dropna(inplace=True)
        ticker_data["ticker"] = ticker
        ticker_data = ticker_data.merge(filing,on=["year","quarter"],how="left").reset_index()
        ticker_data["y"] = ticker_data["adjclose"].shift(-4) + ticker_data["commonstockdividendspersharecashpaid"]
        ticker_data = ticker_data[included_columns]
        training_sets.append(ticker_data)
    except Exception as e:
        print(ticker,str(e))
sec.disconnect()
market.disconnect()

ABT 'commonstockdividendspersharecashpaid'
ABMD 'commonstockdividendspersharecashpaid'
ACN "['entitycommonstocksharesoutstanding'] not in index"
ATVI 'commonstockdividendspersharecashpaid'
ADM "['stockholdersequity'] not in index"
ADBE 'commonstockdividendspersharecashpaid'
ADP 'commonstockdividendspersharecashpaid'
AAP 'commonstockdividendspersharecashpaid'
APD 'commonstockdividendspersharecashpaid'
AKAM 'commonstockdividendspersharecashpaid'
ALB 'commonstockdividendspersharecashpaid'
ARE 'commonstockdividendspersharecashpaid'
ALGN 'commonstockdividendspersharecashpaid'
ALL "['cashandcashequivalentsatcarryingvalue'] not in index"
GOOGL 'commonstockdividendspersharecashpaid'
AMZN 'commonstockdividendspersharecashpaid'
AMCR 'commonstockdividendspersharecashpaid'
AMD 'commonstockdividendspersharecashpaid'
AEP 'commonstockdividendspersharecashpaid'
AXP 'commonstockdividendspersharecashpaid'
AIG "['cashandcashequivalentsatcarryingvalue'] not in index"
AMP 'commonstockdividendspersharecashp

NTAP 'commonstockdividendspersharecashpaid'
NFLX 'commonstockdividendspersharecashpaid'
NWSA 'commonstockdividendspersharecashpaid'
NDSN 'commonstockdividendspersharecashpaid'
NTRS 'commonstockdividendspersharecashpaid'
NOC 'commonstockdividendspersharecashpaid'
NCLH 'commonstockdividendspersharecashpaid'
NRG "['stockholdersequity'] not in index"
NUE 'commonstockdividendspersharecashpaid'
NVR 'commonstockdividendspersharecashpaid'
ORLY 'commonstockdividendspersharecashpaid'
OXY 'commonstockdividendspersharecashpaid'
ODFL 'commonstockdividendspersharecashpaid'
OMC 'commonstockdividendspersharecashpaid'
PCAR 'commonstockdividendspersharecashpaid'
PARA "['entitycommonstocksharesoutstanding'] not in index"
PAYC 'commonstockdividendspersharecashpaid'
PYPL 'commonstockdividendspersharecashpaid'
PENN 'commonstockdividendspersharecashpaid'
PEP 'commonstockdividendspersharecashpaid'
PM 'commonstockdividendspersharecashpaid'
PNW 'commonstockdividendspersharecashpaid'
PXD 'commonstockdividendsper

In [10]:
data = pd.concat(training_sets)

In [11]:
data = data.dropna()

In [13]:
spec.connect()
for modeler in [umod]:
    for year in tqdm(range(current_year,current_year+1)):
        training_slice = data[(data["year"]<year) & (data["year"] >= year - 4)].reset_index(drop=True)
        set_name = "models"
        stuff = modeler.recommend_model(training_slice,factors,tf=False)
spec.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00,  8.58s/it]


In [14]:
stuff["model"] = [pickle.dumps(x) for x in stuff["model"]]

In [15]:
spec.connect()
models = spec.store("models",stuff)
spec.disconnect()