In [11]:
import pandas as pd
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
from processor.processor import Processor as p
from datetime import datetime
from tqdm import tqdm
import numpy as np
from modeler_strats.universal_modeler import UniversalModeler
import pickle

In [12]:
# Target year for model selection. The training cell fits on the four calendar
# years before it (year - 4 <= data["year"] < year).
current_year: int = 2023

In [13]:
# Handles used throughout the notebook: two data sources (prices / filings),
# the "earnings" database that receives the fitted models, and the modeler.
market, sec = Market(), SEC()
spec = ADatabase("earnings")
umod = UniversalModeler()

In [14]:
# Load the S&P 500 constituent table and normalise its symbol column name so
# later cells can refer to it as "ticker".
market.connect()
try:
    sp500 = market.retrieve("sp500")
finally:
    # Release the connection even when the retrieval fails, so a failed run
    # does not leave an open handle behind in the kernel.
    market.disconnect()
sp500 = sp500.rename(columns={"Symbol": "ticker"})

In [15]:
# Feature columns taken from the filing data and passed to the modeler.
# NOTE(review): these look like lower-cased XBRL / US-GAAP tag names — confirm.
factors = [
    "assets",
    "liabilitiesandstockholdersequity",
    "incometaxexpensebenefit",
    "retainedearningsaccumulateddeficit",
    "accumulatedothercomprehensiveincomelossnetoftax",
    "earningspersharebasic",
    "earningspersharediluted",
    "propertyplantandequipmentnet",
    "cashandcashequivalentsatcarryingvalue",
    "entitycommonstocksharesoutstanding",
    "weightedaveragenumberofdilutedsharesoutstanding",
    "weightedaveragenumberofsharesoutstandingbasic",
    "stockholdersequity",
]

In [16]:
# Non-factor columns kept in every per-ticker training frame.
# NOTE(review): "week" is listed although the loading loop aggregates by
# year/quarter — confirm that column is really expected downstream.
included_columns = ["year", "week", "ticker", "adjclose", "y"]

In [17]:
# Append the factor columns to the base columns. Only names that are not
# already present are added, so re-running this cell does not duplicate them
# (duplicates would yield repeated columns when the list is used as a selector).
included_columns += [column for column in factors if column not in included_columns]

In [18]:
# Collects one DataFrame per ticker; filled by the loading loop in the next cell.
training_sets: list = []

In [20]:
# Build one quarterly frame per S&P 500 ticker: quarterly-averaged prices
# left-joined to quarterly-averaged filing values, plus the target column "y".
# Starting from an empty list keeps this cell idempotent: re-running it no
# longer appends a second copy of every ticker.
training_sets = []
sec.connect()
market.connect()
try:
    for ticker in sp500["ticker"].unique():
        try:
            # Pick the scalar explicitly: int() on a one-element Series is
            # deprecated in recent pandas and fails when a ticker has
            # more than one row.
            cik = int(sp500.loc[sp500["ticker"] == ticker, "CIK"].iloc[0])
            prices = market.retrieve_ticker_prices("prices", ticker)
            prices = p.column_date_processing(prices)
            prices["year"] = [x.year for x in prices["date"]]
            prices["quarter"] = [x.quarter for x in prices["date"]]
            filing = sec.retrieve_filing_data(cik)
            filing = p.column_date_processing(filing)
            # assumes the processed filing frame carries "year" and "quarter" — TODO confirm
            filing = filing.groupby(["year", "quarter"]).mean().reset_index()
            ticker_data = prices.copy()
            ticker_data.sort_values("date", ascending=True, inplace=True)
            ticker_data["adjclose"] = [float(x) for x in ticker_data["adjclose"]]
            ticker_data = ticker_data.groupby(["year", "quarter"]).mean().reset_index()
            ticker_data.dropna(inplace=True)
            ticker_data["ticker"] = ticker
            ticker_data = ticker_data.merge(filing, on=["year", "quarter"], how="left").reset_index()
            # Target: basic EPS four rows ahead, i.e. four quarters ahead
            # provided no quarter is missing from the ticker's history.
            ticker_data["y"] = ticker_data["earningspersharebasic"].shift(-4)
            # Raises KeyError ("... not in index") when any requested column
            # is absent; the ticker is then reported and skipped below.
            ticker_data = ticker_data[included_columns]
            training_sets.append(ticker_data)
        except Exception as e:
            # Best-effort load: report the failing ticker and carry on.
            print(ticker, str(e))
finally:
    # Always release both connections, including on KeyboardInterrupt.
    sec.disconnect()
    market.disconnect()

ACN "['entitycommonstocksharesoutstanding'] not in index"
ADM "['stockholdersequity'] not in index"
ARE "['incometaxexpensebenefit'] not in index"
ALL "['cashandcashequivalentsatcarryingvalue'] not in index"
GOOGL "['weightedaveragenumberofdilutedsharesoutstanding', 'weightedaveragenumberofsharesoutstandingbasic'] not in index"
AXP "['cashandcashequivalentsatcarryingvalue'] not in index"
AIG "['cashandcashequivalentsatcarryingvalue'] not in index"
AJG "['weightedaveragenumberofdilutedsharesoutstanding'] not in index"
T "['stockholdersequity'] not in index"
AVB "['propertyplantandequipmentnet'] not in index"
BKR "['weightedaveragenumberofsharesoutstandingbasic'] not in index"
BIO "['entitycommonstocksharesoutstanding'] not in index"
BXP "['incometaxexpensebenefit', 'propertyplantandequipmentnet'] not in index"
BSX "['stockholdersequity'] not in index"
CPT "['incometaxexpensebenefit', 'retainedearningsaccumulateddeficit', 'propertyplantandequipmentnet'] not in index"
COF "['cashandcasheq

In [21]:
# Stack the per-ticker frames row-wise into a single training table.
# NOTE(review): pd.concat raises ValueError when training_sets is empty,
# i.e. when every ticker failed to load.
data = pd.concat(training_sets)

In [22]:
# Keep only fully populated rows. This also removes the rows whose target "y"
# is NaN because shift(-4) had no later row to pull a value from.
data = data.dropna()

In [24]:
# Ask each modeler for a recommended model, trained on the four years that
# precede the target year.
# NOTE(review): nothing inside the loop uses `spec`; the connection is kept
# only to preserve the original cell's side effects — confirm it is needed.
spec.connect()
try:
    for modeler in [umod]:
        for year in tqdm(range(current_year, current_year + 1)):
            in_window = (data["year"] < year) & (data["year"] >= year - 4)
            training_slice = data[in_window].reset_index(drop=True)
            # `stuff` is rebound on every pass, so only the result of the last
            # (modeler, year) combination is available to the following cells.
            stuff = modeler.recommend_model(training_slice, factors, tf=False)
finally:
    spec.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:10<00:00, 10.51s/it]


In [25]:
# Serialise each fitted model to bytes so it can be stored in the database.
# Entries that are already bytes are left alone, so re-running this cell does
# not pickle a pickle.
stuff["model"] = [
    model if isinstance(model, bytes) else pickle.dumps(model)
    for model in stuff["model"]
]

In [26]:
# Persist the recommended models in the "models" table of the earnings database.
spec.connect()
try:
    models = spec.store("models", stuff)
finally:
    # Release the connection even when the write fails.
    spec.disconnect()