In [1]:
from database.market import Market
from database.sec import SEC
import pandas as pd
from tqdm import tqdm
from modeler_strats.universal_modeler import UniversalModeler
from processor.processor import Processor as p
from datetime import datetime
from functional.transformer import Transformer as tf
from database.adatabase import ADatabase

In [2]:
# Handles used throughout the notebook: the market-data store and the SEC filings store.
market = Market()
sec = SEC()

# Load the "sp500" table once. Later cells read its "Symbol" and "CIK" columns
# and merge it onto the per-ticker data.
market.connect()
try:
    sp500 = market.retrieve("sp500")
finally:
    # Always release the connection, even if the retrieve call fails.
    market.disconnect()

# Model wrapper and the "FS" database that receives the simulation output.
modeler = UniversalModeler()
fs = ADatabase("FS")

In [3]:
# Columns kept for every ticker after the filing / price / S&P 500 merges.
# Order matters: it is the column order of the resulting frames.
included_columns = [
    # identifiers and timing
    "year",
    "quarter",
    "date",
    "filed",
    "ticker",
    # S&P 500 classification
    "GICS Sector",
    "GICS Sub-Industry",
    # model inputs
    "adjclose",
    "assets",
    "liabilitiesandstockholdersequity",
    "accumulatedothercomprehensiveincomelossnetoftax",
    "earningspersharediluted",
    "earningspersharebasic",
    # prediction target
    "y",
]

In [4]:
# Feature columns handed to the modeler; each one also appears in included_columns.
factors = [
    "adjclose",
    "assets",
    "liabilitiesandstockholdersequity",
    "accumulatedothercomprehensiveincomelossnetoftax",
    "earningspersharediluted",
    "earningspersharebasic",
]

In [5]:
# Build one frame per S&P 500 ticker that joins its filings to quarterly
# average prices and the S&P 500 sector columns, collecting them in all_filings.
analysis = []  # not used in this cell; kept in case a later cell expects it
all_filings = []

# The renamed S&P 500 table is identical for every ticker, so build it once.
sector_lookup = sp500.rename(columns={"Symbol":"ticker"})

sec.connect()
market.connect()
try:
    for ticker in tqdm(sp500["Symbol"]):
        try:
            # .item() raises unless exactly one row matches the symbol.
            cik = sp500[sp500["Symbol"]==ticker]["CIK"].item()
            filing = sec.retrieve_filing_data(cik)
            prices = market.retrieve_ticker_prices("prices",ticker)
            filing["ticker"] = ticker
            # "filed" holds YYYYMMDD values (e.g. 20120216); parse them into datetimes.
            filing["date"] = [datetime.strptime(str(x),"%Y%m%d") for x in filing["filed"]]
            # The groupby/merge below rely on "year" and "quarter" being present
            # after this call (presumably derived from "date"; verify in Processor).
            filing = p.column_date_processing(filing)
            prices = p.column_date_processing(prices)
            # One row per (year, quarter, ticker) holding the mean of each price column.
            prices = prices.groupby(["year","quarter","ticker"]).mean()
            # Target: adjclose from four rows later in the grouped frame, i.e. four
            # quarters ahead provided no quarter is missing from the price history.
            prices["y"] = prices["adjclose"].shift(-4)
            data = filing.merge(prices,on=["year","quarter","ticker"],how="left")
            # Selecting included_columns raises KeyError when a ticker lacks any
            # of them; that ticker is then reported and skipped by the handler below.
            data = data.merge(sector_lookup,on="ticker",how="left")[included_columns]
            all_filings.append(data)
        except Exception as e:
            # Best effort: report the ticker that failed and move on to the next.
            print(ticker,str(e))
finally:
    # Release both connections even if the loop is interrupted part-way.
    sec.disconnect()
    market.disconnect()

 25%|████████████████████████████████████▋                                                                                                             | 125/497 [00:40<01:50,  3.36it/s]

STZ "['earningspersharediluted', 'earningspersharebasic'] not in index"


 32%|██████████████████████████████████████████████▍                                                                                                   | 158/497 [00:50<02:12,  2.56it/s]

DRE 'year'


 39%|████████████████████████████████████████████████████████▋                                                                                         | 193/497 [01:02<01:59,  2.55it/s]

FRC 'filed'


 41%|███████████████████████████████████████████████████████████▉                                                                                      | 204/497 [01:05<00:57,  5.06it/s]

FBHS 'year'


 45%|██████████████████████████████████████████████████████████████████                                                                                | 225/497 [01:13<01:38,  2.76it/s]

HSY "['earningspersharediluted', 'earningspersharebasic'] not in index"


 52%|███████████████████████████████████████████████████████████████████████████▊                                                                      | 258/497 [01:24<00:54,  4.42it/s]

JBHT "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"
JKHY "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"


 64%|█████████████████████████████████████████████████████████████████████████████████████████████▍                                                    | 318/497 [01:44<00:45,  3.91it/s]

MNST "['earningspersharediluted', 'earningspersharebasic'] not in index"


 69%|████████████████████████████████████████████████████████████████████████████████████████████████████▊                                             | 343/497 [01:52<00:45,  3.38it/s]

NVR "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"


 70%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 348/497 [01:53<00:33,  4.46it/s]

ODFL "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"


 74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 368/497 [01:59<00:41,  3.13it/s]

PXD "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"


 82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                         | 410/497 [02:14<00:37,  2.33it/s]

SBNY 'filed'


 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                     | 425/497 [02:19<00:20,  3.43it/s]

SYF "['accumulatedothercomprehensiveincomelossnetoftax'] not in index"


 91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏             | 450/497 [02:25<00:11,  4.07it/s]

TSN "['earningspersharebasic'] not in index"


 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 470/497 [02:32<00:07,  3.44it/s]

V "['earningspersharediluted', 'earningspersharebasic'] not in index"


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 497/497 [02:41<00:00,  3.07it/s]


In [6]:
# Stack the per-ticker frames row-wise. Each frame keeps its own row labels,
# so index values repeat across tickers in the combined frame.
training_data = pd.concat(objs=all_filings, axis=0)
training_data

Unnamed: 0,year,quarter,date,filed,ticker,GICS Sector,GICS Sub-Industry,adjclose,assets,liabilitiesandstockholdersequity,accumulatedothercomprehensiveincomelossnetoftax,earningspersharediluted,earningspersharebasic,y
0,2012,1,2012-02-16,20120216,MMM,Industrials,Industrial Conglomerates,63.545741,2.967400e+10,3.088600e+10,-4.284000e+09,2.519091,2.555455,76.368726
1,2012,2,2012-05-03,20120503,MMM,Industrials,Industrial Conglomerates,63.627935,3.181550e+10,3.181550e+10,-4.905000e+09,1.540000,1.565000,81.819282
2,2012,3,2012-08-02,20120802,MMM,Industrials,Industrial Conglomerates,67.603937,3.247750e+10,3.247750e+10,-5.018500e+09,2.400000,2.440000,88.124361
3,2012,4,2012-11-01,20121101,MMM,Industrials,Industrial Conglomerates,68.030758,3.304750e+10,3.304750e+10,-4.786500e+09,3.172500,3.217500,97.059920
4,2013,1,2013-02-14,20130214,MMM,Industrials,Industrial Conglomerates,76.368726,3.188267e+10,3.274600e+10,-4.887500e+09,2.743636,2.783636,101.847559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37,2021,4,2021-11-04,20211104,ZTS,Health Care,Pharmaceuticals,216.862532,1.365700e+10,1.365700e+10,-6.985000e+08,2.057500,2.070000,147.533594
38,2022,1,2022-02-15,20220215,ZTS,Health Care,Pharmaceuticals,196.040632,1.375450e+10,1.375450e+10,-7.470000e+08,3.600000,3.623333,156.787441
39,2022,2,2022-05-05,20220505,ZTS,Health Care,Pharmaceuticals,171.798380,1.388000e+10,1.388000e+10,-7.340000e+08,1.215000,1.220000,
40,2022,3,2022-08-04,20220804,ZTS,Health Care,Pharmaceuticals,166.327240,1.383500e+10,1.383500e+10,-7.430000e+08,1.702500,1.712500,


In [10]:
# Walk-forward fit: for each target year, train on the seven preceding years,
# predict on the target year's rows, and store the result in the "sim" table.
fs.connect()
try:
    for year in range(2019,2020):
        # Training window covers years [year-7, year).
        in_window = (training_data["year"]<year) & (training_data["year"]>=year-7)
        # Drop incomplete rows, then renumber. drop=True discards the old
        # per-ticker row labels instead of inserting them as extra
        # "index"/"level_0" columns.
        training_set = training_data[in_window].dropna().reset_index(drop=True)
        prediction_set = training_data[training_data["year"]==year].reset_index(drop=True)
        stuff = modeler.model(training_set,prediction_set,factors)
        # Shift the year label forward by one (vectorised instead of iterrows).
        stuff["year"] = stuff["year"] + 1
        fs.store("sim",stuff)
finally:
    # Close the connection even if modelling or storing fails.
    fs.disconnect()

NameError: name 'speculation_db' is not defined