In [1]:
## personal imports
from processor.processor import Processor as p
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
from functional.backtester import Backtester as b
from transformer.adhoc_transforms import Adhoc as adhocs
from transformer.risk_transforms import Risk as risks


## additional imports
import math
import pickle
from datetime import datetime, timedelta
from itertools import product
from statistics import variance

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
from tqdm import tqdm

In [2]:
# Backtest window; both bounds are handed to b.experimental_backtest further down.
start_date = datetime(year=2019, month=1, day=1)
end_date = datetime(year=2023, month=1, day=1)

In [3]:
# Handles used throughout the notebook. Connections are opened explicitly by the
# later cells via connect() / disconnect(); this cell only constructs the objects.
market = Market()                                   # source of the "sp500" table and per-ticker prices
sec = SEC()                                         # source of filing data looked up by CIK
fin_db = ADatabase("financial")                     # "sim" -> financial_prediction
earnings_db = ADatabase("earnings")                 # "sim" -> earnings_prediction
dividends_db = ADatabase("dividends")               # "sim" -> dividends_prediction
speculation_db = ADatabase("spec")                  # "sim" -> weekly prediction
classification_db = ADatabase("spec_classification")  # "sim" -> classification_prediction
current_db = ADatabase("all_financial")             # receives the merged "complete" frame and the "trades"

In [4]:
# Pull the index constituents table and expose its symbol column under the
# name "ticker", which is the key every later merge and lookup uses.
market.connect()
sp500 = market.retrieve("sp500")
market.disconnect()
sp500 = sp500.rename({"Symbol": "ticker"}, axis="columns")

In [5]:
# Reference series from the project's Adhoc helpers:
#   * tyields supplies the date / weekly_yield / quarterly_yield columns merged into `sim` below;
#   * bench_returns is passed to risks.backtesting_risk_prep for every ticker.
# NOTE(review): the helper names suggest treasury yields and an SPY benchmark — confirm in adhoc_transforms.
tyields = adhocs.tyields()
bench_returns = adhocs.spy_bench()

In [9]:
# Build `price_returns`: for every ticker in the sp500 table, fetch its filing
# data (by CIK) and its price history, run both through
# risks.backtesting_risk_prep together with the benchmark returns, and stack the
# per-ticker frames into one DataFrame.
#
# Tickers that fail are skipped on purpose (best effort), but the reason is kept
# in `skipped_tickers` so data gaps are visible instead of silently disappearing.
new_prices = []
skipped_tickers = {}  # ticker -> repr() of the exception that caused the skip
market.connect()
sec.connect()
try:
    for ticker in tqdm(sp500["ticker"].unique()):
        try:
            # .iloc[0] picks the scalar explicitly; int() on a one-element Series is deprecated.
            cik = int(sp500.loc[sp500["ticker"] == ticker, "CIK"].iloc[0])
            financials = sec.retrieve_filing_data(cik)
            ticker_sim = market.retrieve_ticker_prices("prices", ticker)
            completed = risks.backtesting_risk_prep(ticker_sim, bench_returns, financials)
            new_prices.append(completed)
        except Exception as error:
            # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still stops the loop.
            skipped_tickers[ticker] = repr(error)
            continue
finally:
    # Always release both connections, even if the loop is interrupted.
    sec.disconnect()
    market.disconnect()

if skipped_tickers:
    print(f"skipped {len(skipped_tickers)} ticker(s); see skipped_tickers for details")
if not new_prices:
    raise ValueError("no ticker could be prepared; inspect skipped_tickers for the underlying errors")
price_returns = pd.concat(new_prices)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 490/490 [03:20<00:00,  2.44it/s]


In [10]:
# Weekly predictions stored under "sim" in the "spec" database.
speculation_db.connect()
simulation = speculation_db.retrieve("sim")
speculation_db.disconnect()
# Move every row one week forward before it is joined on (year, week, ticker).
# NOTE(review): presumably so a prediction lines up with the week after it was made;
# there is no year rollover handling here — confirm that is intended.
simulation = simulation.assign(week=simulation["week"] + 1)

In [11]:
# Predictions stored under "sim" in the "financial" database.
fin_db.connect()
financial_simulation = fin_db.retrieve("sim")
fin_db.disconnect()
# Give the prediction column a model-specific name (several models are merged
# into one frame later) and move every row one year forward.
# NOTE(review): the +1 presumably aligns a prediction with the year it targets — confirm.
financial_simulation = (
    financial_simulation
    .rename(columns={"prediction": "financial_prediction"})
    .assign(year=lambda frame: frame["year"] + 1)
)

In [12]:
# Predictions stored under "sim" in the "earnings" database.
earnings_db.connect()
earnings_simulation = earnings_db.retrieve("sim")
earnings_db.disconnect()
# Give the prediction column a model-specific name (several models are merged
# into one frame later) and move every row one year forward.
# NOTE(review): the +1 presumably aligns a prediction with the year it targets — confirm.
earnings_simulation = (
    earnings_simulation
    .rename(columns={"prediction": "earnings_prediction"})
    .assign(year=lambda frame: frame["year"] + 1)
)

In [13]:
# Predictions stored under "sim" in the "dividends" database.
dividends_db.connect()
dividends_simulation = dividends_db.retrieve("sim")
dividends_db.disconnect()
# Give the prediction column a model-specific name (several models are merged
# into one frame later) and move every row one year forward.
# NOTE(review): the +1 presumably aligns a prediction with the year it targets — confirm.
dividends_simulation = (
    dividends_simulation
    .rename(columns={"prediction": "dividends_prediction"})
    .assign(year=lambda frame: frame["year"] + 1)
)

In [14]:
# Predictions stored under "sim" in the "spec_classification" database.
classification_db.connect()
classification_simulation = classification_db.retrieve("sim")
classification_db.disconnect()
# Move every row one week forward and give the prediction column a
# model-specific name so it can sit next to the other models' predictions.
# NOTE(review): no year rollover is applied to the shifted week — confirm that is intended.
classification_simulation = (
    classification_simulation
    .assign(week=lambda frame: frame["week"] + 1)
    .rename(columns={"prediction": "classification_prediction"})
)

In [15]:
# Assemble the master frame: start from the per-ticker price/risk rows and
# left-join every model output onto them, so rows without a prediction are kept
# (their prediction columns become NaN).
#   * weekly models join on (year, week, ticker)
#   * yields join on date
#   * fundamental models join on (year, quarter, ticker)
sim = (
    price_returns
    .merge(
        simulation.drop("adjclose", axis=1)[["year", "week", "ticker", "prediction"]],
        on=["year", "week", "ticker"],
        how="left",
    )
    .merge(
        classification_simulation
        .drop(["adjclose", "training_years"], axis=1, errors="ignore")
        [["year", "week", "ticker", "classification_prediction"]],
        on=["year", "week", "ticker"],
        how="left",
    )
    .merge(
        tyields[["date", "weekly_yield", "quarterly_yield"]],
        on="date",
        how="left",
    )
    .merge(
        financial_simulation[["year", "quarter", "ticker", "financial_prediction"]],
        on=["year", "quarter", "ticker"],
        how="left",
    )
    .merge(
        earnings_simulation[["year", "quarter", "ticker", "earnings_prediction"]],
        on=["year", "quarter", "ticker"],
        how="left",
    )
    .merge(
        dividends_simulation[["year", "quarter", "ticker", "dividends_prediction"]],
        on=["year", "quarter", "ticker"],
        how="left",
    )
)

In [16]:
# For every (year, quarter) find the GICS sector with the highest mean
# `return_4`, then label that sector as the "top_sector" of the FOLLOWING year
# (year + 1) so it can be joined onto later rows.
#
# Only `return_4` is averaged: taking .mean() over the whole frame would also try
# to average non-numeric columns such as `ticker`, which raises TypeError on
# pandas >= 2.0 and is wasted work on older versions.
ranks = (
    sim.merge(sp500[["ticker", "GICS Sector"]], on="ticker", how="left")
    .groupby(["year", "quarter", "GICS Sector"])["return_4"]
    .mean()
    .reset_index()
    # Descending sort puts the best sector first within each (year, quarter) ...
    .sort_values(["year", "quarter", "return_4"], ascending=False)
    .groupby(["year", "quarter"])
    # ... so first() picks it.
    .first()
    .reset_index()
    .rename(columns={"GICS Sector": "top_sector"})[["year", "quarter", "top_sector"]]
    .assign(year=lambda frame: frame["year"] + 1)
)

In [18]:
# Add the required-return columns computed by the project's Risk helper.
sim = risks.required_returns(sim)
# Average all remaining columns per (date, ticker, classification_prediction)
# and order the result chronologically.
# NOTE(review): groupby drops rows whose key is NaN by default, so rows that got
# no classification_prediction from the left merge disappear here — confirm intended.
sim = (
    sim.groupby(["date", "ticker", "classification_prediction"])
    .mean()
    .reset_index()
    .sort_values("date")
)

In [19]:
# Derived from the per-ticker price frame; later passed to risks.strat_specific
# inside the backtest loop.
# NOTE(review): presumably the set of tickers that pay dividends — confirm in adhocs.dividend_tickers.
dividend_tickers = adhocs.dividend_tickers(price_returns)

In [21]:
# Persist the merged frame: replace whatever is stored under "complete" in the
# "all_financial" database with the current `sim`.
# NOTE(review): the old data is dropped before the new data is written, so a
# failing store() would presumably leave "complete" empty — confirm this is acceptable.
current_db.connect()
current_db.drop("complete")
current_db.store("complete",sim)
current_db.disconnect()

In [22]:
# Reload the persisted frame so everything below works from the stored copy.
# Starting the notebook from this cell presumably skips the expensive build above,
# provided the earlier setup cells have been run — TODO confirm.
current_db.connect()
sim = current_db.retrieve("complete")
current_db.disconnect()

In [23]:
# Show the reloaded frame to eyeball its columns and date range.
sim

Unnamed: 0,date,ticker,classification_prediction,close,high,low,open,volume,adjclose,adjhigh,...,quarterly_yield,financial_prediction,earnings_prediction,dividends_prediction,projected_return,delta,delta_sign,market_return,market_quarterly_return,rrr
0,2019-01-07,A,True,66.85,67.430,65.61,65.64,3235055.0,64.883650,65.446589,...,1.006511,35.188060,-3.053239,-22.830417,0.083919,0.083919,1.0,1.002691,1.035558,0.002217
1,2019-01-07,O,True,62.28,63.000,62.16,62.47,2728738.0,50.075456,50.654363,...,1.006511,,,,0.096335,0.096335,1.0,1.002691,1.035558,0.000145
2,2019-01-07,NXPI,True,76.01,77.010,74.89,75.61,3515747.0,71.489976,72.430510,...,1.006511,,,,0.231822,0.231822,1.0,1.002691,1.035558,0.003097
3,2019-01-07,NWSA,True,11.98,12.060,11.67,11.73,2191335.0,11.332641,11.408318,...,1.006511,,,,0.053538,0.053538,1.0,1.002691,1.035558,0.001182
4,2019-01-07,NWL,True,19.50,19.860,18.80,19.16,9021095.0,15.704388,15.994315,...,1.006511,53.626593,-0.235981,102.763496,-0.020051,0.020051,-1.0,1.002691,1.035558,0.000056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443249,2022-12-23,EOG,True,130.61,130.710,127.48,127.48,1678396.0,127.648272,127.746004,...,1.011331,,,,-0.010806,0.010806,-1.0,1.002691,1.035558,0.002059
443250,2022-12-23,ENPH,True,293.95,303.000,288.27,302.57,2338842.0,293.950000,303.000000,...,1.011331,,,,0.094967,0.094967,1.0,1.002691,1.035558,-0.002397
443251,2022-12-23,EMR,True,95.54,96.040,94.59,94.75,2977772.0,94.963790,95.460775,...,1.011331,98.725644,1.709533,60.647191,0.006407,0.006407,1.0,1.002691,1.035558,0.000092
443252,2022-12-23,EL,True,241.02,242.840,238.75,242.10,551007.0,240.364839,242.179891,...,1.011331,,,,0.012229,0.012229,1.0,1.002691,1.035558,-0.001161


In [24]:
# Collapse to exactly one row per (date, ticker) by averaging every other column;
# as_index=False keeps the two keys as ordinary columns.
sim = sim.groupby(["date", "ticker"], as_index=False).mean()

In [25]:
# Build the backtest parameter grid: one dict per combination of the option
# lists below. Each dict is later passed to b.experimental_backtest, and its
# "strat" entry selects the strategy-specific preparation step.
trades = []
reqs = [0.05]
signals = [0.05]
values = [True]
classifications = [True, False]
ceilings = [True, False]
floors = [True, False]
hedges = [False]
parameters = []
positions = 10
training_year = 4
strats = ["earnings", "dividends", "fin", "sector"]

# product() varies its last argument fastest, i.e. it yields the combinations in
# the same order as nested for-loops with `strat` outermost and `req` innermost.
for strat, value, classification, ceiling, floor, hedge, signal, req in product(
    strats, values, classifications, ceilings, floors, hedges, signals, reqs
):
    parameter = {
        "value": value,
        "classification": classification,
        "ceiling": ceiling,
        "floor": floor,
        "hedge": hedge,
        "signal": signal,
        "req": req,
        "training_years": training_year,
        "strat": strat,
    }
    parameters.append(parameter)

In [26]:
# Sanity check: 4 strats x 2 classifications x 2 ceilings x 2 floors = 32 combinations.
len(parameters)

32

In [27]:
# Pick the first parameter set, e.g. for running a single backtest by hand.
# The loop in the next cell rebinds `parameter`, so this value is not used by it.
parameter = parameters[0]

In [28]:
# Run one backtest per parameter set. Previous results under "trades" are
# cleared first; `current_db` is handed to the backtester.
# NOTE(review): presumably experimental_backtest writes its trades into current_db — confirm.
current_db.connect()
try:
    current_db.drop("trades")
    for parameter in tqdm(parameters):
        # Work on a copy so the shared `sim` frame is never modified by a strategy.
        simulation = sim.copy()
        strat = parameter["strat"]
        simulation = risks.strat_specific(strat, simulation, dividend_tickers, sp500, ranks)
        b.experimental_backtest(simulation.copy(), parameter, start_date, end_date, current_db)
finally:
    # Release the connection even when a backtest raises or the run is interrupted.
    current_db.disconnect()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [04:00<00:00,  7.51s/it]
