In [1]:
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
import numpy as np
import matplotlib.pyplot as plt
import pickle
from functional.backtester import Backtester as b
from statistics import variance
import math

In [2]:
# Backtest window. start_date is inclusive and end_date exclusive where the
# price history is filtered further down; both are also handed to the backtester.
start_date = datetime(year=2020, month=1, day=1)
end_date = datetime(year=2023, month=1, day=1)

In [3]:
# Handles for the data stores used by this notebook. Each one is connected
# explicitly (.connect()) right before use and disconnected afterwards.
# Supplies the "sp500" table and the per-ticker "prices" history.
market = Market()
# Holds the "sim" collection whose `prediction` column is merged in below.
speculation_db = ADatabase("spec")
# Holds the "sim" collection whose `prediction` becomes `classification_prediction`.
classification_db = ADatabase("spec_classification")
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
class_spec = ADatabase("class_spec")
# Destination for the assembled "complete" frame and for the backtest output.
experimental = ADatabase("short_included")

In [4]:
# Fetch the S&P 500 constituent table, then expose its "Symbol" column under
# the name "ticker", which is the key the later cells iterate and merge on.
market.connect()
constituents = market.retrieve("sp500")
market.disconnect()
sp500 = constituents.rename(columns={"Symbol":"ticker"})

In [5]:
# Benchmark (SPY) forward returns and their rolling variance.
bench = p.column_date_processing(pd.read_csv("./csv_files/FED/SPY.csv"))
bench["day"] = [stamp.weekday() for stamp in bench["date"]]

bench_returns = bench.copy()
# Forward-looking return: enter at the next row's adjusted close, exit at the
# adjusted close four rows ahead.
entry_price = bench_returns["adjclose"].shift(-1)
exit_price = bench_returns["adjclose"].shift(-4)
bench_returns["bench_return"] = (exit_price - entry_price) / entry_price
# Sample variance of that return over the trailing 100 rows.
bench_returns["variance"] = bench_returns["bench_return"].rolling(window=100).var()
# Display only: the NaN-free view is not assigned back, so bench_returns keeps
# its incomplete leading/trailing rows.
bench_returns.dropna()

Unnamed: 0,date,open,high,low,close,adjclose,volume,year,quarter,week,day,bench_return,variance
99,2018-09-20,292.640015,293.940002,291.239990,293.579987,270.806702,100360600,2018,3,38,3,-0.007226,0.000078
100,2018-09-21,293.089996,293.220001,291.809998,291.989990,270.559357,105479700,2018,3,38,4,-0.001134,0.000077
101,2018-09-24,291.339996,291.500000,290.369995,291.019989,269.660553,53409600,2018,3,39,0,-0.000103,0.000075
102,2018-09-25,291.529999,291.649994,290.480011,290.750000,269.410339,44370000,2018,3,39,1,0.006382,0.000074
103,2018-09-26,290.910004,292.239990,289.410004,289.880005,268.604248,79739700,2018,3,39,2,0.002993,0.000071
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1249,2023-04-18,415.579987,415.720001,412.779999,414.209991,414.209991,63560000,2023,2,16,1,-0.003646,0.000305
1250,2023-04-19,412.220001,415.079987,412.160004,414.140015,414.140015,55227300,2023,2,16,2,-0.014082,0.000304
1251,2023-04-20,411.209991,413.700012,410.269989,411.880005,411.880005,75840400,2023,2,16,3,-0.019020,0.000306
1252,2023-04-21,412.190002,412.679993,410.170013,412.200012,412.200012,73436100,2023,2,16,4,-0.000533,0.000298


In [6]:
# One-year treasury yield (FRED series DGS1) converted to a weekly gross
# risk-free factor.
tyields = pd.read_csv("./csv_files/FED/DGS1.csv")
tyields = p.column_date_processing(tyields)
# FRED writes "." on days without a quote. Treating that as a 0% yield would
# zero the risk-free rate on any such date that still has equity prices, so
# mark it missing and carry the previous quote forward instead. The forward
# fill relies on the rows being in ascending date order, as they are in the file.
tyields["dgs1"] = tyields["dgs1"].replace(".", np.nan).astype("float").ffill()
# Percent -> gross annual factor (2.24 -> 1.0224).
tyields["yield"] = 1 + tyields["dgs1"] / 100
# 52nd root of the annual factor: the weekly factor that compounds back to it.
tyields["weekly_yield"] = np.exp(np.log(tyields["yield"]) / 52)
tyields

Unnamed: 0,date,dgs1,year,quarter,week,yield,weekly_yield
0,2018-04-27,2.24,2018,2,17,1.0224,1.000426
1,2018-04-30,2.24,2018,2,18,1.0224,1.000426
2,2018-05-01,2.26,2018,2,18,1.0226,1.000430
3,2018-05-02,2.24,2018,2,18,1.0224,1.000426
4,2018-05-03,2.24,2018,2,18,1.0224,1.000426
...,...,...,...,...,...,...,...
1300,2023-04-21,4.78,2023,2,16,1.0478,1.000898
1301,2023-04-24,4.76,2023,2,17,1.0476,1.000895
1302,2023-04-25,4.60,2023,2,17,1.0460,1.000865
1303,2023-04-26,4.64,2023,2,17,1.0464,1.000873


In [9]:
# Build, per ticker, forward returns plus a rolling beta against the benchmark,
# then stack every ticker into price_returns.
new_prices = []
failed_tickers = []  # (ticker, error) pairs, so skipped symbols are visible instead of silently lost
market.connect()
try:
    for ticker in tqdm(sp500["ticker"].unique()):
        try:
            ticker_sim = market.retrieve_ticker_prices("prices",ticker)
            ticker_sim = p.column_date_processing(ticker_sim)
            # Keep 100 calendar days of lead-in before start_date so the rolling
            # window below has history to work with.
            in_window = (ticker_sim["date"]>=(start_date-timedelta(days=100))) & (ticker_sim["date"]<end_date)
            # .copy() so the column assignments below write to an independent
            # frame rather than to a slice of the retrieved one.
            ticker_sim = ticker_sim[in_window].copy()
            # Forward returns: enter at the next row's adjusted close, exit i rows ahead.
            entry_price = ticker_sim["adjclose"].shift(-1)
            for i in range(2,5):
                ticker_sim[f"return_{i}"] = (ticker_sim["adjclose"].shift(-i) - entry_price) / entry_price
            ticker_sim["day"] = [x.weekday() for x in ticker_sim["date"]]
            # The Monday row's return_4 is used as that week's return; a week
            # without a Monday row gets NaN from the left join and is removed
            # by the dropna() further down.
            returns = ticker_sim[ticker_sim["day"]==0].copy()
            returns["weekly_return"] = returns["return_4"]
            # NOTE(review): bench_returns has one row per trading day, so joining
            # it on (year, week) repeats each ticker row once per benchmark day of
            # that week. The 100-row rolling covariance therefore runs over the
            # duplicated rows. Left as-is because changing it alters every beta;
            # confirm whether a one-row-per-week benchmark was intended.
            new_sim = ticker_sim.merge(returns[["year","week","weekly_return"]], on=["year","week"],how="left") \
                                .merge(bench_returns[["year","week","bench_return","variance"]],on=["year","week"],how="left")
            new_sim["market_cov"] = new_sim["weekly_return"].rolling(window=100).cov(new_sim["bench_return"])
            completed = new_sim.copy()
            # beta = cov(asset, market) / var(market)
            completed["beta"] = completed["market_cov"] / completed["variance"]
            completed  = completed.dropna()
            new_prices.append(completed)
        except Exception as exc:
            # Best effort per ticker, but keep a record. Catching Exception
            # rather than everything lets Ctrl-C / kernel interrupt stop the loop.
            failed_tickers.append((ticker, repr(exc)))
            continue
finally:
    # Release the connection even if the loop is interrupted.
    market.disconnect()
if failed_tickers:
    print(f"skipped {len(failed_tickers)} tickers: {[name for name, _ in failed_tickers]}")
price_returns = pd.concat(new_prices)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 491/491 [00:38<00:00, 12.88it/s]


In [8]:
# Inspect the stacked per-ticker frame. Repeated dates in the output come from
# the (year, week) benchmark join in the cell that builds it.
# NOTE(review): the saved execution counts show this cell was run before the
# latest run of the cell that builds price_returns — re-run top to bottom.
price_returns

Unnamed: 0,date,close,high,low,open,volume,adjclose,adjhigh,adjlow,adjopen,...,week,return_2,return_3,return_4,day,weekly_return,bench_return,variance,market_cov,beta
99,2019-10-18,163.10,164.27,162.98,163.56,2693736,142.910499,143.935669,142.805353,143.313557,...,42,0.009642,0.016994,-0.024406,4,0.000736,0.001267,0.000211,0.000187,0.883434
100,2019-10-21,165.94,166.15,164.76,165.47,2011589,145.398946,145.582951,144.365014,144.987126,...,43,0.007282,-0.033723,-0.008655,0,-0.008655,0.008662,0.000205,0.000185,0.902370
101,2019-10-21,165.94,166.15,164.76,165.47,2011589,145.398946,145.582951,144.365014,144.987126,...,43,0.007282,-0.033723,-0.008655,0,-0.008655,0.011405,0.000194,0.000183,0.945017
102,2019-10-21,165.94,166.15,164.76,165.47,2011589,145.398946,145.582951,144.365014,144.987126,...,43,0.007282,-0.033723,-0.008655,0,-0.008655,0.009455,0.000189,0.000180,0.955665
103,2019-10-21,165.94,166.15,164.76,165.47,2011589,145.398946,145.582951,144.365014,144.987126,...,43,0.007282,-0.033723,-0.008655,0,-0.008655,0.008421,0.000186,0.000177,0.953741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3991,2022-12-23,240.94,241.65,236.59,237.92,1049559,239.572452,240.278423,235.247143,236.569594,...,51,-0.013193,-0.001701,-0.009003,4,-0.012177,0.006228,0.000653,0.000078,0.119191
3992,2022-12-23,240.94,241.65,236.59,237.92,1049559,239.572452,240.278423,235.247143,236.569594,...,51,-0.013193,-0.001701,-0.009003,4,-0.012177,-0.012506,0.000653,0.000078,0.119359
3993,2022-12-23,240.94,241.65,236.59,237.92,1049559,239.572452,240.278423,235.247143,236.569594,...,51,-0.013193,-0.001701,-0.009003,4,-0.012177,-0.010664,0.000652,0.000077,0.118571
3994,2022-12-23,240.94,241.65,236.59,237.92,1049559,239.572452,240.278423,235.247143,236.569594,...,51,-0.013193,-0.001701,-0.009003,4,-0.012177,0.001384,0.000652,0.000078,0.120410


In [8]:
# Load the "sim" collection from the speculation store; its `prediction`
# column is merged onto the price frame further down.
speculation_db.connect()
simulation = speculation_db.retrieve("sim")
speculation_db.disconnect()

In [9]:
# Load the "sim" collection from the classification store; its `prediction`
# column is renamed to `classification_prediction` before merging.
classification_db.connect()
classification_simulation = classification_db.retrieve("sim")
classification_db.disconnect()

In [10]:
# Move every prediction one week forward so it lines up with the following
# week's prices when merged on (year, week, ticker).
# NOTE(review): this mutates the loaded frames, so running the cell twice shifts
# by two weeks. `year` is not rolled over either, so a final-week prediction
# ends up in a week number one past it — confirm whether that is intended.
for shifted in (simulation, classification_simulation):
    shifted["week"] = shifted["week"].add(1)
# Distinguish the classifier's output from the price model's `prediction`.
classification_simulation = classification_simulation.rename(columns={"prediction":"classification_prediction"})

In [11]:
# Attach both models' predictions and the risk-free weekly factor to the price
# frame. All joins are left joins: rows without a match keep NaN in the new
# columns.
# Selecting the needed columns directly replaces the earlier drop-then-select,
# which copied the whole frame and raised KeyError if the dropped column was
# absent, even though the selection discarded it anyway.
price_predictions = simulation[["year","week","ticker","prediction"]]
class_predictions = classification_simulation[["year","week","ticker","classification_prediction"]]
risk_free = tyields[["date","weekly_yield"]]
# NOTE: `simulation` is rebound from "predictions" to "prices + predictions",
# so this cell cannot be re-run without reloading the predictions first.
simulation = (
    price_returns
    .merge(price_predictions,on=["year","week","ticker"],how="left")
    .merge(class_predictions,on=["year","week","ticker"],how="left")
    .merge(risk_free,on="date",how="left")
)

In [12]:
# Keep only fully populated rows: anything the left joins could not match
# (no prediction, no classification, or no yield for that date) is discarded.
sim = simulation.dropna()

In [13]:
# Projected return from the price prediction, plus the CAPM required rate of
# return (rrr) for each row.
projected = (sim["prediction"] - sim["adjclose"]) / sim["adjclose"]
# assign() builds a new frame instead of mutating the dropna() result column by
# column, and the vectorised forms replace Python-level loops over every row.
sim = sim.assign(
    projected_return=projected,
    delta=projected.abs(),                       # magnitude of the projected move
    delta_sign=np.where(projected >= 0, 1, -1),  # +1 long / -1 short
    # Weekly gross factor equivalent to an assumed 15% annual market return.
    market_return=math.exp(np.log(1.15)/52),
)
# CAPM on gross weekly factors: rf + beta * (rm - rf), minus 1 for a net rate.
sim["rrr"] = sim["weekly_yield"] + sim["beta"] * (sim["market_return"] - sim["weekly_yield"]) - 1
# Collapse duplicate (date, ticker) rows to a single averaged row.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of silently dropping them.
sim = sim.groupby(["date","ticker"]).mean(numeric_only=True).reset_index()

In [14]:
# Put the frame in chronological order, then show the columns of interest.
sim = sim.sort_values("date")
preview_columns = ["date","ticker","return_4","delta","projected_return","delta_sign","rrr"]
sim[preview_columns]

Unnamed: 0,date,ticker,return_4,delta,projected_return,delta_sign,rrr
0,2019-10-18,A,-0.006359,0.002945,-0.002945,-1.0,0.001632
331,2019-10-18,NWSA,0.011722,0.011156,-0.011156,-1.0,0.002180
330,2019-10-18,NWL,-0.023594,0.022173,-0.022173,-1.0,0.000666
329,2019-10-18,NVR,-0.015835,0.216996,-0.216996,-1.0,-0.002167
328,2019-10-18,NVDA,0.004337,0.053381,-0.053381,-1.0,0.002404
...,...,...,...,...,...,...,...
355898,2022-12-23,EPAM,0.030240,0.083110,0.083110,1.0,0.001314
355897,2022-12-23,EOG,-0.019382,0.010806,-0.010806,-1.0,0.000616
355896,2022-12-23,ENPH,-0.034895,0.094967,0.094967,1.0,0.001159
355894,2022-12-23,EMN,-0.013446,0.042980,0.042980,1.0,0.001262


In [15]:
# Persist the assembled frame as the "complete" collection. The existing
# collection is dropped first, presumably so that re-runs replace it rather
# than append to it.
experimental.connect()
experimental.drop("complete")
experimental.store("complete",sim)
experimental.disconnect()

In [16]:
# Reload the stored "complete" frame, so the cells from here on can be run
# without rebuilding it from prices and predictions.
experimental.connect()
sim = experimental.retrieve("complete")
experimental.disconnect()

In [17]:
# Average within each (date, ticker) pair again after the reload.
# NOTE(review): the stored frame was already grouped this way before being
# saved, so this is presumably a safeguard — confirm it is still needed.
per_ticker_day = sim.groupby(["date","ticker"])
sim = per_ticker_day.mean().reset_index()

In [18]:
# Candidate settings for each backtest switch. Every combination becomes one
# parameter dict; only `ceilings` and `floors` vary here, giving 2 x 2 = 4 runs.
reqs = [0.05]
signals = [0.05]
values = [True]
classifications = [False]
ceilings = [True,False]
floors = [True,False]
hedges = [False]
training_year = 4

# The `for` clauses are ordered outermost-first, so the rightmost clause (`req`)
# varies fastest.
parameters = [
    {
        "value":value,
        "classification":classification,
        "ceiling":ceiling,
        "floor":floor,
        "hedge":hedge,
        "signal":signal,
        "req":req,
        "training_years":training_year,
    }
    for value in values
    for classification in classifications
    for ceiling in ceilings
    for floor in floors
    for hedge in hedges
    for signal in signals
    for req in reqs
]

In [19]:
# Sanity check: number of parameter combinations that will be backtested.
len(parameters)

4

In [20]:
# Pick the first combination for ad-hoc inspection; the backtest loop below
# rebinds `parameter` on every iteration.
parameter = parameters[0]

In [None]:
# Run the backtest once per parameter combination; the open `experimental`
# handle is passed to the backtester along with the frame and date window.
experimental.connect()
try:
    # Start from a clean "trades" collection.
    experimental.drop("trades")
    for parameter in tqdm(parameters):
        # Fresh copy per run so one backtest cannot mutate the frame seen by the next.
        current_sim = sim.copy()
        b.experimental_backtest(current_sim,parameter,start_date,end_date,experimental)
finally:
    # Release the connection even when a backtest raises or the run is interrupted.
    experimental.disconnect()

  0%|                                                                                                                                                          | 0/4 [00:00<?, ?it/s]