In [1]:
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
from modeler_strats.universal_modeler import UniversalModeler
import numpy as np
import matplotlib.pyplot as plt
import pickle
from functional.backtester import Backtester as b
import math
from statistics import variance

In [2]:
start_date = datetime(2023,1,1)
end_date = datetime.now()

In [3]:
market = Market()
speculation_db = ADatabase("spec")
classification_db = ADatabase("spec_classification")
current_db = ADatabase("current")
umod = UniversalModeler()

In [4]:
market.connect()
sp500 = market.retrieve("sp500")
sp500 = sp500.rename(columns={"Symbol":"ticker"})
market.disconnect()

In [5]:
bench = pd.read_csv("./csv_files/FED/SPY.csv")
bench = p.column_date_processing(bench)
bench["day"] = [x.weekday() for x in bench["date"]]
bench_returns = bench
bench_returns[f"bench_return"] = (bench_returns["adjclose"].shift(-4) - bench_returns["adjclose"].shift(-1)) / bench_returns["adjclose"].shift(-1)
bench_returns["bench_return"] = bench_returns["bench_return"].fillna(0)
bench_returns["variance"] = bench_returns["bench_return"].rolling(window=100).var()
bench_returns

Unnamed: 0,date,open,high,low,close,adjclose,volume,year,quarter,week,day,bench_return,variance
0,2022-05-23,398.670013,415.380005,398.450012,415.26001,408.341675,166937000,2022,2,21,0,-0.045732,
1,2022-05-30,413.549988,417.440002,406.929993,410.540009,403.700317,334006700,2022,2,22,0,-0.017735,
2,2022-06-06,414.779999,416.609985,389.75,389.799988,383.305847,400315000,2022,2,23,0,0.066935,
3,2022-06-13,379.850006,383.899994,362.170013,365.859985,359.764679,645270700,2022,2,24,0,-0.01269,
4,2022-06-20,371.890015,390.089996,370.179993,390.079987,385.238129,344213700,2022,2,25,0,0.036329,
5,2022-06-27,391.049988,393.160004,372.559998,381.23999,376.507874,405582500,2022,2,26,0,0.059999,
6,2022-07-04,375.880005,390.640015,372.899994,388.670013,383.845642,288787900,2022,3,27,0,0.073586,
7,2022-07-11,385.850006,386.869995,371.040009,385.130005,380.349579,373575900,2022,3,28,0,0.08102,
8,2022-07-18,388.380005,400.179993,380.660004,395.089996,390.185944,350654600,2022,3,29,0,0.024637,
9,2022-07-25,395.75,413.029999,389.950012,411.98999,406.87616,349890300,2022,3,30,0,-0.019735,


In [6]:
tyields = pd.read_csv("./csv_files/FED/DGS1.csv")
tyields = p.column_date_processing(tyields)
tyields["dgs1"] = tyields["dgs1"].replace(".",0)
tyields["dgs1"] = tyields["dgs1"].astype("float")
tyields["yield"] = [1+(x/100) for x in tyields["dgs1"]]
tyields["weekly_yield"] = [math.exp(math.log(x)/52) for x in tyields["yield"]]
tyields

Unnamed: 0,date,dgs1,year,quarter,week,yield,weekly_yield
0,1962-01-02,3.22,1962,1,1,1.0322,1.000610
1,1962-01-03,3.24,1962,1,1,1.0324,1.000613
2,1962-01-04,3.24,1962,1,1,1.0324,1.000613
3,1962-01-05,3.26,1962,1,1,1.0326,1.000617
4,1962-01-08,3.31,1962,1,2,1.0331,1.000626
...,...,...,...,...,...,...,...
16013,2023-05-19,5.02,2023,2,20,1.0502,1.000942
16014,2023-05-22,5.07,2023,2,21,1.0507,1.000952
16015,2023-05-23,5.06,2023,2,21,1.0506,1.000950
16016,2023-05-24,5.12,2023,2,21,1.0512,1.000961


In [7]:
classification_factors = []
classification_factors.append("d1")
classification_factors.append("d2")
classification_factors.append("d3")
classification_factors.append("rolling14")

In [8]:
factors = [str(x) for x in range(14)]

In [9]:
included_columns=[
    "year",
    "week",
    "ticker",
    "adjclose",
    "y"
]
included_columns.extend(factors)
included_columns.extend(classification_factors)

In [10]:
trades = []
reqs = [0.05]
signals = [0.05]
values = [True]
classifications = [True,False]
ceilings = [True,False]
floors = [True,False]
hedges = [False]
parameters = []
positions = 10
training_year = 4
for value in values:
    for classification in classifications:
        for ceiling in ceilings:
            for floor in floors:
                for hedge in hedges:
                    for signal in signals:
                        for req in reqs:
                            parameter = {"value":value
                                         ,"classification":classification
                                         ,"ceiling":ceiling
                                         ,"floor":floor
                                         ,"hedge":hedge
                                         ,"signal":signal
                                         ,"req":req
                                         ,"training_years":training_year}
                            parameters.append(parameter)

In [11]:
training_sets = []
for ticker in tqdm(sp500["ticker"].unique()):
    try:
        prices = market.retrieve_ticker_prices("prices",ticker)
        prices = p.column_date_processing(prices)
        prices["year"] = [x.year for x in prices["date"]]
        prices["quarter"] = [x.quarter for x in prices["date"]]
        ticker_data = prices[prices["ticker"]==ticker]
        ticker_data.sort_values("date",ascending=True,inplace=True)
        ticker_data["adjclose"] = [float(x) for x in ticker_data["adjclose"]]
        ticker_data = ticker_data.groupby(["year","week"]).mean().reset_index()
        for i in range(14):
            ticker_data[str(i)] = ticker_data["adjclose"].shift(i)
        ticker_data["d1"] = ticker_data["adjclose"].pct_change(periods=1)
        ticker_data["d2"] = ticker_data["d1"].pct_change(periods=1)
        ticker_data["d3"] = ticker_data["d2"].pct_change(periods=1)
        ticker_data["rolling14"] = ticker_data["adjclose"].rolling(window=14).mean()
        ticker_data.dropna(inplace=True)
        ticker_data["ticker"] = ticker
        ticker_data["y"] = ticker_data["adjclose"].shift(-1)
        ticker_data = ticker_data.replace([np.inf, -np.inf], np.nan).dropna()
        ticker_data.dropna(inplace=True)
        ticker_data = ticker_data[included_columns]
        training_sets.append(ticker_data)
    except:
        continue
data = pd.concat(training_sets)
data = data.dropna()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 490/490 [00:45<00:00, 10.77it/s]


In [12]:
market.connect()
sp500 = market.retrieve("sp500")
sp500 = sp500.rename(columns={"Symbol":"ticker"})
market.disconnect()

In [13]:
prediction_slice = data[data["year"]==start_date.year].reset_index(drop=True)

In [14]:
speculation_db.connect()
models = speculation_db.retrieve("models")
speculation_db.disconnect()

In [15]:
classification_db.connect()
classification_models = classification_db.retrieve("models")
classification_db.disconnect()

In [16]:
simulation = umod.recommend(models,prediction_slice.copy(),factors)
simulation["week"] = simulation["week"] + 1
simulation["prediction"] = (simulation["cat_prediction"] + simulation["xgb_prediction"] + simulation["tf_prediction"]) / 3

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-03-03 11:17:12         2522
metadata.json                                  2023-03-03 11:17:12           64
variables.h5                                   2023-03-03 11:17:12        84160
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dense_2
......vars
.........0
.........1
...metrics\mean
......vars
.........0
.........1
...metrics\mean_absolute_percentage_error
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars


In [17]:
simulation

Unnamed: 0,year,week,ticker,adjclose,y,0,1,2,3,4,...,d2,d3,rolling14,xgb_prediction,xgb_score,cat_prediction,cat_score,tf_prediction,tf_score,prediction
0,2023,2,MMM,122.681938,126.880705,122.681938,118.167401,120.018608,123.209868,123.805885,...,-3.476904,7.595792,119.935581,123.393387,0.99911,123.298255,0.937924,123.487595,0.996624,123.393079
1,2023,3,MMM,126.880705,120.493744,126.880705,122.681938,118.167401,120.018608,123.209868,...,-0.104170,-0.970039,121.158728,127.931686,0.99911,126.306590,0.937924,127.037865,0.996624,127.092047
2,2023,4,MMM,120.493744,114.338678,120.493744,126.880705,122.681938,118.167401,120.018608,...,-2.470813,22.719010,122.044239,121.359016,0.99911,120.771690,0.937924,125.509865,0.996624,122.546857
3,2023,5,MMM,114.338678,114.723524,114.338678,120.493744,126.880705,122.681938,118.167401,...,0.014775,-1.005980,122.214356,114.323181,0.99911,115.482854,0.937924,121.736809,0.996624,117.180948
4,2023,6,MMM,114.723524,113.570960,114.723524,114.338678,120.493744,126.880705,122.681938,...,-1.065891,-73.143548,121.923588,114.323181,0.99911,114.572088,0.937924,119.875771,0.996624,116.257013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9752,2023,17,ZTS,174.841118,174.408000,174.841118,171.964237,167.469901,164.446403,163.420608,...,-0.376617,-1.819388,166.749345,173.420776,0.99911,175.486834,0.937924,176.608688,0.996624,175.172100
9753,2023,18,ZTS,174.408000,179.778000,174.408000,174.841118,171.964237,167.469901,164.446403,...,-1.148074,2.048386,167.721723,173.420776,0.99911,175.486834,0.937924,178.244064,0.996624,175.717225
9754,2023,19,ZTS,179.778000,184.624000,179.778000,174.408000,174.841118,171.964237,167.469901,...,-13.429258,10.697206,168.708926,178.915390,0.99911,181.177957,0.937924,182.182175,0.996624,180.758507
9755,2023,20,ZTS,184.624000,180.702000,184.624000,179.778000,174.408000,174.841118,171.964237,...,-0.124535,-0.990727,169.967718,184.204208,0.99911,186.102953,0.937924,186.328445,0.996624,185.545202


In [18]:
classification_simulation = umod.recommend(classification_models,prediction_slice.copy(),classification_factors)
classification_simulation["week"] = classification_simulation["week"] + 1
classification_simulation["classification_prediction"] = ((classification_simulation["cat_prediction"] + \
                                                          classification_simulation["xgb_prediction"] + \
                                                          classification_simulation["tf_prediction"]) / 3) > 0.5

Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-03-15 09:46:40         2141
metadata.json                                  2023-03-15 09:46:40           64
variables.h5                                   2023-03-15 09:46:40        23208
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dropout
......vars
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........2
.........3
.........4
.........5
.........6
.........7
.........8
...vars


In [19]:
classification_simulation["classification_prediction"] = [int(x) for x in classification_simulation["classification_prediction"]]

In [20]:
new_prices = []
market.connect()
for ticker in tqdm(sp500["ticker"].unique()):
    try:
        ticker_sim = market.retrieve_ticker_prices("prices",ticker)
        ticker_sim = p.column_date_processing(ticker_sim)
        for i in range(2,5):
            ticker_sim[f"return_{i}"] = (ticker_sim["adjclose"].shift(-i) - ticker_sim["adjclose"].shift(-1)) / ticker_sim["adjclose"].shift(-1)
        ticker_sim["day"] = [x.weekday() for x in ticker_sim["date"]]
        returns = ticker_sim[ticker_sim["day"]==0]
        returns["weekly_return"] = returns["return_4"]
        new_sim = ticker_sim.merge(returns[["year","week","weekly_return"]], on=["year","week"],how="left")
        new_sim = new_sim.merge(bench_returns[["year","week","bench_return","variance"]],on=["year","week"],how="left").dropna()
        new_sim["market_cov"] = new_sim["weekly_return"].rolling(window=100).cov(new_sim["bench_return"])
        covs = new_sim["weekly_return"].rolling(window=100).cov(new_sim["bench_return"])
        completed = new_sim.copy()
        completed["beta"] = completed["market_cov"] / completed["variance"]
        completed  = completed.dropna().groupby(["date","ticker"]).mean().reset_index()
        new_prices.append(completed)
    except:
        continue
market.disconnect()
price_returns = pd.concat(new_prices)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 490/490 [00:43<00:00, 11.19it/s]


In [21]:
sim = price_returns.merge(simulation[["year","week","ticker","prediction"]],on=["year","week","ticker"],how="left") \
                    .merge(classification_simulation[["year","week","ticker","classification_prediction"]],on=["year","week","ticker"],how="left") \
                    .merge(tyields[["date","weekly_yield"]],on="date",how="left")


In [22]:
sim["projected_return"] = (sim["prediction"] - sim["adjclose"]) / sim["adjclose"]
sim["delta"] = [abs(x) for x in sim["projected_return"]]
sim["delta_sign"] = [1 if x >= 0 else -1 for x in sim["projected_return"]]
sim["market_return"] = math.exp(np.log(1.15)/52)
sim["rrr"] = sim["weekly_yield"] + sim["beta"] * (sim["market_return"] - sim["weekly_yield"]) - 1

In [23]:
sim.sort_values(["date","delta"]).tail(10)

Unnamed: 0,close,high,low,open,volume,adjclose,adjhigh,adjlow,adjopen,adjvolume,...,week,ticker,classification_prediction,date,weekly_yield,projected_return,delta,delta_sign,market_return,rrr


In [24]:
current_db.connect()
current_db.drop("trades")
for parameter in tqdm(parameters):
    b.backtest(sim.copy(),parameter,start_date,end_date,current_db)
current_db.disconnect()

 50%|█████████████████████████████████████████████████████████████                                                             | 4/8 [00:00<00:00, 30.76it/s]

current trades documents must be a non-empty list
current trades documents must be a non-empty list
current trades documents must be a non-empty list
current trades documents must be a non-empty list
current trades documents must be a non-empty list
current trades documents must be a non-empty list
current trades documents must be a non-empty list


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 30.30it/s]

current trades documents must be a non-empty list





In [25]:
current_db.connect()
t = current_db.retrieve("trades")
current_db.disconnect()

In [26]:
t = t.fillna(1)

In [27]:
analysis = []
indexer = list(parameters[0].keys())
for parameter in parameters:
    try:
        ledger = t.sort_values(["year","week"])
        for key in parameter.keys():
            ledger = ledger[ledger[key]==parameter[key]]
        portfolio = ledger.pivot_table(index=["year","week"],columns="position",values="actual_returns").fillna(1).reset_index()
        counted_columns = [x for x in range(positions)]
        cumulative = portfolio[[i for i in counted_columns]].cumprod()
        cumulative["date_string"] = [f'{int(row[1]["year"])}-W{int(row[1]["week"])}' for row in portfolio.iterrows()]
        cumulative["date"] = [datetime.strptime(x + '-1', '%G-W%V-%u') for x in cumulative["date_string"]]
        cumulative["pv"] = [sum([row[1][column] * 0.1 for column in counted_columns]) for row in cumulative.iterrows()]
        cumulative = cumulative.merge(bench_returns[["date","adjclose","bench_return","variance"]],on="date",how="left")
        cumulative["bench"] = [1 + (row[1]["adjclose"] - cumulative["adjclose"].iloc[0]) / cumulative["adjclose"].iloc[0] for row in cumulative.iterrows()]
        cumulative["return"] = cumulative["pv"].pct_change().fillna(1)
        cumulative["beta"] = cumulative[["return","bench_return"]].cov().iloc[0][1]/cumulative["variance"].iloc[-1]
        cumulative["rrr"] = tyields["yield"].iloc[-1] + cumulative["beta"].iloc[-1]*(cumulative["bench"].iloc[-1]-tyields["yield"].iloc[-1])
        cumulative["sharpe"] = (cumulative["pv"] - tyields["yield"].iloc[-1]) / cumulative["beta"].iloc[-1]
        for index_stuff in indexer:
            cumulative[index_stuff] = parameter[index_stuff]
        analysis.append(cumulative[indexer + ["pv","beta","rrr","sharpe"]].iloc[-1])
    except Exception as e:
        print(str(e))
        continue
a = pd.DataFrame(analysis)

'year'
'year'
'year'
'year'
'year'
'year'
'year'
'year'


In [28]:
a.sort_values("pv",ascending=False)

KeyError: 'pv'

In [None]:
top = a.sort_values("pv",ascending=False).iloc[0]
analysis = t.copy()
for key in parameter.keys():
    analysis = analysis[(analysis[key]==top[key])]
ledger = analysis[["year","week","ticker","delta","actual_returns","position"]]
ledger = ledger.merge(sp500[["ticker","GICS Sector","Security"]],on="ticker",how="left")

In [None]:
t.columns

In [None]:
ledger.sort_values(["week","position"]).to_csv("current.csv")

In [None]:
portfolio = ledger.pivot_table(index=["year","week"],columns="position",values="actual_returns").fillna(1).reset_index()
counted_columns = [x for x in range(ledger["position"].max()+1)]
for col in range(positions):
    if col not in counted_columns:
        portfolio[col] = 1
portfolio.reset_index()

In [None]:
counted_columns

In [None]:
counted_columns = [x for x in range(positions)]
cumulative = portfolio[[i for i in counted_columns]].cumprod()
cumulative["date_string"] = [f'{int(row[1]["year"])}-W{int(row[1]["week"])}' for row in portfolio.iterrows()]
cumulative["date"] = [datetime.strptime(x + '-1', '%G-W%V-%u') for x in cumulative["date_string"]]
cumulative["pv"] = [sum([row[1][column] * 0.1 for column in counted_columns]) for row in cumulative.iterrows()]
bench = pd.read_csv("./csv_files/FED/SPY.csv")
bench = p.column_date_processing(bench)
cumulative = cumulative.merge(bench[["date","adjclose"]],on="date",how="left")
cumulative["bench"] = [1 + (row[1]["adjclose"] - cumulative["adjclose"].iloc[0]) / cumulative["adjclose"].iloc[0] for row in cumulative.iterrows()]
cumulative

In [None]:
plt.figure(figsize=(12, 7), dpi=80)
plt.plot(cumulative["date"],cumulative["pv"])
plt.plot(cumulative["date"],cumulative["bench"])
plt.legend(["pv","bench"])

In [None]:
industry_analysis = ledger.pivot_table(index=["year","week"],columns="GICS Sector",values="actual_returns").fillna(1).reset_index()
industry_analysis[[x for x in industry_analysis.columns if x not in  ["year","week"]]].cumprod()