In [35]:
from datetime import datetime, timedelta
from itertools import product
from statistics import variance

import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

from database.adatabase import ADatabase
from database.market import Market
from database.sec import SEC
from modeler.modeler import Modeler as m
from processor.processor import Processor as p

In [36]:
# Handle to the market data store; the next cell connects through it to read the "prices" table.
market = Market()

In [37]:
# Load the raw "prices" table from the market store.
market.connect()
try:
    prices = market.retrieve("prices")
finally:
    # Always disconnect, even when the retrieval raises, so a failed run of this
    # cell does not leave the handle connected.
    market.disconnect()

In [38]:
# Handle to the "strat_eg" database; the "trades_industry" table is read through it further down.
main_db = ADatabase("strat_eg")

In [39]:
# Run the project's date-processing helper over the price table
# (presumably it parses the "date" column — helper not shown here; confirm).
prices = p.column_date_processing(prices)
# Derive calendar columns by reading the matching attribute off every row's date.
for calendar_field in ("year", "week", "quarter"):
    prices[calendar_field] = [getattr(day, calendar_field) for day in prices["date"]]

In [76]:
# Load the backtest trades ("trades_industry") into `t`.
main_db.connect()
try:
    t = main_db.retrieve("trades_industry")
finally:
    # Always disconnect, even when the retrieval raises, so a failed run of this
    # cell does not leave the handle connected.
    main_db.disconnect()

In [77]:
# Fractional return of each trade: (sell_price - adjclose) / adjclose.
t["delta"] = t["sell_price"].sub(t["adjclose"]).div(t["adjclose"])

In [78]:
# Backtest grid: for every (prediction_horizon, model-flag combination, position)
# compound the per-trade returns of the matching rows of `t`, starting from an
# equal share of 100 per position slot. A combination with no matching trades
# keeps that starting share unchanged.
analysis = []
xgbs = [True,False]
cats = [True,False]
skls = [True,False]
econs = [True,False]
financials = [True,False]
prediction_horizons = [1,2,4,12]
positions = 11
# Starting capital of a single position slot (loop-invariant).
starting_stake = 100 / positions
for prediction_horizon in tqdm(prediction_horizons):
    # Narrow once per horizon, then once per flag combination, instead of
    # re-evaluating the full seven-condition mask over `t` for every position.
    horizon_trades = t[t["prediction_horizon"] == prediction_horizon]
    # product() iterates right-most fastest, i.e. the same order as nested
    # xgb -> cat -> skl -> econ -> financial loops.
    for xgb, cat, skl, econ, financial in product(xgbs, cats, skls, econs, financials):
        config_trades = horizon_trades[
            (horizon_trades["xgb"] == xgb)
            & (horizon_trades["cat"] == cat)
            & (horizon_trades["skl"] == skl)
            & (horizon_trades["econ"] == econ)
            & (horizon_trades["financial"] == financial)
        ]
        for position in range(positions):
            position_trades = config_trades[config_trades["position"] == position].sort_values("date")
            # Per-trade fractional return, computed locally so nothing is
            # written back onto a filtered frame.
            deltas = (position_trades["sell_price"] - position_trades["adjclose"]) / position_trades["adjclose"]
            # Compound trade by trade in date order.
            pv = starting_stake
            for delta in deltas:
                pv = pv * (1 + delta)
            analysis.append({
                "position": position,
                "prediction_horizon": prediction_horizon,
                "xgb": xgb,
                "cat": cat,
                "skl": skl,
                "econ": econ,
                "financial": financial,
                "pv": pv
            })

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.14it/s]


In [79]:
# One row per (position, prediction_horizon, xgb, cat, skl, econ, financial) with its compounded "pv".
a = pd.DataFrame(analysis)

In [80]:
# Spread the per-position values into columns: one row per configuration,
# one column per position number. Every column of `a` other than "pv" and
# "position" identifies the configuration.
config_columns = [column for column in a.columns if column not in ("pv", "position")]
complete = a.pivot_table(
    index=config_columns,
    columns="position",
    values="pv",
).reset_index()

In [81]:
# Total portfolio value of a configuration = sum over its position columns 0..positions-1.
position_columns = list(range(positions))
complete["pv"] = [
    sum(row[column] for column in position_columns)
    for _, row in complete.iterrows()
]

In [82]:
# Show the ten configurations with the highest total portfolio value.
ranked_by_pv = complete.sort_values("pv", ascending=False)
ranked_by_pv.head(10)

position,prediction_horizon,xgb,cat,skl,econ,financial,0,1,2,3,4,5,6,7,8,9,10,pv
31,1,True,True,True,True,True,13.106754,19.836221,22.344056,11.664018,10.964185,11.237185,10.687108,27.893949,6.53635,19.276251,12.490786,166.036864
96,12,False,False,False,False,False,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
95,4,True,True,True,True,True,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
94,4,True,True,True,True,False,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
93,4,True,True,True,False,True,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
92,4,True,True,True,False,False,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
91,4,True,True,False,True,True,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
90,4,True,True,False,True,False,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
89,4,True,True,False,False,True,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0
88,4,True,True,False,False,False,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,9.090909,100.0


In [None]:
# Tag every trade row with a constant 1 in a "count" column.
# NOTE(review): this cell has no execution count (In [None]) and no visible cell
# reads "count" — confirm whether it is still needed.
t["count"] = 1

In [89]:
# Per-position summary across all trades: mean "adjclose" and summed "delta".
# Relies on the "delta" column added to `t` in an earlier cell.
(
    t.groupby("position")
    .agg(adjclose=("adjclose", "mean"), delta=("delta", "sum"))
    .reset_index()
)

Unnamed: 0,position,adjclose,delta
0,0,17.048467,0.645783
1,1,191.328686,0.892149
2,2,107.120612,1.01238
3,3,10.782541,0.375143
4,4,98.872595,0.335548
5,5,20.672315,0.344086
6,6,12.492247,0.368349
7,7,22.636826,1.424355
8,8,14.291759,-0.146641
9,9,31.889485,0.810888


In [83]:
# DISABLED (kept for reference): day-by-day portfolio replay per model configuration.
# For every horizon / flag combination it expands each trade into one row per
# holding day, prices those rows from `prices`, chains the position value from
# trade to trade, and appends a per-date "pv" plus its relative deviation from a
# 100-row rolling mean ("abs_var") to `report`.
# NOTE(review) before re-enabling:
#   - the inner `for i in range(holding_days+1)` reuses the outer loop variable `i`;
#   - `risk_set` is created once per position and is not reset between trades;
#   - the bare `whut["abs_var"].mean()` discards its result;
#   - `initial = 100` is assigned but never read;
#   - `fillna(method="ffill")` is deprecated in recent pandas (use `.ffill()`).
# report = []
# for prediction_horizon in tqdm(prediction_horizons):
#     for xgb in xgbs:
#         for cat in cats:
#             for skl in skls:
#                 for econ in econs:
#                     for financial in financials:
#                         best_iteration = t[(t["prediction_horizon"]==prediction_horizon) \
#                                                 & (t["xgb"]==xgb) \
#                                                & (t["cat"]==cat) \
#                                                & (t["skl"]==skl) \
#                                                & (t["econ"]==econ) \
#                                                 & (t["financial"]==financial) \
#                                                ].sort_values("date")
#                         stuff = []
#                         initial = 100
#                         if best_iteration.index.size > 0:
#                             for position in best_iteration["position"].unique():
#                                 pv = 100 / positions
#                                 risk_set = []
#                                 position_trades = best_iteration[best_iteration["position"]==position].sort_values("date")
#                                 for i in range(position_trades.index.size):
#                                     trade = position_trades.iloc[i]
#                                     holding_days = int((trade["sell_date"]-trade["date"]).days)
#                                     for i in range(holding_days+1):
#                                         updated_date = trade["date"] + timedelta(days=i)
#                                         risk_set.append({"ticker":trade["ticker"],"date":updated_date,"position":position})
#                                     mem = pd.DataFrame(risk_set)
#                                     price_data = prices[(prices["ticker"]==trade["ticker"]) & (prices["date"]>=trade["date"]) & (prices["date"]<=trade["sell_date"])][["date","ticker","adjclose"]].copy()
#                                     merged_mem = mem.merge(price_data,on=["date","ticker"],how="left").dropna()
#                                     merged_mem.sort_values("date",inplace=True)
#                                     if trade["delta"] > 0:
#                                         new_closes = list(merged_mem["adjclose"].iloc[:-1])
#                                         new_closes.append(trade["sell_price"])
#                                         merged_mem["adjclose"] = new_closes
#                                     volume = pv /merged_mem.iloc[0]["adjclose"] 
#                                     merged_mem["volume"] = volume
#                                     merged_mem["pv"] = merged_mem["adjclose"] * merged_mem["volume"]
#                                     pv = merged_mem["pv"].iloc[-1]
#                                     stuff.append(merged_mem)
#                             test = pd.concat(stuff)
#                             whut = test.pivot_table(index="date",columns="position",values="pv").fillna(method="ffill").reset_index()
#                             for i in range(positions):
#                                 if i not in whut.columns:
#                                     whut[i] = 100/positions
#                             whut["pv"] = [sum([row[1][i] for i in range(positions)]) for row in whut.iterrows()]
#                             whut["rolling"] = whut["pv"].rolling(window=100).mean()
#                             whut["abs_var"] = abs(whut["pv"] - whut["rolling"]) / whut["rolling"]
#                             whut["abs_var"].mean()
#                             whut["prediction_horizon"] = prediction_horizon
#                             whut["xgb"] = xgb
#                             whut["cat"] = cat
#                             whut["skl"]= skl
#                             whut["econ"]= econ
#                             whut["financial"]= financial
#                             report.append(whut[["date","pv","abs_var","prediction_horizon","xgb","cat","skl","econ","financial"]])


In [84]:
# report_df = pd.concat(report)

In [85]:
#report_df.groupby(["prediction_horizon","xgb","cat","skl","econ","financial"]).agg({"pv":"last","abs_var":"mean"}).sort_values("pv",ascending=False)

In [86]:
# for horizon in prediction_horizons:
#     whut = report_df[report_df["prediction_horizon"]==horizon]
#     plt.plot(whut["date"],whut["abs_var"])
# plt.legend(prediction_horizons)

In [87]:
# for horizon in prediction_horizons:
#     whut = report_df[report_df["prediction_horizon"]==horizon]
#     plt.plot(whut["date"],whut["pv"])
# plt.legend(prediction_horizons)