In [1]:
from datetime import datetime, timedelta
from itertools import product
from statistics import variance

import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

from database.adatabase import ADatabase
from database.market import Market
from database.sec import SEC
from modeler.modeler import Modeler as m
from processor.processor import Processor as p

In [2]:
# Handle for the market data store that the price-loading cell reads from.
market = Market()

In [3]:
# Retrieve the "prices" dataset from the market store.
# The try/finally guarantees the connection is released even when the read
# raises, so a failed cell does not leave a dangling connection in the kernel.
market.connect()
try:
    prices = market.retrieve("prices")
finally:
    market.disconnect()

In [459]:
# Database handle ("strat_eg") that the backtest cell stores its trades in.
main_db = ADatabase("strat_eg")

In [4]:
# Load the "sim" dataset of every strategy database and suffix its
# prediction/score columns with the strategy name, so the columns of the
# different strategies remain distinguishable once the frames are merged.
datasets = {}
for strat in ["econ","financial","speculation"]:
    strat_db = ADatabase(f"strategy_{strat}")
    strat_db.connect()
    try:
        sim = strat_db.retrieve("sim")
    finally:
        # Release the connection even if the read fails.
        strat_db.disconnect()
    strat_factors = [x for x in sim.columns if "prediction" in x or "score" in x]
    # One rename with a full mapping instead of one in-place rename per column.
    datasets[strat] = sim.rename(columns={factor: f"{factor}_{strat}" for factor in strat_factors})

In [5]:
# Unpack the per-strategy frames. The econ frame is narrowed to its own
# "econ"-tagged columns plus the (year, quarter) keys it is later merged on.
econ_source = datasets["econ"]
econ_columns = [
    column
    for column in econ_source.columns
    if "econ" in column or column in ["year","quarter"]
]
financial = datasets["financial"]
econ = econ_source[econ_columns]
speculation = datasets["speculation"]

In [6]:
# Attach the econ columns to every financial row by (year, quarter), then
# average down to a single row per (year, quarter, ticker).
# NOTE(review): .mean() is called without numeric_only; how non-numeric
# columns are treated depends on the installed pandas version - confirm.
quarterly_set = (
    financial
    .merge(econ, how="left", on=["year","quarter"])
    .groupby(by=["year","quarter","ticker"])
    .mean()
    .reset_index()
)

In [7]:
# Run the project's date processing on the price frame, then derive the
# calendar fields (year, week, quarter) used as merge keys further down.
# NOTE(review): if "date" holds pandas Timestamps, `.week` is the ISO week
# while `.year` is the calendar year, so days around New Year can carry a
# mismatched (year, week) pair - confirm the upstream datasets use the same
# convention before changing it.
prices = p.column_date_processing(prices)
for calendar_field in ("year", "week", "quarter"):
    prices[calendar_field] = [getattr(stamp, calendar_field) for stamp in prices["date"]]

In [8]:
# Join the daily prices with the speculation output on (year, week, ticker)
# and discard every row that still contains a missing value.
weekly_set = pd.merge(
    prices,
    speculation,
    on=["year","week","ticker"],
).dropna()

In [9]:
# Combine the weekly and quarterly sets per ticker, then average the result
# down to one row per (year, week, ticker).
data = (
    pd.merge(weekly_set, quarterly_set, on=["year","quarter","ticker"])
    .groupby(by=["year","week","ticker"])
    .mean()
    .reset_index()
)

In [410]:
# Inspect the merged frame: one averaged row per (year, week, ticker).
data

Unnamed: 0,year,week,ticker,close,high,low,open,volume,adjclose,adjhigh,...,xgb_prediction_financial,xgb_score_financial,cat_prediction_financial,cat_score_financial,skl_prediction_econ,skl_score_econ,xgb_prediction_econ,xgb_score_econ,cat_prediction_econ,cat_score_econ
0,2018,1,A,68.616,68.95600,67.758,67.930,1635981.4,66.729983,67.060067,...,43.854263,0.828260,45.524871,0.892704,283.364489,0.989353,240.851105,0.932185,235.495669,0.975082
1,2018,1,AAL,48.552,49.16376,48.066,48.508,4192834.0,47.384376,47.981625,...,41.409863,0.828260,49.612891,0.892704,283.364489,0.989353,240.851105,0.932185,235.495669,0.975082
2,2018,1,AAPL,170.052,171.01000,168.566,169.440,26819737.8,40.771514,41.001728,...,117.499225,0.828260,132.502256,0.892704,283.364489,0.989353,240.851105,0.932185,235.495669,0.975082
3,2018,1,ABBV,98.208,98.66200,96.586,97.340,4612167.8,81.563551,81.942995,...,74.709951,0.828260,43.965202,0.892704,283.364489,0.989353,240.851105,0.932185,235.495669,0.975082
4,2018,1,ABC,90.466,90.86800,88.917,89.610,1264813.4,84.698228,85.075724,...,60.163467,0.828260,68.896526,0.892704,283.364489,0.989353,240.851105,0.932185,235.495669,0.975082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59541,2021,52,WYNN,87.020,88.24500,86.025,87.280,1987899.5,87.020000,88.245000,...,219.274872,0.888794,245.583279,0.809253,489.751001,0.961143,334.148285,0.922918,247.196367,0.914868
59542,2021,52,XEL,67.220,67.28250,66.600,66.820,1212708.0,67.220000,67.282500,...,129.673843,0.888794,70.864098,0.809253,489.751001,0.961143,334.148285,0.922918,247.196367,0.914868
59543,2021,52,XLNX,221.525,223.83500,217.025,220.390,2879817.5,221.525000,223.835000,...,85.066063,0.888794,100.311842,0.809253,489.751001,0.961143,334.148285,0.922918,247.196367,0.914868
59544,2021,52,XRAY,56.124,56.56200,55.763,56.045,969413.0,56.101867,56.539777,...,59.909763,0.888794,65.800066,0.809253,489.751001,0.961143,334.148285,0.922918,247.196367,0.914868


In [476]:
# ---------------------------------------------------------------------------
# Backtest.
# For every prediction horizon and every on/off combination of the factor
# families (econ / financial / skl / cat / xgb):
#   1. fit the regression models on the pre-2020 rows of `data`,
#   2. predict the 2020+ rows and average the model outputs,
#   3. walk the calendar day by day for each rank slot ("position") and
#      store every completed trade under "trades" in `main_db`.
# ---------------------------------------------------------------------------
prediction_horizons = [1,2,4,12]   # number of (weekly) rows the target is shifted ahead
trades = []                        # not filled here; kept in case later cells use the name
xgbs = [True,False]
cats = [True,False]
skls = [True,False]
econs = [True,False]
financials = [True,False]
positions = 5                      # rank slots traded independently (0 = best delta)
start_date = datetime(2020,1,1)
end_date = datetime(2021,12,15)


def _enabled_factors(all_factors, col_dict):
    """Return the factors whose name contains none of the disabled keys of ``col_dict``."""
    disabled = [key for key, enabled in col_dict.items() if not enabled]
    return [name for name in all_factors if not any(key in name for key in disabled)]


def _ensemble_predict(models, prediction_set, iteration_factors, training_rows):
    """Add ``<api>_prediction`` / ``<api>_score`` columns for every usable model.

    ``models`` is iterated row by row and must expose "model", "api" and
    "score". ``prediction_set`` is modified in place. A model that fails is
    reported and skipped. Returns the names of the prediction columns that
    were actually written.
    """
    produced = []
    for _, model_row in models.iterrows():
        try:
            model = model_row["model"]
            api = model_row["api"]
            score = model_row["score"]
            prediction_set[f"{api}_prediction"] = model.predict(prediction_set[iteration_factors])
            prediction_set[f"{api}_score"] = score
        except Exception as error:
            # Best effort: one failing model must not abort the whole sweep.
            print(f"model skipped ({training_rows} training rows): {error}")
            continue
        if f"{api}_prediction" not in produced:
            produced.append(f"{api}_prediction")
    return produced


def _simulate_position(simulation, position, limit, trade_tags, db, start, end):
    """Trade the ``position``-th best ranked ticker from ``start`` until ``end``.

    On each date the rows of ``simulation`` are ranked by "delta" (descending)
    and the row at rank ``position`` is bought if its delta is positive. The
    trade is closed at the first of the next ``limit`` rows of that ticker
    whose gain reaches the predicted delta (sold at exactly that gain);
    otherwise it is closed at a later row (see the timeout note below). Each
    trade is stored under "trades" in ``db`` together with ``trade_tags``,
    and the walk resumes the day after the exit.
    """
    one_day = timedelta(days=1)
    date = start
    while date < end:
        try:
            todays_recs = simulation[simulation["date"]==date].sort_values("delta",ascending=False)
            if todays_recs.index.size <= position:
                # No rows today (e.g. non-trading day) or fewer names than this slot needs.
                date = date + one_day
                continue
            offering = todays_recs.iloc[position]
            req = offering["delta"]
            if not req > 0:
                # Also covers NaN deltas (rows without a prediction).
                date = date + one_day
                continue
            ticker = offering["ticker"]
            buy_price = offering["adjclose"]
            # Sort explicitly so "the next `limit` rows" does not depend on the row order of prices.
            future = simulation[(simulation["ticker"]==ticker) & (simulation["date"]>date)].sort_values("date")
            exits = future.iloc[:limit]
            gains = (exits["adjclose"] - buy_price) / buy_price
            gain_exits = exits[gains >= req]
            if gain_exits.index.size < 1:
                # Timeout exit.
                # NOTE(review): the gain window covers rows 0..limit-1 but the
                # timeout sells at row limit+1, skipping row `limit` - possible
                # off-by-one; kept as in the original, confirm intent.
                if future.index.size <= limit + 1:
                    # Not enough price history left to close the trade.
                    date = date + one_day
                    continue
                exit_row = future.iloc[limit+1]
                sell_price = exit_row["adjclose"]
            else:
                exit_row = gain_exits.iloc[0]
                sell_price = buy_price * (1+(req))
            # Build the record as a plain dict instead of mutating the row taken from the frame.
            trade = offering.to_dict()
            trade["sell_price"] = sell_price
            trade["sell_date"] = exit_row["date"]
            trade.update(trade_tags)
            date = exit_row["date"] + one_day
            db.store("trades",pd.DataFrame([trade]))
        except Exception as e:
            # Best effort: report and move on to the next day.
            print(str(e))
            date = date + one_day


for prediction_horizon in tqdm(prediction_horizons):
    # Target: each ticker's own adjclose `prediction_horizon` rows ahead.
    # groupby(sort=False) keeps tickers in first-appearance order; assign()
    # returns a new frame, so no slice of `data` is written to.
    stuff = pd.concat([
        ticker_data.assign(y=ticker_data["adjclose"].shift(-prediction_horizon))
        for _, ticker_data in data.groupby("ticker", sort=False)
    ])
    factors = [x for x in stuff.columns if x == "adjclose" or "prediction" in x or "score" in x]
    # The train / predict split only depends on the horizon, so build it once.
    training_set = stuff[stuff["year"]<2020].dropna()
    prediction_base = stuff[stuff["year"]>=2020].dropna()
    limit = prediction_horizon * 5   # rows of daily prices a trade may stay open
    # Distinct loop names so the `econ` / `financial` DataFrames of the
    # earlier cell are not overwritten by booleans.
    for use_xgb, use_cat, use_skl, use_econ, use_financial in product(xgbs, cats, skls, econs, financials):
        col_dict = {"econ":use_econ,
                    "financial":use_financial,
                    "skl":use_skl,
                    "cat":use_cat,
                    "xgb":use_xgb}
        iteration_factors = _enabled_factors(factors, col_dict)
        if len(iteration_factors) == 1:
            # Only "adjclose" is left - nothing to learn from.
            continue
        refined_data = {"X":training_set[iteration_factors],"y":training_set[["y"]]}
        models = m.regression(refined_data)
        prediction_set = prediction_base.copy()
        prediction_cols = _ensemble_predict(models, prediction_set, iteration_factors, training_set.index.size)
        if not prediction_cols:
            print(f"no usable model for {col_dict}; combination skipped")
            continue
        # Average ONLY the columns written by the ensemble. Selecting every
        # column whose name contains "prediction" would also pull in the
        # upstream *_prediction_<strategy> input features.
        prediction_set["prediction"] = prediction_set[prediction_cols].mean(axis=1)
        cleaned_set = prediction_set[["year","week","ticker","prediction"]]
        simulation = prices.merge(cleaned_set,on=["year","week","ticker"],how="left")
        simulation["delta"] = (simulation["prediction"] - simulation["adjclose"]) / simulation["adjclose"]
        main_db.connect()
        try:
            for position in range(positions):
                trade_tags = {"position":position,
                              "limit":limit,
                              "prediction_horizon":prediction_horizon,
                              **col_dict}
                _simulate_position(simulation, position, limit, trade_tags, main_db, start_date, end_date)
        finally:
            # Release the connection even if the simulation is interrupted.
            main_db.disconnect()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [3:05:20<00:00, 2780.02s/it]
