In [1]:
from database.market import Market
from database.adatabase import ADatabase
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle

In [2]:
market = Market()

In [8]:
unit_db = ADatabase("strategy_unit")

In [4]:
market.connect()
prices = market.retrieve("alpha_prices")
market.disconnect()

In [5]:
prices = p.column_date_processing(prices)
prices["quarter"] = [x.quarter for x in prices["date"]]
prices["year"] = [x.year for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]
prices["close"] = [float(x) for x in prices["close"]]

In [7]:
labels = prices[["year","quarter","week","ticker","close"]].groupby(["year","quarter","week","ticker"]).mean().reset_index()

In [9]:
sims = []
for strat in tqdm(["financial","speculation","competition"]):
    db = ADatabase(f"strategy_{strat}")
    db.connect()
    sim = db.retrieve("sim")
    db.disconnect()
    sim[f"{strat}_prediction"] = [sum([row[1][col] for col in sim.columns if "prediction" in col]) for row in sim.iterrows()]
    sims.append(sim)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:12<00:00,  4.28s/it]


In [32]:
data = prices.merge(sims[0][["year","quarter","ticker","financial_prediction"]],on=["year","quarter","ticker"],how="left")
data = data.merge(sims[1][["year","week","ticker","speculation_prediction"]],on=["year","week","ticker"],how="left")
data = data.merge(sims[2][["year","week","ticker","competition_prediction"]],on=["year","week","ticker"],how="left")

In [33]:
data = data.groupby(["year","week","ticker"]).mean().reset_index().drop("close",axis=1).merge(labels,on=["year","week","ticker"],how="left").dropna()

In [34]:
shifted = []
for ticker in data["ticker"].unique():
    ticker_data = data[data["ticker"]==ticker].copy()
    ticker_data["close"] = ticker_data["close"].shift(-1)
    shifted.append(ticker_data.dropna())

In [35]:
model_data = pd.concat(shifted)

In [36]:
start_year = 2018
end_year = 2022
factors = ["financial_prediction","speculation_prediction","competition_prediction"]
sim = []
unit_db.connect()
for year in tqdm(range(start_year,end_year)):
    try:
        training_data = model_data[(model_data["year"]>=year-1) & (model_data["year"]<year)].reset_index(drop=True)
        if training_data.index.size > 100:
            prediction_data = data[(data["year"]==year)]
            refined_data = {"X":training_data[factors],"y":training_data[["close"]].rename(columns={"close":"y"})}
            models = m.regression(refined_data)
            models["year"] = year
            for row in models.iterrows():
                api = row[1]["api"]
                prediction = row[1]["model"].predict(prediction_data[factors])
                prediction_data[f"{api}_prediction"] = prediction
                prediction_data[f"{api}_score"] = row[1]["score"]
            included_columns = ["year","week","ticker"]
            included_columns.extend([x for x in prediction_data.columns if "score" in x or "prediction" in x])
            prediction_data["year"] = prediction_data["year"] + 1
            sim.append(prediction_data[included_columns])
            unit_db.store("sim",prediction_data[included_columns])
    except Exception as e:
        print(year,str(e))
unit_db.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:55<00:00, 13.97s/it]


In [37]:
simulation = pd.concat(sim).groupby(["year","week","ticker"]).mean().reset_index()
simulation = prices.merge(simulation,on=["year","week","ticker"]).dropna()

In [38]:
simulation["average_prediction"] = [sum([row[1][col] for col in simulation.columns if "prediction" in col]) for row in simulation.iterrows()]

In [39]:
simulation["average_prediction"] = simulation["average_prediction"] / 3
simulation["delta"] = (simulation["average_prediction"] - simulation["close"]) / simulation["close"]

In [40]:
simulation

Unnamed: 0,date,open,high,low,close,volume,ticker,quarter,year,week,...,speculation_prediction,competition_prediction,skl_prediction,skl_score,xgb_prediction,xgb_score,cat_prediction,cat_score,average_prediction,delta
0,2019-01-02,187.8200,190.9900,186.7000,190.95,2475193,MMM,1,2019,1,...,609.498389,693.814304,238.521052,0.934355,237.530609,0.994804,221.444119,0.973888,753.246876,2.944734
1,2019-01-03,188.2800,188.2800,182.8900,183.76,3358241,MMM,1,2019,1,...,609.498389,693.814304,238.521052,0.934355,237.530609,0.994804,221.444119,0.973888,753.246876,3.099080
2,2019-01-04,186.7500,191.9800,186.0300,191.32,2995052,MMM,1,2019,1,...,609.498389,693.814304,238.521052,0.934355,237.530609,0.994804,221.444119,0.973888,753.246876,2.937105
3,2019-12-30,177.2600,178.9600,175.5900,175.83,1715019,MMM,4,2019,1,...,609.498389,693.814304,238.521052,0.934355,237.530609,0.994804,221.444119,0.973888,753.246876,3.283950
4,2019-12-31,175.2300,176.5900,175.0700,176.42,1573520,MMM,4,2019,1,...,609.498389,693.814304,238.521052,0.934355,237.530609,0.994804,221.444119,0.973888,753.246876,3.269623
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424856,2022-03-24,117.2200,120.0000,115.9200,119.70,1858341,YUM,1,2022,12,...,316.359671,326.212886,114.669610,0.965589,114.798256,0.964908,111.784124,0.975904,451.316357,2.770396
424857,2022-03-25,120.1900,120.1900,118.7300,119.62,1242807,YUM,1,2022,12,...,316.359671,326.212886,114.669610,0.965589,114.798256,0.964908,111.784124,0.975904,451.316357,2.772917
424858,2022-03-28,119.9700,121.1900,119.0200,121.19,1280821,YUM,1,2022,13,...,321.892256,312.164798,112.613314,0.965589,112.915359,0.964908,111.171924,0.975904,442.776579,2.653574
424859,2022-03-29,122.7700,124.3400,121.7500,122.22,1692407,YUM,1,2022,13,...,321.892256,312.164798,112.613314,0.965589,112.915359,0.964908,111.171924,0.975904,442.776579,2.622783


In [41]:
positions = 10
start_date = simulation["date"].min()
end_date = simulation["date"].max()
reqs = [1,3,5,10]
limit = 14
values = [True,False]
trades = []
for value in tqdm(values):
    for req in tqdm(reqs):
        for position in range(positions):
            date = start_date
            iterration_sim = simulation.copy()
            if not value:
                iterration_sim["delta"] = iterration_sim["delta"] * -1
            while date < end_date:
                try:
                    todays_recs = iterration_sim[iterration_sim["date"]==date]
                    todays_recs.sort_values("delta",ascending=False,inplace=True)
                    if todays_recs.index.size > 0:
                        offering = todays_recs.iloc[position]
                        if offering["delta"] > req:
                            trade = offering
                            ticker = trade["ticker"]
                            buy_price = trade["close"]
                            exits = iterration_sim[(iterration_sim["ticker"]==ticker) & (iterration_sim["date"]>date)].iloc[:limit]
                            exits["gains"] = (exits["close"] - buy_price) / buy_price
                            exit = exits.sort_values("gains",ascending=False).iloc[0]
                            trade["sell_price"] = exit["close"]
                            trade["sell_date"] = exit["date"]
                            date = exit["date"] + timedelta(days=1)
                            trade["value"] = value
                            trade["req"] = req
                            trade["position"] = position
                            trades.append(trade)
                        else:
                            date = date + timedelta(days=1)
                    else:
                        date = date + timedelta(days=1)
                except Exception as e:
                    print(str(e))
                    date = date+timedelta(days=1)

  0%|                                                                                                                                                                                                                                             | 0/2 [00:00<?, ?it/s]
  0%|                                                                                                                                                                                                                                             | 0/4 [00:00<?, ?it/s][A
 25%|█████████████████████████████████████████████████████████▎                                                                                                                                                                           | 1/4 [00:37<01:51, 37.29s/it][A
 50%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                               

In [49]:
t = pd.DataFrame(trades)

In [50]:
t["delta"] =  (t["sell_price"] - t["close"]) / t["close"]

In [44]:
analysis = []
for value in tqdm(values):
    for req in tqdm(reqs):
        for position in range(positions):
            position_trades = t[(t["value"]==value) & (t["req"]==req) & (t["position"]==position)].sort_values("date")
            position_trades["delta"] = (position_trades["sell_price"] - position_trades["close"]) / position_trades["close"]
            initial = 100 / positions
            for delta in position_trades["delta"]:
                initial = initial * (1+delta)
            analysis.append({
                "value":value,
                "req":req,
                "position":position,
                "pv":initial
            })
            

  0%|                                                                                                                                                                                                                                             | 0/2 [00:00<?, ?it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 95.22it/s][A

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 102.57it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

In [45]:
complete = pd.DataFrame(analysis).pivot_table(index=["value","req"],columns="position",values="pv").reset_index()

In [46]:
complete["pv"] = [sum([row[1][i] for i in range(10)]) for row in complete.iterrows()]

In [47]:
complete.sort_values("pv",ascending=False)

position,value,req,0,1,2,3,4,5,6,7,8,9,pv
4,True,1,2525.400035,380.763927,503.991459,523.531111,273.858418,622.210535,685.92618,721.678941,494.696275,1008.513502,7740.570384
5,True,3,2525.400035,380.763927,503.991459,523.531111,273.858418,622.210535,685.92618,721.678941,494.696275,1008.513502,7740.570384
6,True,5,2525.400035,380.763927,503.991459,523.531111,273.858418,622.210535,685.92618,721.678941,494.696275,1008.513502,7740.570384
7,True,10,2525.400035,418.388206,450.941493,299.052964,232.990381,135.993845,278.186726,273.901328,90.109877,306.786647,5011.751502
0,False,1,11.729608,12.516443,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,104.24605
1,False,3,11.729608,12.516443,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,104.24605
2,False,5,11.729608,12.516443,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,104.24605
3,False,10,11.729608,12.516443,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,104.24605


In [48]:
unit_db.connect()
unit_db.store("trades",t)
unit_db.disconnect()