In [30]:
from database.market import Market
from database.adatabase import ADatabase
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from datetime import datetime, timedelta
import pytz
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle

In [31]:
# Handle used below to connect, retrieve named tables, and disconnect.
market = Market()

In [32]:
# Database handle for the "strategy_financial" store.
# NOTE(review): in the visible cells it is only connected/disconnected; the one
# store call that would use it is commented out.
financial_db = ADatabase("strategy_financial")

In [33]:
# Pull the three input tables used by the rest of the notebook.
# The try/finally guarantees the connection is released even if one of the
# retrievals raises, so a failed run does not leave a dangling connection.
market.connect()
try:
    sp5 = market.retrieve("sp500")
    prices = market.retrieve("prices")
    financials = market.retrieve("financial_set")
finally:
    market.disconnect()

In [34]:
# Normalise the date column via the project helper, then derive the calendar
# quarter and year of every price row (used later as merge/group keys).
prices = p.column_date_processing(prices)
price_dates = prices["date"]
prices["quarter"] = price_dates.map(lambda stamp: stamp.quarter)
prices["year"] = price_dates.map(lambda stamp: stamp.year)

In [35]:
# Run the same project date-processing helper that is applied to `prices`.
# NOTE(review): the helper's exact effect is not visible here; later cells rely
# on `financials` exposing "year", "quarter" and "ticker" columns.
financials = p.column_date_processing(financials)

In [36]:
# Coerce every close value to a Python float, element by element.
prices["close"] = prices["close"].map(float)

In [37]:
# Regression target: mean close per (year, quarter, ticker).
label_keys = ["year","quarter","ticker"]
labels = prices.groupby(label_keys)["close"].mean().reset_index()
# Move each label back one year so that a later merge on "year" pairs a row
# with the mean close observed in the same quarter of the following year.
labels["year"] -= 1

In [38]:
# Inner-join fundamentals with the shifted price labels; rows without a
# matching (year, quarter, ticker) on either side are dropped.
data = pd.merge(financials, labels, how="inner", on=["year","quarter","ticker"])

In [39]:
# Model inputs: every column of `data` except the merge keys and the target.
non_factor_columns = {"ticker","year","quarter","close"}
factors = [column for column in data.columns if column not in non_factor_columns]

In [40]:
# Show the feature column names that will be fed to the models.
factors

['assets',
 'liabilitiesandstockholdersequity',
 'incometaxexpensebenefit',
 'retainedearningsaccumulateddeficit',
 'accumulatedothercomprehensiveincomelossnetoftax',
 'earningspersharebasic',
 'earningspersharediluted',
 'propertyplantandequipmentnet',
 'cashandcashequivalentsatcarryingvalue',
 'entitycommonstocksharesoutstanding',
 'weightedaveragenumberofdilutedsharesoutstanding',
 'weightedaveragenumberofsharesoutstandingbasic',
 'stockholdersequity']

In [41]:
# Inspect the merged training table (fundamentals plus the "close" target).
# NOTE(review): the rendered output shows repeated key rows (e.g. MMM 2009 Q3).
data

Unnamed: 0,ticker,quarter,year,assets,liabilitiesandstockholdersequity,incometaxexpensebenefit,retainedearningsaccumulateddeficit,accumulatedothercomprehensiveincomelossnetoftax,earningspersharebasic,earningspersharediluted,propertyplantandequipmentnet,cashandcashequivalentsatcarryingvalue,entitycommonstocksharesoutstanding,weightedaveragenumberofdilutedsharesoutstanding,weightedaveragenumberofsharesoutstandingbasic,stockholdersequity,close
0,MMM,3,2009,2.594400e+10,2.594400e+10,5.767500e+08,2.246700e+10,-3.664500e+09,1.7700,1.752500,6.892500e+09,1.972000e+09,698320662.0,7.062500e+08,699600000.0,1.034150e+10,83.806719
1,MMM,3,2009,2.594400e+10,2.594400e+10,5.767500e+08,2.246700e+10,-3.664500e+09,1.7700,1.752500,6.892500e+09,1.972000e+09,698320662.0,7.062500e+08,699600000.0,1.034150e+10,83.806719
2,MMM,1,2010,2.652150e+10,2.652150e+10,1.646667e+09,2.299000e+10,-3.700000e+09,5.0700,5.003333,6.943000e+09,2.058000e+09,711733377.0,7.153000e+08,706000000.0,1.132200e+10,90.021935
3,MMM,1,2010,2.652150e+10,2.652150e+10,1.646667e+09,2.299000e+10,-3.700000e+09,5.0700,5.003333,6.943000e+09,2.058000e+09,711733377.0,7.153000e+08,706000000.0,1.132200e+10,90.021935
4,MMM,2,2010,2.763600e+10,2.763600e+10,3.385000e+08,2.399200e+10,-3.750500e+09,1.0300,1.015000,6.929500e+09,2.342250e+09,713068068.0,7.097000e+08,702650000.0,1.316500e+10,93.476825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13841,YUM,2,2020,5.658000e+09,5.658000e+09,2.450000e+07,-7.661500e+09,-4.685000e+08,0.5650,0.550000,1.211000e+09,8.795000e+08,300985972.0,3.110000e+08,305000000.0,-8.018750e+09,117.895873
13842,YUM,3,2020,5.826000e+09,5.826000e+09,8.175000e+07,-7.629500e+09,-4.750000e+08,1.0925,1.070000,1.217500e+09,9.240000e+08,301399629.0,3.105000e+08,304750000.0,-8.029500e+09,126.876563
13843,YUM,4,2020,5.646000e+09,5.646000e+09,8.600000e+07,-7.559000e+09,-4.426667e+08,1.5725,1.540000,1.199500e+09,8.575000e+08,301668133.0,3.102500e+08,304500000.0,-8.010000e+09,127.370492
13844,YUM,1,2021,5.541500e+09,5.541500e+09,1.640000e+08,-7.554000e+09,-3.995000e+08,1.7500,1.713636,1.202500e+09,5.423333e+08,300055312.0,3.163333e+08,310000000.0,-1.528817e+09,121.575556


In [42]:
# Walk-forward simulation.
# For each `year` in [start_year, end_year):
#   * train on rows with year-7 <= data.year < year (skipped unless > 100 rows),
#   * predict the rows whose data.year == year with every model returned by
#     m.regression, storing "<api>_prediction" and "<api>_score" columns,
#   * store the predictions with "year" incremented by one -- presumably to
#     undo the one-year shift applied when `labels` was built, so that
#     predictions line up with price years (TODO confirm).
# Results are collected in `sim` (one frame per simulated year).
start_year = 2019
end_year = 2021
performance = []
sim = []
# NOTE(review): nothing in this cell reads from or writes to financial_db; the
# connect/disconnect pair is kept from the original and made exception-safe.
financial_db.connect()
try:
    for year in tqdm(range(start_year,end_year)):
        try:
            training_data = data[(data["year"]>=year-7) & (data["year"]<year)].reset_index(drop=True)
            if training_data.index.size <= 100:
                continue
            # Explicit copy: columns are added below, which must not be done on
            # a filtered view of `data`.
            prediction_data = data[data["year"]==year].copy()
            refined_data = {
                "X":training_data[factors],
                "y":training_data[["close"]].rename(columns={"close":"y"}),
            }
            models = m.regression(refined_data)
            models["year"] = year
            for _, model_row in models.iterrows():
                api = model_row["api"]
                prediction_data[f"{api}_prediction"] = model_row["model"].predict(prediction_data[factors])
                prediction_data[f"{api}_score"] = model_row["score"]
            included_columns = ["year","quarter","ticker"]
            included_columns.extend([x for x in prediction_data.columns if "score" in x or "prediction" in x])
            prediction_data["year"] = prediction_data["year"] + 1
            sim.append(prediction_data[included_columns])
        except Exception as e:
            # Best effort per year: report the failure and move on. Only names
            # that exist in this cell are printed, so the handler cannot fail.
            print(year,str(e))
finally:
    financial_db.disconnect()

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [16:37<00:00, 498.54s/it]


In [43]:
# Inspect the raw list of per-year prediction frames.
sim

[       year  quarter ticker  skl_prediction  skl_score  xgb_prediction  \
 49     2020        1    MMM      143.329729 -70.358786      169.010895   
 50     2020        2    MMM      127.287430 -70.358786       92.339050   
 51     2020        3    MMM      129.996898 -70.358786      167.547455   
 52     2020        4    MMM      132.445443 -70.358786      181.311829   
 83     2020        1   ABBV       61.412311 -70.358786       87.873268   
 ...     ...      ...    ...             ...        ...             ...   
 13791  2020        4   XLNX       98.313021 -70.358786       80.275558   
 13836  2020        1    YUM       82.165690 -70.358786       89.802773   
 13837  2020        2    YUM       81.178035 -70.358786       62.092606   
 13838  2020        3    YUM       81.577355 -70.358786       56.908657   
 13839  2020        4    YUM       81.789062 -70.358786       48.249004   
 
        xgb_score  cat_prediction  cat_score  
 49     -2.051541      197.040062   0.900437  
 50 

In [308]:
# Stack the yearly prediction frames and collapse repeated
# (year, quarter, ticker) keys to a single row by averaging.
stacked_predictions = pd.concat(sim)
simulation = (
    stacked_predictions
    .groupby(["year","quarter","ticker"])
    .mean()
    .reset_index()
)

In [302]:
# financial_db.connect()
# financial_db.store("deep_sim",simulation)
# financial_db.disconnect()

In [311]:
# Ensemble the per-model predictions and derive, per ticker, the relative
# change of the ensemble prediction versus the previous row of that ticker.
#
# Only the per-model "<api>_prediction" columns are averaged. Matching on the
# suffix keeps the derived "prediction" column out of the list, so re-running
# this cell does not average the previous average back in.
prediction_cols = [x for x in simulation.columns if x.endswith("_prediction")]
number_of_predictions = len(prediction_cols)
if number_of_predictions == 0:
    raise ValueError("simulation has no '<api>_prediction' columns to average")
# skipna=False: a missing model prediction makes the ensemble value NaN, the
# same outcome as summing the values one by one.
simulation["prediction"] = simulation[prediction_cols].sum(axis=1, skipna=False) / number_of_predictions
cleaned_set = simulation[["year","quarter","ticker","prediction"]]
new_set = []
# sort=False yields tickers in order of first appearance; rows inside each
# group keep the order they have in `cleaned_set`.
for ticker, ticker_rows in cleaned_set.groupby("ticker", sort=False):
    # Work on an explicit copy so the new columns are not written to a slice.
    ticker_data = ticker_rows.copy()
    ticker_data["prev"] = ticker_data["prediction"].shift(1)
    ticker_data["delta"] = (ticker_data["prediction"] - ticker_data["prev"]) / ticker_data["prev"]
    new_set.append(ticker_data)

In [315]:
# Combine the per-ticker frames and drop rows containing any NaN; this removes
# each ticker's first row, whose "prev"/"delta" are undefined after shift(1).
ns = pd.concat(new_set).dropna()

In [316]:
# Attach prediction/prev/delta to every price row of the matching
# (year, quarter, ticker); price rows without a match keep NaN (left join).
# NOTE(review): this rebinds `simulation` from the per-quarter prediction frame
# to a per-price-row frame, so earlier cells that used `simulation` are not
# safe to re-run after this one.
simulation = prices.merge(ns,on=["year","quarter","ticker"],how="left")

In [318]:
# Preview only: the result is not assigned, so `simulation` itself still
# contains the rows with missing prediction/delta values.
simulation.dropna()

Unnamed: 0,date,close,high,low,open,volume,adjclose,adjhigh,adjlow,adjopen,adjvolume,divcash,splitfactor,ticker,quarter,year,prediction,prev,delta
6608,2020-04-01,133.14,135.840,131.010,132.000,3777420,125.502203,128.047313,123.494394,124.427601,3777420,0.0,1.0,MMM,2,2020,129.071230,169.793562,-0.239834
6609,2020-04-02,137.91,138.210,132.100,132.360,3778030,129.998564,130.281354,124.521865,124.766949,3778030,0.0,1.0,MMM,2,2020,129.071230,169.793562,-0.239834
6610,2020-04-03,133.79,137.440,132.900,136.010,3586246,126.114915,129.555527,125.275971,128.207561,3586246,0.0,1.0,MMM,2,2020,129.071230,169.793562,-0.239834
6611,2020-04-06,140.70,141.250,136.900,137.860,5625066,132.628511,133.146960,129.046505,129.951433,5625066,0.0,1.0,MMM,2,2020,129.071230,169.793562,-0.239834
6612,2020-04-07,144.60,150.640,144.400,147.500,5495584,136.304782,141.998287,136.116255,139.038418,5495584,0.0,1.0,MMM,2,2020,129.071230,169.793562,-0.239834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2888343,2021-12-30,143.17,143.695,142.480,143.260,4982997,143.170000,143.695000,142.480000,143.260000,4982997,0.0,1.0,WMT,4,2021,135.686143,131.061638,0.035285
2888344,2021-12-31,144.69,145.045,142.920,143.200,5841647,144.690000,145.045000,142.920000,143.200000,5841647,0.0,1.0,WMT,4,2021,135.686143,131.061638,0.035285
2888353,2021-12-29,52.25,52.310,51.275,51.450,5117289,52.250000,52.310000,51.275000,51.450000,5117289,0.0,1.0,WBA,4,2021,95.569847,101.258202,-0.056177
2888354,2021-12-30,51.99,52.920,51.940,52.360,3653593,51.990000,52.920000,51.940000,52.360000,3653593,0.0,1.0,WBA,4,2021,95.569847,101.258202,-0.056177


In [323]:
# Median of "delta" over the price-level frame. Because predictions were joined
# onto every price row, each quarterly delta is counted once per price row of
# that quarter rather than once per (year, quarter, ticker).
simulation["delta"].median()

0.02237621357673299

In [180]:
# Distinct GICS sectors, in order of first appearance in the S&P 500 table.
industries = sp5["GICS Sector"].unique().tolist()

In [347]:
# Backtest: `positions` independent slots trade between start_date and end_date.
#
# On each calendar day slot k looks at that day's rows ranked by predicted
# quarter-over-quarter change ("delta", descending) and considers the k-th row.
# If its delta is positive the slot buys at "adjclose" and exits at the first
# later date before the next quarter start (capped at end_date) on which the
# gain reaches the predicted delta; in that case the sell price is booked as
# exactly buy_price * (1 + delta). Otherwise it sells at the "adjclose" of the
# first row on/after that cutoff. The slot resumes the day after the exit.
#
# Recorded per trade: sell_price, delta (net of hedge), delta_hedgeless, hedge,
# projected_delta, sell_date, position, limit.
#
# NOTE(review): ranking is done across all tickers; no sector filter is applied.
# NOTE(review): `limit` is only recorded on each trade, it does not constrain
# the loop.
positions = 10
start_date = datetime(2020,5,15)
end_date = datetime(2021,12,15)
limit = 60
hedge_rate = 0.05  # hedge cost, as a fraction of the projected delta
trades = []
# The frame is only read below, so a single snapshot serves every position.
iterration_sim = simulation.copy()
for position in range(positions):
    date = start_date
    while date < end_date:
        try:
            todays_recs = iterration_sim[iterration_sim["date"]==date].sort_values("delta",ascending=False)
            # No row for this slot today (no data, or fewer rows than slots).
            if todays_recs.index.size <= position:
                date = date + timedelta(days=1)
                continue
            offering = todays_recs.iloc[position]
            # Also skips NaN deltas: the comparison is False for NaN.
            if not (offering["delta"] > 0):
                date = date + timedelta(days=1)
                continue
            # Copy the row before annotating it so the source frame is untouched.
            trade = offering.copy()
            req = offering["delta"]
            hedge = req * hedge_rate
            ticker = trade["ticker"]
            buy_price = trade["adjclose"]
            if trade["quarter"] == 4:
                next_quarter = 1
                next_year = trade["year"] + 1
            else:
                next_year = trade["year"]
                next_quarter = trade["quarter"] + 1
            # First day of the following quarter, never later than end_date.
            end_of_quarter = min(datetime(next_year,(3*(next_quarter-1))+1,1), end_date)
            ticker_rows = iterration_sim[iterration_sim["ticker"]==ticker]
            exits = ticker_rows[(ticker_rows["date"]>date) & (ticker_rows["date"]<end_of_quarter)]
            gains = (exits["adjclose"] - buy_price) / buy_price
            gain_exits = exits[gains>=req].sort_values("date")
            if gain_exits.index.size < 1:
                # Target never hit: sell on the first row on/after the cutoff.
                # Sorting makes "first" independent of the frame's row order.
                # If no such row exists, .iloc[0] raises and the handler below
                # skips this day without recording a trade.
                exit_row = ticker_rows[ticker_rows["date"]>=end_of_quarter].sort_values("date").iloc[0]
                trade["sell_price"] = exit_row["adjclose"]
            else:
                exit_row = gain_exits.iloc[0]
                trade["sell_price"] = buy_price * (1+(req))
            delta = (trade["sell_price"] - buy_price) / buy_price
            # The hedge reduces gains and cushions losses by the same amount.
            if delta > 0:
                trade["delta"] = delta - hedge
            else:
                trade["delta"] = delta + hedge
            trade["delta_hedgeless"] = delta
            trade["hedge"] = hedge
            trade["projected_delta"] = req
            trade["sell_date"] = exit_row["date"]
            date = exit_row["date"] + timedelta(days=1)
            trade["position"] = position
            trade["limit"] = limit
            trades.append(trade)
        except Exception as e:
            print(str(e))
            date = date+timedelta(days=1)

In [348]:
# One row per executed trade (each element of `trades` is an annotated row).
t = pd.DataFrame(trades)

In [349]:
# Compound each slot's trades into a final portfolio value.
# Every slot starts with an equal share of 100 and is multiplied by
# (1 + return) for each of its trades in date order. The return is recomputed
# from sell_price and adjclose, i.e. it excludes the hedge adjustment that is
# stored in t["delta"].
analysis = []
for position in range(positions):
    # Explicit copy: "delta" is overwritten below and must not be written
    # through to a filtered slice of `t`.
    position_trades = t[(t["position"]==position)].sort_values("date").copy()
    position_trades["delta"] = (position_trades["sell_price"] - position_trades["adjclose"]) / position_trades["adjclose"]
    portfolio_value = 100 / positions
    for trade_return in position_trades["delta"]:
        portfolio_value = portfolio_value * (1+trade_return)
    analysis.append({
        "position":position,
        "pv":portfolio_value
    })

In [350]:
# Tabulate the per-position results: columns "position" and "pv".
a = pd.DataFrame(analysis)

In [351]:
# Spread the per-position values into one column per position.
# NOTE(review): `a` only has "position" and "pv", so the index list built here
# is empty; the single-row shape shown in the output relies on how pandas
# handles an empty pivot index -- confirm on the pandas version in use.
complete = a.pivot_table(index=[x for x in a.columns if x != "pv" and x != "position"],columns="position",values="pv").reset_index()
# Total portfolio value: sum of the position columns 0..positions-1 per row.
complete["pv"] = [sum([row[1][i] for i in range(positions)]) for row in complete.iterrows()]

In [352]:
# Final value per position plus the combined "pv" (starting capital was 100).
complete

position,index,0,1,2,3,4,5,6,7,8,9,pv
0,pv,16.551736,31.354159,15.359655,17.512144,23.220344,13.876189,13.270335,19.186588,24.346912,21.203253,195.881313


In [353]:
# Tag every trade with the GICS sector of its ticker; trades whose ticker is
# missing from the S&P 500 table are kept with a NaN sector (left join).
sector_lookup = sp5[["Symbol","GICS Sector"]].rename(columns={"Symbol":"ticker"})
t = t.merge(sector_lookup,on="ticker",how="left")

In [356]:
# The 20 trades with the highest hedge-adjusted return, in ascending order.
trade_report_columns = [
    "position","date","sell_date","ticker",
    "projected_delta","delta","hedge","delta_hedgeless","GICS Sector",
]
t[trade_report_columns].sort_values("delta").tail(20)

Unnamed: 0,position,date,sell_date,ticker,projected_delta,delta,hedge,delta_hedgeless,GICS Sector
4,0,2021-04-05,2021-07-01,INTU,0.863919,0.179621,0.043196,0.222817,Information Technology
16,2,2020-10-02,2021-01-04,BEN,0.373274,0.180899,0.018664,0.199563,Financials
35,4,2021-10-04,2021-12-15,ABBV,0.453866,0.189341,0.022693,0.212034,Health Care
52,7,2020-07-02,2020-10-01,POOL,0.458509,0.194109,0.022925,0.217034,Consumer Discretionary
38,5,2020-10-02,2021-01-04,PRU,0.344781,0.195875,0.017239,0.213114,Financials
68,9,2020-05-15,2020-07-01,AES,0.275703,0.212598,0.013785,0.226383,Utilities
2,0,2020-10-02,2021-01-04,STX,0.452433,0.223005,0.022622,0.245626,Information Technology
9,1,2020-10-02,2021-01-04,BAC,0.44424,0.225908,0.022212,0.24812,Financials
18,2,2021-04-05,2021-07-01,HES,0.667603,0.22638,0.03338,0.25976,Energy
31,4,2020-10-02,2021-01-04,RJF,0.358156,0.249135,0.017908,0.267042,Financials


In [355]:
# Mean projected vs. realised return per GICS sector, worst sector first.
# The metric columns are selected before aggregating so that non-numeric trade
# columns (ticker, dates) never reach mean(); recent pandas versions raise on
# them instead of silently dropping them.
sector_metrics = ["projected_delta","delta","hedge","delta_hedgeless"]
t.groupby("GICS Sector")[sector_metrics].mean().sort_values("delta")

Unnamed: 0_level_0,projected_delta,delta,hedge,delta_hedgeless
GICS Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Health Care,0.602632,0.012902,0.030132,0.017094
Consumer Staples,0.719532,0.033406,0.035977,0.059915
Communication Services,0.355535,0.034759,0.017777,0.052535
Industrials,0.544608,0.042852,0.02723,0.047824
Materials,0.537625,0.071927,0.026881,0.098808
Consumer Discretionary,0.993244,0.081782,0.049662,0.093064
Utilities,0.571177,0.092266,0.028559,0.08907
Information Technology,0.763855,0.098678,0.038193,0.134915
Energy,0.435522,0.107832,0.021776,0.117382
Real Estate,2.14126,0.11946,0.107063,0.226523
