In [2]:
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
import pandas as pd
import matplotlib.pyplot as plt
from processor.processor import Processor as p
from statistics import variance
from tqdm import tqdm
from modeler.modeler import Modeler as m
from datetime import datetime, timedelta

In [3]:
market = Market()
sec = SEC()

In [4]:
# market.connect()
# prices = market.retrieve("alpha_prices")
# market.disconnect()

In [5]:
fed_fund = pd.read_csv("./csv_files/FED/FEDFUNDS.csv")
spy = pd.read_csv("./csv_files/FED/SPY.csv")
gdp = pd.read_csv("./csv_files/FED/GDP.csv")
brent = pd.read_csv("./csv_files/FED/POILBREUSDM.csv")
wti = pd.read_csv("./csv_files/FED/POILWTIUSDM.csv")
cpi = pd.read_csv("./csv_files/FED/CPIAUCSL.csv")
unemployment = pd.read_csv("./csv_files/FED/UNRATE.csv")

In [6]:
datasets = {"spy":spy,
           "fed_fund":fed_fund,
           "gdp":gdp,
            "brent":brent,
            "wti":wti, 
            "cpi":cpi, 
            "unemployment":unemployment}

In [7]:
cleaned_sets = {}
for dataset in tqdm(datasets.keys()):
    try:
        cleaned = p.column_date_processing(datasets[dataset])
        cleaned["year"] = [x.year for x in cleaned["date"]]
        cleaned["quarter"] = [x.quarter for x in cleaned["date"]]
        cleaned.drop("week",axis=1,inplace=True)
        cleaned_quarterly = cleaned.groupby(["year","quarter"]).mean().reset_index()
        cleaned_sets[dataset] = cleaned_quarterly
    except Exception as e:
        print(str(e))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 159.06it/s]


In [8]:
base = cleaned_sets["spy"][["year","quarter","adjclose"]]
base.rename(columns={"adjclose":"spy"},inplace=True)

In [9]:
for dataset in cleaned_sets:
    if dataset == "spy":
        continue
    else:
        ds = cleaned_sets[dataset]
        base = base.merge(ds.drop("date",axis=1,errors="ignore"),on=["year","quarter"],how="left")

In [10]:
data = base.copy()

In [11]:
data.columns

Index(['year', 'quarter', 'spy', 'fedfunds', 'gdp', 'poilbreusdm',
       'poilwtiusdm', 'cpiaucsl', 'unrate'],
      dtype='object')

In [12]:
base["y"] = base["spy"].shift(-4)

In [13]:
base["date"] = [datetime(int(row[1]["year"]),int(row[1]["quarter"]-1) * 3 + 1,1) for row in base.iterrows()]

In [14]:
factors = [x for x in base.columns if x not in ["year","quarter","y","date"]]
factors

['spy', 'fedfunds', 'gdp', 'poilbreusdm', 'poilwtiusdm', 'cpiaucsl', 'unrate']

In [15]:
base

Unnamed: 0,year,quarter,spy,fedfunds,gdp,poilbreusdm,poilwtiusdm,cpiaucsl,unrate,y,date
0,2018,1,250.379577,1.446667,20155.486,67.15915,62.880746,249.254333,4.033333,254.38273,2018-01-01
1,2018,2,249.122674,1.736667,20470.197,74.890021,68.029565,250.681,3.933333,270.719942,2018-04-01
2,2018,3,263.790247,1.923333,20687.278,76.081132,69.730812,251.770333,3.766667,279.147975,2018-07-01
3,2018,4,250.940834,2.22,20819.269,68.32007,58.620091,252.69,3.833333,292.263762,2018-10-01
4,2019,1,254.38273,2.403333,21013.085,63.756896,54.797956,253.292667,3.866667,290.961983,2019-01-01
5,2019,2,270.719942,2.396667,21272.448,68.357847,59.758818,255.283,3.633333,280.831936,2019-04-01
6,2019,3,279.147975,2.19,21531.839,62.123769,56.355514,256.225,3.633333,319.365394,2019-07-01
7,2019,4,292.263762,1.643333,21706.532,62.555384,56.835405,257.785333,3.6,343.268623,2019-10-01
8,2020,1,290.961983,1.26,21538.032,51.178989,45.881804,258.618,3.8,374.677348,2020-01-01
9,2020,2,280.831936,0.06,19636.731,33.377157,28.023658,256.418333,12.966667,406.855821,2020-04-01


In [16]:
sims = []
for year in tqdm(range(2022,2023)):
    try:
        training_set = base[(base["year"]<year) & (base["year"]>=year-7)].copy().reset_index()
        prediction_set = base[base["year"]==year].copy()
        spy_model = m.regression({"X":training_set[factors],"y":training_set[["y"]]})
        sim = prediction_set.fillna(method="ffill")
        for row in spy_model.iterrows():
            model = row[1]["model"]
            api = row[1]["api"]
            score = row[1]["score"]
            predictions = model.predict(sim[factors])
            sim[f"{api}_prediction"] = predictions
            sim[f"{api}_score"] = score
            sims.append(sim)
    except Exception as e:
        print(year,str(e))

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.60s/it]


In [17]:
final_sim = pd.concat(sims)
final_sim["year"] = final_sim["year"] + 1

In [18]:
final_sim

Unnamed: 0,year,quarter,spy,fedfunds,gdp,poilbreusdm,poilwtiusdm,cpiaucsl,unrate,y,date,skl_prediction,skl_score,xgb_prediction,xgb_score,cat_prediction,cat_score
16,2023,1,438.321888,0.12,24740.48,97.443127,94.42825,284.607667,3.8,389.922995,2022-01-01,288.055438,0.830806,382.896179,0.720508,370.017348,0.82264
17,2023,2,404.610277,0.77,25248.476,111.987359,108.791291,291.821667,3.6,389.922995,2022-04-01,168.637047,0.830806,381.983185,0.720508,369.899264,0.82264
18,2023,3,393.955925,2.19,25723.941,97.834694,93.336901,295.884,3.566667,389.922995,2022-07-01,159.600645,0.830806,342.91922,0.720508,352.945099,0.82264
19,2023,4,382.871383,3.653333,25723.941,92.328604,85.681515,298.174333,3.6,389.922995,2022-10-01,119.259587,0.830806,337.512817,0.720508,357.060037,0.82264
16,2023,1,438.321888,0.12,24740.48,97.443127,94.42825,284.607667,3.8,389.922995,2022-01-01,288.055438,0.830806,382.896179,0.720508,370.017348,0.82264
17,2023,2,404.610277,0.77,25248.476,111.987359,108.791291,291.821667,3.6,389.922995,2022-04-01,168.637047,0.830806,381.983185,0.720508,369.899264,0.82264
18,2023,3,393.955925,2.19,25723.941,97.834694,93.336901,295.884,3.566667,389.922995,2022-07-01,159.600645,0.830806,342.91922,0.720508,352.945099,0.82264
19,2023,4,382.871383,3.653333,25723.941,92.328604,85.681515,298.174333,3.6,389.922995,2022-10-01,119.259587,0.830806,337.512817,0.720508,357.060037,0.82264
16,2023,1,438.321888,0.12,24740.48,97.443127,94.42825,284.607667,3.8,389.922995,2022-01-01,288.055438,0.830806,382.896179,0.720508,370.017348,0.82264
17,2023,2,404.610277,0.77,25248.476,111.987359,108.791291,291.821667,3.6,389.922995,2022-04-01,168.637047,0.830806,381.983185,0.720508,369.899264,0.82264


In [19]:
strat_db = ADatabase("strategy_econ")
strat_db.connect()
strat_db.store("sim",final_sim)
strat_db.disconnect()