In [29]:
from database.strategy import Strategy
from database.sec import SEC
from database.market import Market
from database.merrill import Merrill
from transformer.date_transformer import DateTransformer
from transformer.column_transformer import ColumnTransformer
from transformer.model_transformer import ModelTransformer
from transformer.product_transformer import ProductTransformer
from transformer.predictor_transformer import PredictorTransformer
from preprocessor.model_preprocessor import ModelPreprocessor
from preprocessor.predictor_preprocessor import PredictorPreprocessor
from modeler.modeler import Modeler as sp
from utils.date_utils import DateUtils
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, timezone
from tqdm import tqdm
import math
import requests as r
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment
# before any of them are read below.
load_dotenv()
# Quandl API key; sent as `api_key` on the Quandl requests made later in the notebook.
# os.getenv returns None when QUANDL is not set.
token=os.getenv("QUANDL")
import pickle

In [30]:
## Loading Constants
start = "2008-01-01"
# Read the clock once so that `end`, `year` and `quarter` always describe the
# same instant, even if the cell runs across a day/quarter/year boundary.
now = datetime.now()
end = now.strftime("%Y-%m-%d")

# Loading Databases
strat_db = Strategy("portfolio")
market = Market()
merrill = Merrill()
sec = SEC("sec")

# Ticker universe: the "sp500" table ordered by Symbol. The connection is
# released even if the query fails.
market.connect()
try:
    tickers = market.retrieve_data("sp500").sort_values("Symbol")
finally:
    market.close()

# When True, the next cell re-downloads the Merrill yield series before reading them.
reload = True
# Positional indices into `tickers`, one per symbol.
model_range = range(len(tickers["Symbol"]))
# Bond rating series; each name is used as a table name and later as a column name.
bonds = ["A","AAA","B","BBB"]
# Price sources to model; "tiingo" and "finnhub" were previously toggled here as well.
datasets = ["pdr"]

year = now.year
# Calendar quarter (1-4) of the current month; pure integer arithmetic.
quarter = (now.month - 1) // 3 + 1

# Second Merrill handle: the reload step writes through `m` and reads through `merrill`.
m = Merrill()
FED = pd.read_csv("FED_metadata.csv")

In [31]:
# Collected bond yield frames, one per entry of `bonds`, in the same order.
ds = []
if reload:
    # Short label: the first five space-separated words of each FED series name.
    FED["name_simplified"] = [" ".join(name.split(" ")[:5]) for name in FED["name"]]
    ml = pd.read_csv("ML_metadata.csv")
    # Keep only the metadata rows whose name mentions "yield".
    relevant = ml[ml["name"].str.contains("yield")]
    m.connect()
    try:
        for code in relevant["code"]:
            # Table name is the part of the code before the first "E" (e.g. "AAAEY" -> "AAA").
            name = code.split("E")[0]
            print(code,name)
            url = "https://www.quandl.com/api/v3/datasets/ML/{}".format(code)
            params ={"start_date":"1997-1-1",
                     "end_date":end,
                     "api_key":token}
            # Bounded wait, and fail loudly on an HTTP error instead of hitting an
            # opaque KeyError on the JSON payload below.
            data = r.get(url,params=params,timeout=60)
            data.raise_for_status()
            yields = pd.DataFrame(data.json()["dataset"]["data"]).rename(columns={0:"date",1:"yield"})
            # Replace the stored series only after a successful download.
            m.drop_table(name)
            m.store_data(name,yields)
    finally:
        m.close()

# Reading the stored series back is the same whether or not they were just
# refreshed, so it is done once here rather than in both branches.
merrill.connect()
try:
    for bond in bonds:
        data = merrill.retrieve_data(bond)
        ds.append(data)
finally:
    merrill.close()

AAAEY AAA
AEY A
BBBEY BBB
BEY B


In [32]:
# Build one wide frame: start from the first bond series and left-join each of the
# remaining series onto it by "date", naming every yield column after its bond.
# The frames held in `ds` are renamed in place.
first_label = bonds[0]
base = ds[0]
base.rename(columns={"yield":first_label},inplace=True)
for position, label in enumerate(bonds[1:], start=1):
    frame = ds[position]
    frame.rename(columns={"yield":label},inplace=True)
    # Drop the joined frame's "_id" so only its yield column is added.
    without_id = frame.drop("_id",axis=1)
    base = base.merge(without_id,on="date",how="left")

In [33]:
# Normalise the "date" column, then derive calendar keys from each date value.
base = DateTransformer.convert_to_date("pdr",base,"date")
base["year"] = [x.year for x in base["date"]]
base["quarter"] = [x.quarter for x in base["date"]]
base["week"] = [x.week for x in base["date"]]
# Average every numeric column per (year, quarter, week).
# numeric_only=True is spelled out so non-numeric columns (the "date" column, and
# presumably a leftover "_id" from the first bond frame -- TODO confirm) are
# dropped on every pandas version, instead of depending on the old default that
# newer pandas replaced with a TypeError.
bond_weekly = base.groupby(["year","quarter","week"]).mean(numeric_only=True).reset_index()

In [34]:
# Fit one regression and one classification model per ticker on price + weekly bond
# yields, and store the pickled models in "<dataset>_bond_models".
sims = []  # not used further in this cell
gap = 7
training_years = 1
sec.connect()
strat_db.connect()
market.connect()
try:
    for dataset in datasets:
        # Start from an empty results table for this dataset.
        strat_db.drop_table("{}_bond_models".format(dataset))
        try:
            ## Setting Up
            print(year,quarter)
            date_ranges = DateUtils.create_quarterly_training_range_rec(year,quarter,training_years,gap)
            training_start,training_end,prediction_start,prediction_end = date_ranges
            dates = pd.to_datetime(date_ranges)
            quarters = [x.quarter for x in dates]
            years = [x.year for x in dates]
            print(date_ranges)
            ### switch to all tickers
            for i in model_range:
                # Reset before the guarded block so the error report below never
                # raises NameError or names the previous iteration's ticker.
                ticker = None
                try:
                    # Symbols use "-" where the source list uses ".".
                    ticker = tickers.iloc[i]["Symbol"].replace(".","-")
                    price = market.retrieve_price_data("{}_prices".format(dataset),ticker)
                    if dataset == "pdr":
                        price = ColumnTransformer.rename_columns(price," ")
                    else:
                        price = ColumnTransformer.rename_columns(price,"")
                    price = DateTransformer.convert_to_date(dataset,price,"date")
                    mt = ModelTransformer(ticker,training_start,training_end,gap)
                    # NOTE(review): `pt` and `prot` are constructed but not used in this cell.
                    pt = ModelTransformer(ticker,prediction_start,prediction_end,gap)
                    mr = ModelPreprocessor(ticker)
                    prot = ProductTransformer(ticker,prediction_start,prediction_end)
                    ## regression_model
                    rfd = mt.bond_merge(price.copy(),bond_weekly.copy(),True,classify=False).reset_index()
                    refined = mr.fundamental_preprocess(rfd.copy())
                    rfr = sp.regression(refined,ranked=False,tf=False,deep=False)
                    # classification_model
                    cfd = mt.bond_merge(price.copy(),bond_weekly.copy(),True,classify=True).reset_index()
                    refined = mr.fundamental_preprocess(cfd.copy())
                    cfr = sp.classification(refined,tf=False,deep=False)
                    # One row per model; the fitted model objects are pickled to bytes for storage.
                    fundamental_results = pd.DataFrame([cfr,rfr])
                    fundamental_results["ticker"] = ticker
                    fundamental_results["model"] = [pickle.dumps(x) for x in fundamental_results["model"]]
                    strat_db.store_data("{}_bond_models".format(dataset),fundamental_results)
                except Exception as e:
                    # Best effort: report the failing ticker and carry on with the next one.
                    message = {"status":"quarterly modeling","ticker":ticker,"year":str(year),"quarter":str(quarter),"message":str(e)}
                    print(message)
        except Exception as e:
            print(year,quarter,str(e))
finally:
    # Always release the connections, including when drop_table or the setup fails.
    sec.close()
    market.close()
    strat_db.close()

2021 2
['2020-04-01', '2021-03-31', '2021-04-01', '2021-06-30']
{'status': 'quarterly modeling', 'ticker': 'CXO', 'year': '2021', 'quarter': '2', 'message': "'t'"}
{'status': 'quarterly modeling', 'ticker': 'TIF', 'year': '2021', 'quarter': '2', 'message': "'t'"}
{'status': 'quarterly modeling', 'ticker': 'WRK', 'year': '2021', 'quarter': '2', 'message': 'With n_samples=1, test_size=0.25 and train_size=0.75, the resulting train set will be empty. Adjust any of the aforementioned parameters.'}


In [35]:
# Show the results frame from the modeling loop. The loop reassigns this name for every
# ticker, so only the last successfully modeled ticker's two rows are displayed here.
fundamental_results

Unnamed: 0,api,model,score,model_type,ticker
5,skl,b'\x80\x04\x95\xfd\x02\x00\x00\x00\x00\x00\x00...,0.8,classification,ZTS
0,xgb,b'\x80\x04\x95w\x02\x00\x00\x00\x00\x00\x00\x8...,0.028311,regression,ZTS
