# THOMAS MCDONNELL QTS HOMEWORK 6: PREDICTIVE REGRESSION

In [185]:
import os
import warnings
from datetime import datetime
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import quandl
import scipy as sp
import statsmodels.api as sm
import statsmodels.regression.linear_model as linreg
from pandas import DateOffset
from statsmodels.regression.rolling import RollingOLS
from tqdm import tqdm
# Suppress the warning
warnings.filterwarnings("ignore")

In [186]:
#boxcar regression function
def boxcar_5_reg(_df, x, y):
    """Fit one equally weighted OLS over all rows of ``_df``.

    Argument roles follow the order they are handed to ``linreg.OLS``:
    ``x`` names the dependent (endog) column and ``y`` names the regressor
    column, or list of regressor columns (exog). No constant term is added
    here, so the fit has no intercept unless ``_df`` already carries one.

    Returns a one-row DataFrame holding the fitted coefficients, labelled
    with the last index value of ``_df``.
    """
    endog = _df[x]
    exog = _df[y]
    coeffs = linreg.OLS(endog, exog).fit(params_only=True).params
    window_end = _df.index[-1]
    return pd.DataFrame(data=[coeffs], index=[window_end])

In [187]:
# data = pd.read_table("C:/Users/thoma/Downloads/Liq5YCDS.delim")
# Read the raw CDS table and remember the tickers in order of first appearance.
data = pd.read_table('/Users/tmac/Downloads/Liq5YCDS.delim')
tickers = data['ticker'].drop_duplicates().tolist()

# Reshape to one par-spread column per ticker, indexed by date.
# NOTE(review): missing spreads are set to 0 here; the ratio returns computed
# from this frame can then be inf/NaN on those rows -- confirm this is intended.
cds = (
    data.pivot_table(values='parspread', index='date', columns='ticker')
        .fillna(0)
)
cds.index = pd.to_datetime(cds.index)

# Put the series on a weekly grid ending on Wednesdays, carrying the last value forward.
cds = cds.resample('W-WED').ffill()

In [188]:
#get EOD price data
# Read the Quandl key from the environment when available; the literal fallback
# keeps the notebook runnable exactly as before.
# NOTE(review): the fallback key is committed in this notebook -- rotate it and
# remove the literal once QUANDL_API_KEY is set everywhere this runs.
quandl_api_key = os.environ.get('QUANDL_API_KEY', 'uoxc6V3s61czWuHoGLcs')

# Build one frame with a 'date' column plus one adjusted-close column per ticker.
# The first ticker seeds the frame and every later ticker is inner-merged on
# 'date'. This replaces DataFrame.append (removed in pandas 2.0) and the
# special case that only worked when 'BA' happened to be the first ticker.
eod_price = None
for ticker in tickers:
    ticker_px = quandl.get_table('QUOTEMEDIA/PRICES', ticker = [ticker], api_key = quandl_api_key,
                    qopts = { 'columns': ['date', 'adj_close'] },
                    date = { 'gte': '2018-01-01', 'lte': '2023-01-01' },
                     paginate=True)
    ticker_px = ticker_px.rename(columns = {'adj_close': ticker})
    if eod_price is None:
        eod_price = ticker_px
    else:
        eod_price = eod_price.merge(ticker_px, on = 'date')

if eod_price is None:
    raise ValueError("no tickers to download: 'tickers' is empty")

eod_price = eod_price.set_index('date')

# Weekly grid ending on Wednesdays, carrying the last value forward.
eod_price = eod_price.resample('W-WED').ffill()

In [189]:
#Get SPY data
# Read the Quandl key from the environment when available; the literal fallback
# keeps the notebook runnable exactly as before.
# NOTE(review): the fallback key is committed in this notebook -- rotate it and
# remove the literal once QUANDL_API_KEY is set everywhere this runs.
quandl_api_key = os.environ.get('QUANDL_API_KEY', 'uoxc6V3s61czWuHoGLcs')

m = quandl.get_table('QUOTEMEDIA/PRICES', ticker = 'SPY', api_key = quandl_api_key,
                    qopts = { 'columns': ['date', 'adj_close'] },
                    date = { 'gte': '2018-01-01', 'lte': '2023-01-01' },
                     paginate=True)
m = m.set_index('date')

# Weekly grid ending on Wednesdays, carrying the last value forward.
m = m.resample("W-WED").ffill()

# Forward simple return: row t holds (p[t+1] - p[t]) / p[t]. The last row has
# no following value, so it is dropped.
m = m.diff().div(m.shift(1)).shift(-1).iloc[:-1]

In [190]:
# get return data
# Row t of each return frame holds (v[t+1] - v[t]) / v[t]; the final row has no
# following value and is dropped.
cds_step = cds.diff()
r_cds = (cds_step / cds.shift(1)).shift(-1).iloc[:-1]

equity_step = eod_price.diff()
r_equity = (equity_step / eod_price.shift(1)).shift(-1).iloc[:-1]

# Equal-weighted cross-sectional mean of the CDS returns, kept as a one-column frame.
r_index = r_cds.mean(axis='columns').to_frame()


In [191]:
#dictionary of dataframes with data for each ticker needed for regressions
# Each entry shares the CDS-return index and carries four columns:
# 'r_equity', 'r_cds', 'r_index' and 'm'. Infinite values are replaced by 0.
df_dict = {}
for ticker in tickers:
    inputs = pd.DataFrame(index = r_cds.index)
    inputs['r_equity'] = r_equity[ticker]
    inputs['r_cds'] = r_cds[ticker]
    inputs['r_index'] = r_index
    inputs['m'] = m
    df_dict[ticker] = inputs.replace([np.inf, -np.inf], 0)

In [192]:
#dictionary of dataframes of model parameters for each ticker
# Rolling 16-row fits of 'r_cds' on ['r_equity', 'r_index']; each fit is
# labelled with the last date of its window.
# NOTE(review): the loop stops at n_rows - 1, so the window ending on the final
# row is never fitted -- confirm this is intended.
model_1_dict = {}
for ticker in tickers:
    inputs = df_dict[ticker]
    n_rows = inputs.shape[0]
    window_fits = []
    for end in range(16, n_rows):
        window = inputs.iloc[end - 16:end]
        window_fits.append(boxcar_5_reg(window, 'r_cds', ['r_equity', 'r_index']))
    model_1_dict[ticker] = pd.concat(window_fits)

In [193]:
#dictionary of dataframes of capm model parameters for each ticker
# Rolling 16-row fits of 'r_equity' on 'm'; each fit is labelled with the last
# date of its window.
# NOTE(review): the loop stops at n_rows - 1, so the window ending on the final
# row is never fitted -- confirm this is intended.
capm_dict = {}
for ticker in tickers:
    inputs = df_dict[ticker]
    n_rows = inputs.shape[0]
    window_fits = []
    for end in range(16, n_rows):
        window = inputs.iloc[end - 16:end]
        window_fits.append(boxcar_5_reg(window, 'r_equity', 'm'))
    capm_dict[ticker] = pd.concat(window_fits)

In [194]:
#dictionary of dataframes for predicted returns for each ticker given our parameters
# For every ticker, multiply each rolling coefficient by the same-date regressor
# value. Dates without a coefficient come out as NaN, because the products are
# aligned on the union of both indexes.
predicted_returns_1 = {}
predicted_returns_capm = {}
for ticker in tickers:
    inputs = df_dict[ticker]

    # model 1: r_equity and r_index terms
    betas = model_1_dict[ticker]
    fitted_cds = (
        betas['r_equity'] * inputs['r_equity']
        + betas['r_index'] * inputs['r_index']
    )
    predicted_returns_1[ticker] = fitted_cds.to_frame('rets').rename_axis('date')

    # CAPM: single market term
    capm_beta = capm_dict[ticker]
    fitted_equity = capm_beta['m'] * inputs['m']
    predicted_returns_capm[ticker] = fitted_equity.to_frame('rets').rename_axis('date')



In [195]:
#residuals for model 1
# Residual = realised 'r_cds' minus the model-1 fitted value on the same date.
# Each entry is a one-column frame (column label 0) indexed by 'date'.
residual_dict_1 = {}
for ticker in tickers:
    fitted = predicted_returns_1[ticker]
    realised = df_dict[ticker][['r_cds']].reindex(fitted.index)
    paired = (
        pd.merge(fitted, realised, on = fitted.index)
          .set_index('key_0')
          .rename_axis('date')
    )
    residual_dict_1[ticker] = (paired['r_cds'] - paired['rets']).to_frame()

In [196]:
#equity residuals using capm
# Residual = realised 'r_equity' minus the CAPM fitted value on the same date.
# Each entry is a one-column frame (column label 0) indexed by 'date'.
residual_dict_capm = {}
for ticker in tickers:
    fitted = predicted_returns_capm[ticker]
    realised = df_dict[ticker][['r_equity']].reindex(fitted.index)
    paired = (
        pd.merge(fitted, realised, on = fitted.index)
          .set_index('key_0')
          .rename_axis('date')
    )
    residual_dict_capm[ticker] = (paired['r_equity'] - paired['rets']).to_frame()

In [197]:
#residuals in same df
# Put both residual series side by side per ticker. Both inputs carry a column
# labelled 0, so the merge suffixes them to '0_x' (model 1) and '0_y' (CAPM)
# before they are given readable names.
final_residuals = {}
for ticker in tickers:
    cds_part = residual_dict_1[ticker]
    equity_part = residual_dict_capm[ticker]
    final_residuals[ticker] = (
        pd.merge(cds_part, equity_part, on = equity_part.index)
          .set_index('key_0')
          .rename_axis('date')
          .rename(columns = {'0_x': 'cds residual', '0_y': 'equity residual'})
    )


In [198]:
#exponential decay functions and model
half_life = 1.2
# Per-row decay factor: a row's weight halves every `half_life` rows back in time.
lma = 2**(-1./half_life)


def exp_1_2_reg(_df, x, y, _wts=None):
    """Weighted least squares of column ``x`` on column ``y`` over the tail of ``_df``.

    ``x`` is the dependent (endog) column and ``y`` the regressor (exog), in the
    order they are passed to ``linreg.WLS``. No constant term is added.

    ``_wts`` is the weight vector, ordered oldest to newest. When omitted, the
    module-level ``exp_1p2_wts`` is used, which is how the helper behaved before
    the weights became an explicit argument. Only the last
    ``min(len(_wts), len(_df))`` rows and weights take part in the fit.

    Returns a one-row DataFrame of coefficients labelled with the last index
    value of ``_df``.
    """
    if _wts is None:
        _wts = exp_1p2_wts
    _n = min(_wts.shape[0], _df.shape[0])
    _ldf = _df.iloc[-_n:]
    _wt = _wts[-_n:]
    _p = linreg.WLS(_ldf[x], _ldf[y], weights=_wt).fit(params_only=True).params
    return pd.DataFrame(data=[_p], index=[_df.index[-1]])


exponential_residual_model_dict  = {}
for ticker in tickers:
    # Clean the residuals once and store the cleaned frame back (inf and NaN -> 0).
    final_residuals[ticker] = final_residuals[ticker].replace([np.inf, -np.inf], 0).fillna(0)
    resid = final_residuals[ticker]
    n_obs = resid.shape[0]

    threshold = 1. / (n_obs * 500)
    # Weights grow towards the most recent row, whose weight is 1.
    w12 = lma**np.arange(n_obs-1, -1, -1)
    # Cuts off weight values too small to care about, allowing use of naive routines without losing accuracy or having too many points
    exp_1p2_wts = w12[ w12 > threshold]
    # Number of leading rows required before the first fit is attempted.
    enough_points = np.where( w12 > 1./20 )[0].shape[0]

    # The loop bound uses this ticker's own length; it was previously tied to
    # final_residuals['BA'] regardless of the ticker being fitted.
    # NOTE(review): the range stops at n_obs - 1, so the window ending on the
    # final row is never fitted -- confirm this is intended.
    exp_1_2_coeffs = pd.concat([
        exp_1_2_reg(resid.iloc[max(0, i - exp_1p2_wts.shape[0]):i],
                    'cds residual', 'equity residual', exp_1p2_wts)
        for i in range(enough_points, n_obs)
    ])
    exponential_residual_model_dict[ticker] = exp_1_2_coeffs

In [199]:
#residual boxcar regression
# Rolling 16-row fits of 'cds residual' on 'equity residual'; each fit is
# labelled with the last date of its window.
# NOTE(review): the loop stops at n_rows - 1, so the window ending on the final
# row is never fitted -- confirm this is intended.
boxcar_residual_model_dict = {}
for ticker in tickers:
    resid = final_residuals[ticker]
    n_rows = resid.shape[0]
    window_fits = []
    for end in range(16, n_rows):
        window = resid.iloc[end - 16:end]
        window_fits.append(boxcar_5_reg(window, 'cds residual', 'equity residual'))
    boxcar_residual_model_dict[ticker] = pd.concat(window_fits)

In [200]:
#final exponential residuals
# final residual = 'cds residual' - coefficient * 'equity residual', evaluated on
# the dates for which an exponentially weighted coefficient exists.
# Results are stored in exponential_residuals_of_residuals_dict, which used to
# stay empty. They are also still written over exponential_residual_model_dict,
# because code that reads the residuals under that name would otherwise break.
exponential_residuals_of_residuals_dict = {}
for ticker in tickers:
    coef = exponential_residual_model_dict[ticker]

    if 'final residual' in coef.columns:
        # A previous run of this cell already replaced the coefficients with the
        # residual frame; reuse it so re-running the cell does not fail.
        exponential_residuals_of_residuals_dict[ticker] = coef
        continue

    res = final_residuals[ticker][['equity residual', 'cds residual']].reindex(coef.index)
    preds = res['equity residual'] * coef['equity residual']
    x = (res['cds residual'] - preds).to_frame('final residual').rename_axis('date')

    exponential_residuals_of_residuals_dict[ticker] = x
    # Legacy location: kept so existing readers of this dict see the same frames.
    exponential_residual_model_dict[ticker] = x

In [201]:
#final boxcar residuals
# final residual = 'cds residual' - coefficient * 'equity residual', evaluated on
# the dates for which a boxcar coefficient exists.
# Results are stored in boxcar_residuals_of_residuals_dict, which used to stay
# empty. They are also still written over boxcar_residual_model_dict, because
# code that reads the residuals under that name would otherwise break.
boxcar_residuals_of_residuals_dict = {}
for ticker in tickers:
    coef = boxcar_residual_model_dict[ticker]

    if 'final residual' in coef.columns:
        # A previous run of this cell already replaced the coefficients with the
        # residual frame; reuse it so re-running the cell does not fail.
        boxcar_residuals_of_residuals_dict[ticker] = coef
        continue

    res = final_residuals[ticker][['equity residual', 'cds residual']].reindex(coef.index)
    preds = res['equity residual'] * coef['equity residual']
    x = (res['cds residual'] - preds).to_frame('final residual').rename_axis('date')

    boxcar_residuals_of_residuals_dict[ticker] = x
    # Legacy location: kept so existing readers of this dict see the same frames.
    boxcar_residual_model_dict[ticker] = x

In [202]:
# Display BA's entry. The "final exponential residuals" cell stores its
# 'final residual' frames under this dict name, so this shows residuals rather
# than regression coefficients.
exponential_residual_model_dict['BA']

Unnamed: 0_level_0,final residual
date,Unnamed: 1_level_1
2018-02-07,0.000000
2018-02-14,0.000000
2018-02-21,0.000000
2018-02-28,0.000000
2018-03-07,0.000000
...,...
2022-11-23,0.050906
2022-11-30,-0.022244
2022-12-07,-0.000043
2022-12-14,0.006391


# 1 DATA

# 2 ANALYSIS