# Carry Trade Analysis


In [8]:
import os
import datetime
import quandl
import random

import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import pandas as pd
import numpy as np
import scipy as sp
import quandl
from sklearn.linear_model import LinearRegression



In [9]:
API_key_VX = "stqXZAdv9-_Dd2ZKAYFp"

In [10]:
def grab_quandl_table(
    table_path,
    avoid_download=False,
    replace_existing=False,
    date_override=None,
    allow_old_file=False,
    **kwargs,
):
    root_data_dir = os.path.join(os.environ["HOMEPATH"], "quandl_data_table_downloads")
    data_symlink = os.path.join(root_data_dir, f"{table_path}_latest.zip")
    if avoid_download and os.path.exists(data_symlink):
        print(f"Skipping any possible download of {table_path}")
        return data_symlink

    table_dir = os.path.dirname(data_symlink)
    if not os.path.isdir(table_dir):
        print(f'Creating new data dir {table_dir}')
        os.mkdir(table_dir)

    if date_override is None:
        my_date = datetime.datetime.now().strftime("%Y%m%d")
    else:
        my_date = date_override
    data_file = os.path.join(root_data_dir, f"{table_path}_{my_date}.zip")

    if os.path.exists(data_file):
        file_size = os.stat(data_file).st_size
        if replace_existing or not file_size > 0:
            print(f"Removing old file {data_file} size {file_size}")
        else:
            print(
                f"Data file {data_file} size {file_size} exists already, no need to download"
            )
            return data_file

    dl = quandl.export_table(
        table_path, filename=data_file, api_key=API_key_VX, **kwargs
    )
    file_size = os.stat(data_file).st_size
    if os.path.exists(data_file) and file_size > 0:
        print(f"Download finished: {file_size} bytes")
        if not date_override:
            if os.path.exists(data_symlink):
                print(f"Removing old symlink")
                os.unlink(data_symlink)
            print(f"Creating symlink: {data_file} -> {data_symlink}")
            os.symlink(
                data_file, data_symlink,
            )
    else:
        print(f"Data file {data_file} failed download")
        return
    return data_symlink if (date_override is None or allow_old_file) else "NoFileAvailable"


def fetch_quandl_table(table_path, avoid_download=True, **kwargs):
    return pd.read_csv(
        grab_quandl_table(table_path, avoid_download=avoid_download, **kwargs)
    )




In [4]:
path = '../data/Liq5YCDS.delim'
df = pd.read_csv(path, sep='\t').drop(columns=['Unnamed: 0'])

df.head()

Unnamed: 0,date,ticker,tenor,parspread,upfront,runningcoupon,cdsrealrecovery,cdsassumedrecovery,docclause,currency,tier,impliedrating
0,2018-01-01,BA,5Y,0.001793,-0.228583,0.05,0.4,0.4,MR14,USD,SNRFOR,BBB
1,2018-01-01,C,5Y,0.004113,-0.215978,0.05,0.4,0.4,MR14,USD,SNRFOR,A
2,2018-01-01,DD,5Y,0.002476,-0.22511,0.05,0.4,0.4,MR14,USD,SNRFOR,A
3,2018-01-01,F,5Y,0.008805,-0.005587,0.01,0.39,0.4,MR14,USD,SNRFOR,BB
4,2018-01-01,GE,5Y,0.004037,-0.028103,0.01,0.4,0.4,MR14,USD,SNRFOR,BBB


In [6]:
df.ticker.unique()

array(['BA', 'C', 'DD', 'F', 'GE', 'JPM', 'LNC', 'LOW', 'LUV', 'MAR', 'T',
       'WFC', 'WHR', 'XOM', 'XRX', 'NFLX'], dtype=object)

In [38]:
data = fetch_quandl_table("QUOTEMEDIA/PRICES", avoid_download=False)

Data file \Users\vince\quandl_data_table_downloads\QUOTEMEDIA/PRICES_20250219.zip size 1672923350 exists already, no need to download


In [58]:
ticker_set = set(df.ticker.unique())
ticker_set.add('SPY')

In [52]:
eq_price_data = data[data.ticker.isin(ticker_set)][['ticker', 'date', 'adj_close']]

In [53]:
eq_price_data.head()

Unnamed: 0,ticker,date,adj_close
246245,JPM,2023-06-05,133.946756
246754,NFLX,2023-06-05,403.54
256453,JPM,2023-04-11,123.767611
292489,BA,2023-06-05,208.78
300146,NFLX,2022-03-02,380.03


In [54]:
time_span_df = df.groupby('ticker')['date'].agg(['min', 'max'])

time_span_df

Unnamed: 0_level_0,min,max
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
BA,2018-01-01,2024-12-31
C,2018-01-01,2024-12-31
DD,2018-01-01,2023-02-03
F,2018-01-01,2024-12-31
GE,2018-01-01,2024-12-31
JPM,2018-01-01,2024-12-31
LNC,2018-01-01,2024-12-31
LOW,2018-01-01,2024-12-31
LUV,2018-01-01,2024-12-31
MAR,2018-01-01,2024-12-31


In [64]:
ts_df = eq_price_data.groupby('ticker')['date'].agg(['min', 'max'])

ts_df

Unnamed: 0_level_0,min,max
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
BA,1962-01-03,2025-02-12
C,1977-01-05,2025-02-12
F,1972-06-07,2025-02-12
GE,1962-01-03,2025-02-12
JPM,1984-01-04,2025-02-12
LNC,1984-10-10,2025-02-12
LOW,1985-07-03,2025-02-12
LUV,1980-01-02,2025-02-12
MAR,1993-10-13,2025-02-12
NFLX,2002-05-29,2025-02-12


We will remove DD from the dataset due to the lack of data. NFLX is also bad in the dataset, missing lots of returns.

In [None]:
#ticker_set.remove('DD')
ticker_set.remove('NFLX')

eq_price_data = eq_price_data[eq_price_data.ticker.isin(ticker_set)]
cds_df = df[df.ticker.isin(ticker_set)]

KeyError: 'NFLX'

# Initial Construction

Need to grab equity returns, ETF returns, CDS spread 'returns' and market returns. We will only look for Wednesday data.

In [155]:
cds_df['date'] = pd.to_datetime(cds_df['date'])
eq_price_data['date'] = pd.to_datetime(eq_price_data['date'])

cds_df = cds_df[['ticker', 'date', 'parspread']]

cds_df = cds_df[cds_df.date.dt.weekday == 2]
eq_price_data = eq_price_data[eq_price_data.date.dt.weekday == 2]

min_filter = eq_price_data.date >= '2018-01-01'
max_filter = eq_price_data.date <= '2024-12-31'

eq_price_data = eq_price_data[min_filter & max_filter]

In [156]:
eq_price_data['return'] = eq_price_data.sort_values(by=['ticker', 'date']).groupby('ticker')['adj_close'].pct_change().dropna()
cds_df['return'] = cds_df.sort_values(by=['ticker', 'date']).groupby('ticker')['parspread'].pct_change().dropna()

In [157]:
cds_df.dropna(inplace=True)
eq_price_data.dropna(inplace=True)

# Process OLS:

Need boxcar regressions and half life exponentially decaying regression.

In [158]:
eq_ret_df = eq_price_data.pivot(index='date', columns='ticker', values='return')
cds_ret_df = cds_df.pivot(index='date', columns='ticker', values='return')


In [159]:
cds_ret_df['index'] = cds_ret_df.apply(lambda row: row.mean(), axis=1)

In [173]:
def lin_regressions(X, y):
    X_np = X.to_numpy()
    y_np = y.to_numpy()

    if len(X_np.shape) == 1:
        X_np = X_np.reshape(-1, 1)

    beta = np.linalg.inv(X_np.T @ X_np) @ X_np.T @ y_np
    return beta.flatten()

In [169]:
def exp_decay_regression(X, y, H):
    n = len(y)
    weights = np.exp(-np.log(2) * np.arrange(n)[::-1] / H)
    W = np.diag(weights)

    X_np = X.to_numpy()
    y_np = y.to_numpy()

    XTWX = X_np.T @ W @ X_np
    XTWY = X_np.T @ W @ y_np

    beta = np.linalg.inv(XTWX) @ XTWY

    return beta.flatten()


In [162]:
eq_set = ticker_set.copy()
eq_set.remove('SPY')

In [165]:
common_ind = eq_ret_df.index.intersection(cds_ret_df.index)

eq_ret_df = eq_ret_df.loc[common_ind]
cds_ret_df = cds_ret_df.loc[common_ind]

In [None]:
def add_contemp_models(eq_df, cds_df, factor, exp=False):
    '''
    eq_df: equity dataframe 
    cds_df: cds dataframe
    exp: indicator of whether or not we are using exponential decay
    factor: the value which H or G is depending on whether we choose exponential decay or not

    will return a combined df with contemporaneous factors
    '''
    e_df = eq_df.copy()
    c_df = cds_df.copy()

    if not exp:
        # use factor as the boxcar window
        num_rows = len(e_df.index)

        for eq_name in eq_set:
            name1 = f'{eq_name} lambda'
            name2 = f'{eq_name} equity beta'
            name3 = f'{eq_name} cds index beta'
            l_list = [np.nan] * factor  # Initialize with NaNs for the first `factor` rows
            index_beta_list = [np.nan] * factor
            eq_beta_list = [np.nan] * factor

            for i in range(num_rows - factor):
                X_e_box = e_df[eq_name].iloc[i:i + factor]  # Select a window of `factor` rows
                y_e_box = e_df['SPY'].iloc[i:i + factor]  # Same for y

                X_c_box = c_df['index'].iloc[i:i + factor]
                y_c_box = c_df[eq_name].iloc[i:i + factor]

                X_eq_ind = pd.DataFrame({
                    eq_name: X_e_box,
                    'index': X_c_box
                })
                lmba = lin_regressions(X_e_box, y_e_box)
                betas_cont = lin_regressions(X_eq_ind, y_c_box)

                l_list.append(lmba[0])
                eq_beta_list.append(betas_cont[0])
                index_beta_list.append(betas_cont[1])

            e_df[name1] = l_list
            e_df[name2] = eq_beta_list
            e_df[name3] = index_beta_list

    for eq_name in eq_set:
        e_df = e_df.rename(columns={
            eq_name: f'{eq_name} eq'
        })
        c_df = c_df.rename(columns={
            eq_name: f'{eq_name} cds'
        })


    comb_df = pd.merge(e_df, c_df, left_index=True, right_index=True)

    mu_df = pd.DataFrame()

    for eq_name in eq_set:
        f_name = f'{eq_name} f'
        rho_name = f'{eq_name} rho'
        c_name = f'{eq_name} c'
        c_shift = f'{c_name} shift'
        equity_name = f'{eq_name} eq'
        cds_name = f'{eq_name} cds'

        lbda_name = f'{eq_name} lambda'
        eq_beta_name = f'{eq_name} equity beta'
        ind_beta_name = f'{cds_name} index beta'

        mu_name = f'{eq_name} mu'

        comb_df[f_name] = comb_df[equity_name] * comb_df[eq_beta_name] + comb_df['index'] * comb_df[ind_beta_name]
        comb_df[rho_name] = comb_df[cds_name] - comb_df[f_name]

        comb_df[c_name] = comb_df[equity_name] - comb_df[lbda_name] * comb_df['SPY']
        comb_df[c_shift] = comb_df[c_name].shift()

        # do the rho vs c regression
        # REDO THIS: IT IS A G WINDOW REGRESSION AND THE H FACTOR
        m_reg_df = comb_df[[c_shift, rho_name]].drop()

        mu_df[eq_name] = lin_regressions(m_reg_df[c_shift], m_reg_df[rho_name])

        

    return comb_df, mu_df




In [199]:
df1, df2 = add_contemp_models(eq_ret_df, cds_ret_df, factor=5, exp=False)

ValueError: Need to specify at least one of 'labels', 'index' or 'columns'