In [1]:
# Import required libraries
import os

import numpy as np
import pandas as pd
import wrds

# WRDS account name.
# Read from the WRDS_USERNAME environment variable when it is set (and non-empty)
# so the notebook can be shared without editing code; otherwise fall back to the
# account name this notebook was originally written with.
WRDS_USERNAME = os.environ.get("WRDS_USERNAME") or "saniazeb"

# Connect to WRDS database; `db` is the module-level connection used by the
# data-pull function defined below.
db = wrds.Connection(wrds_username=WRDS_USERNAME)

# Define function to pull CDS data
def pull_cds_data(start_year=2001, end_year=2012, connection=None):
    """
    Pulls 5-Year CDS data from the WRDS Markit database for a range of years.

    One query is issued per calendar year against the table
    ``markit.CDS<year>``, keeping only rows whose tenor is '5Y' and whose
    country is 'United States'. The yearly results are stacked in year order.

    Args:
        start_year (int): First year to pull (inclusive).
        end_year (int): Last year to pull (inclusive).
        connection: Optional object exposing ``raw_sql(query, date_cols=...)``.
            When omitted, the module-level ``db`` connection is used, which
            is the original behaviour.

    Returns:
        cds_data (DataFrame): Consolidated CDS data holding the columns the
        query selects (date, ticker, parspread) with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``start_year`` is after ``end_year`` (there would be
            nothing to concatenate).
    """
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be after end_year ({end_year})"
        )

    # Resolve the connection lazily so the global is only touched when needed.
    conn = db if connection is None else connection

    yearly_frames = []
    for year in range(start_year, end_year + 1):
        # The table name cannot be sent as a bound parameter; `year` comes from
        # range(), so it is always an integer and safe to interpolate.
        table_name = f"markit.CDS{year}"
        query = f"""
        SELECT date, ticker, parspread
        FROM {table_name}
        WHERE tenor = '5Y' AND country = 'United States'
        """
        yearly_frames.append(conn.raw_sql(query, date_cols=['date']))

    cds_data = pd.concat(yearly_frames, ignore_index=True)
    return cds_data

# Download the full 2001-2012 sample (one query per year; this hits WRDS)
cds_data = pull_cds_data(start_year=2001, end_year=2012)

# Preview the first five rows as the cell's rich output
cds_data.head(5)


Loading library list...
Done


Unnamed: 0,date,ticker,parspread
0,2001-01-09,AA,0.004483
1,2001-01-09,AA,0.004192
2,2001-01-10,AA,0.00415
3,2001-01-10,AA,0.00388
4,2001-01-11,AA,0.00415


In [4]:
# Quick sanity check: column names first, then the leading rows
print(cds_data.columns, cds_data.head(), sep="\n")


Index(['ticker', 'parspread'], dtype='object')
                     ticker  parspread
date                                  
2001-01-02              ABS   0.012426
2001-01-02              AMR   0.016070
2001-01-02  AOL-TimeWarnInc   0.008174
2001-01-02              AVP   0.006171
2001-01-02              AXP   0.003252


In [6]:
import pandas as pd

# Build month-end CDS spread portfolios from the daily `cds_data` frame:
# average duplicate quotes per (date, ticker), keep each ticker's last value
# per month, sort names into spread quantiles per month, and pivot to one
# column per quantile (CDS_01, CDS_02, ...).

# If the frame is indexed by a DatetimeIndex, move that index back into a column.
# NOTE(review): the restored column is only called 'date' if the index was
# named 'date' — confirm against whatever cell set the index.
if isinstance(cds_data.index, pd.DatetimeIndex):
    cds_data.reset_index(inplace=True)

# Ensure 'date' is in datetime format; unparseable values become NaT
cds_data['date'] = pd.to_datetime(cds_data['date'], errors='coerce')

# Drop rows where 'date' conversion failed
cds_data.dropna(subset=['date'], inplace=True)

# Fail early with a clear message if any column used below is absent
required_cols = {'date', 'ticker', 'parspread'}
missing_cols = required_cols - set(cds_data.columns)
if missing_cols:
    raise KeyError(f"Missing required columns: {missing_cols}")

# Collapse multiple quotes for the same (date, ticker) into their mean spread.
# This rebinds `cds_data` to the aggregated frame (columns: date, ticker, parspread).
cds_data = cds_data.groupby(['date', 'ticker'], group_keys=False)['parspread'].mean().reset_index()

# Per ticker, bin observations by month end ('ME') and keep the last value in each bin.
# NOTE(review): the shape of this result (whether 'ticker' survives as a column)
# depends on how the installed pandas treats grouping columns in
# groupby(...).resample(...) with group_keys=False — confirm on the target version.
cds_monthly = cds_data.set_index('date').groupby('ticker', group_keys=False).resample('ME').last().reset_index()

# Drop any rows with missing values after resampling
cds_monthly.dropna(inplace=True)

# Sort by date and parspread for quantile computation
cds_monthly = cds_monthly.sort_values(by=['date', 'parspread'])

# Within each month, cut spreads into up to 20 quantile bins; +1 makes labels 1-based.
# NOTE(review): with duplicates='drop', a month whose bin edges collide gets fewer
# than 20 bins, so the same label may not mean the same quantile across months — confirm
# this is acceptable for the portfolio definition.
cds_monthly['quantile'] = cds_monthly.groupby('date')['parspread'].transform(
    lambda x: pd.qcut(x, 20, labels=False, duplicates='drop') + 1
)

# Drop any remaining missing quantile values
cds_monthly.dropna(subset=['quantile'], inplace=True)

# Convert quantile column to integer for correct pivoting
cds_monthly['quantile'] = cds_monthly['quantile'].astype(int)

# One row per month, one column per quantile; each cell is the median spread
# of the names that fall in that quantile that month
cds_pivot = cds_monthly.pivot_table(index='date', columns='quantile', values='parspread', aggfunc='median')

# Rename columns to follow CDS_01, CDS_02, ..., CDS_20 naming convention
cds_pivot.columns = [f'CDS_{int(col):02d}' for col in cds_pivot.columns]

# Fill gaps in each portfolio column with the previous month's value.
# NOTE(review): this carries stale spreads into months where a quantile had no
# members — verify that is intended rather than leaving them missing.
cds_pivot.ffill(inplace=True)

# Reset index to keep 'date' as a column (the index becomes 0..n-1)
cds_pivot.reset_index(inplace=True)

# Display processed data
print(cds_pivot.head())


        date    CDS_01    CDS_02    CDS_03    CDS_04    CDS_05    CDS_06  \
0 2001-01-31  0.002650  0.003450  0.004024  0.004411  0.005184  0.005576   
1 2001-02-28  0.002496  0.003038  0.004025  0.004381  0.005200  0.005729   
2 2001-03-31  0.002538  0.003216  0.003792  0.004077  0.004711  0.005462   
3 2001-04-30  0.002542  0.003135  0.003705  0.004127  0.004615  0.005179   
4 2001-05-31  0.002362  0.003000  0.003505  0.004000  0.004399  0.004833   

     CDS_07    CDS_08    CDS_09  ...    CDS_11    CDS_12    CDS_13    CDS_14  \
0  0.006131  0.006624  0.007332  ...  0.009014  0.010078  0.011173  0.012444   
1  0.006130  0.006942  0.007715  ...  0.009486  0.010559  0.011375  0.014500   
2  0.006066  0.006949  0.007750  ...  0.009158  0.010000  0.011000  0.012600   
3  0.005976  0.007129  0.007693  ...  0.009000  0.009612  0.010469  0.013000   
4  0.005814  0.006500  0.007197  ...  0.008260  0.009250  0.010575  0.012181   

     CDS_15    CDS_16    CDS_17    CDS_18    CDS_19    CDS_20 

In [13]:
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

# Define Loss Given Default (LGD) as 60%
LGD = 0.6

# Define start and end dates for the analysis
start_date = "2001-01-01"
end_date = "2012-12-31"

# The arithmetic below divides and differences the whole frame, so every column
# must be a numeric spread and the dates must live in the index.
# The portfolio-construction cell leaves 'date' as an ordinary column, so restore
# it as the index here instead of failing; raise only when no date information
# is available at all. Rebinding (rather than mutating in place) keeps this
# cell safe to re-run.
if not isinstance(cds_pivot.index, pd.DatetimeIndex):
    if 'date' not in cds_pivot.columns:
        raise KeyError("Expected 'date' as the index in cds_pivot, but it is missing.")
    cds_pivot = cds_pivot.assign(date=pd.to_datetime(cds_pivot['date'])).set_index('date')

# ✅ Fetch & interpolate risk-free interest rates
def fetch_interest_rates():
    """
    Builds a one-row table of quarterly discount factors from a stand-in curve.

    Nothing is downloaded: a fixed set of synthetic annualized rates is
    linearly interpolated onto quarterly tenors (0.25 through 5.00 years) and
    each interpolated rate is turned into a discount factor using quarterly
    compounding.

    Returns:
        DataFrame: a single row with 20 columns, labelled by tenor in years.
    """
    # Example synthetic risk-free rates (annualized), keyed by tenor label
    rates = {
        "1M": 0.02, "3M": 0.022, "6M": 0.023, "1Y": 0.025, "2Y": 0.027,
        "5Y": 0.03, "10Y": 0.035, "30Y": 0.04
    }
    curve_rates = np.fromiter(rates.values(), dtype=float)
    # Tenors in years, listed in the same order as the entries of `rates`
    curve_tenors = np.array([1/12, 3/12, 6/12, 1, 2, 5, 10, 30])

    # Piecewise-linear curve through the synthetic points
    rate_at = interp1d(curve_tenors, curve_rates, kind='linear', fill_value="extrapolate")

    # 1Q to 20Q expressed in years
    grid = np.arange(0.25, 5.25, 0.25)
    compounding_periods = grid * 4
    growth_per_period = 1 + rate_at(grid) / 4

    # Quarterly-compounded discount factor for each tenor
    discount_factors = 1 / growth_per_period ** compounding_periods

    return pd.DataFrame([discount_factors], columns=grid)

# Quarterly discount factors: one row, one entry per quarter 1..20
quarterly_discount = fetch_interest_rates()
discount_by_quarter = quarterly_discount.values

# Hazard rate lambda implied by each portfolio spread under the assumed LGD
lambda_df = np.log(1 + (cds_pivot / (4 * LGD))) * 4

# Risky Duration container, aligned with the hazard-rate table
quarters = range(1, 21)
risky_duration = pd.DataFrame(index=lambda_df.index, columns=lambda_df.columns)

# For every portfolio: survival probability to each quarter, discounted and summed
for col in lambda_df.columns:
    hazard = lambda_df[col]

    # exp(-q * lambda / 4) for q = 1..20, one column per quarter
    quarterly_survival_probability = pd.DataFrame(
        {quarter: np.exp(-((quarter * hazard) / 4)) for quarter in quarters},
        index=lambda_df.index,
    )

    # Weight each quarter's survival probability by its discount factor;
    # the row sum skips missing values (pandas default)
    temp_df = quarterly_survival_probability * discount_by_quarter
    risky_duration[col] = 0.25 * temp_df.sum(axis=1)

# Lag by one period to obtain RD_{t-1}; forward-fill any gaps after the shift
risky_duration_shifted = risky_duration.shift(1).ffill()

# Return = carry term + spread change scaled by lagged risky duration.
# NOTE(review): He-Kelly-Manela (2017) write this as CDS_{t-1}/250 + dCDS_t * RD_{t-1};
# the expression here uses the current-period spread in the carry term and an
# extra factor of 4 on the second term — confirm this is intended.
carry_component = cds_pivot / 250
spread_change_component = 4 * cds_pivot.diff() * risky_duration_shifted
cds_returns = carry_component + spread_change_component

# Relabel CDS_xx -> CDS_xx_RET
cds_returns.columns = [
    f'CDS_{int(name.split("_")[1]):02d}_RET' for name in cds_returns.columns
]

# Bring 'date' back as a regular column
cds_returns = cds_returns.reset_index()

# Show the first rows of the result
print(cds_returns.head())


        date  CDS_01_RET  CDS_02_RET  CDS_03_RET  CDS_04_RET  CDS_05_RET  \
0 2001-01-31         NaN         NaN         NaN         NaN         NaN   
1 2001-02-28   -0.002823   -0.007527    0.000037   -0.000539    0.000317   
2 2001-03-31    0.000783    0.003275   -0.004236   -0.005524   -0.008871   
3 2001-04-30    0.000083   -0.001473   -0.001587    0.000925   -0.001741   
4 2001-05-31   -0.003294   -0.002469   -0.003629   -0.002300   -0.003910   

   CDS_06_RET  CDS_07_RET  CDS_08_RET  CDS_09_RET  ...  CDS_11_RET  \
0         NaN         NaN         NaN         NaN  ...         NaN   
1    0.002797   -0.000008    0.005785    0.006934  ...    0.008492   
2   -0.004826   -0.001135    0.000152    0.000653  ...   -0.005826   
3   -0.005115   -0.001606    0.003279   -0.000995  ...   -0.002792   
4   -0.006271   -0.002909   -0.011327   -0.008892  ...   -0.013209   

   CDS_12_RET  CDS_13_RET  CDS_14_RET  CDS_15_RET  CDS_16_RET  CDS_17_RET  \
0         NaN         NaN         NaN        