# REQUIREMENTS

In [56]:
import wrds, datetime
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None  # default='warn'

conn = wrds.Connection()

WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


# DATA COLLECTION

In [57]:
sp500_data = conn.raw_sql("""
SELECT
    caldt AS date,
    sprtrn AS daily_return -- Equal-Weighted Return (includes distributions) (ewretd)
FROM
    crspq.dsp500
WHERE
    caldt >= '1990-01-01'
ORDER BY
    caldt;
""")
sp500_data['date'] = pd.to_datetime(sp500_data['date'])

sp500_data

Unnamed: 0,date,daily_return
0,1990-01-02,0.017799
1,1990-01-03,-0.002586
2,1990-01-04,-0.008613
3,1990-01-05,-0.009756
4,1990-01-08,0.004514
...,...,...
8748,2024-09-24,0.002511
8749,2024-09-25,-0.001861
8750,2024-09-26,0.004039
8751,2024-09-27,-0.001253


In [58]:
risk_free_data = conn.raw_sql('''
    SELECT
        date,
        rf AS daily_rf_rate -- One Month Treasury Bill Rate (daily)
    FROM
        ff.factors_daily
    WHERE
        date >= '1990-01-01'
''')

risk_free_data['date'] = pd.to_datetime(risk_free_data['date'])

risk_free_data

Unnamed: 0,date,daily_rf_rate
0,1990-01-02,0.00026
1,1990-01-03,0.00026
2,1990-01-04,0.00026
3,1990-01-05,0.00026
4,1990-01-08,0.00026
...,...,...
8748,2024-09-24,0.00020
8749,2024-09-25,0.00020
8750,2024-09-26,0.00020
8751,2024-09-27,0.00020


# Choosing Companies

In [59]:
# retrieves ID (permno) and earliest recorded name for each company in CRSP database
# 37,776 companies available

comps = conn.raw_sql( '''
SELECT permno, MIN(comnam) AS company_name
FROM crsp.stocknames
GROUP BY permno
''')
comps

Unnamed: 0,permno,company_name
0,83264,GREIF BROTHERS CORP
1,63618,HINDERLITER ENERGY EQUIP CORP
2,10896,CAMILLE ST MORITZ INC
3,69906,SEIBELS BRUCE GROUP INC
4,79030,GREAT CENTRAL MINES LTD
...,...,...
37771,14886,ARK E T F TRUST
37772,79163,A M F M INC
37773,86036,BRENTWOOD INSTRUMENTS INC
37774,92970,CHINA EDUCATION ALLIANCE INC


In [60]:
#TODO: choose a way to narrow the above list of permnos to <500. Will then use that list with the following functions to gather fin data
#this will go away when the above is completed.

company_search = 'AMAZON' # FIND A COMPANY HERE

comps[comps['company_name'].str.contains(f'{company_search}')]

Unnamed: 0,permno,company_name
9682,84788,AMAZON COM INC


# Helper functions

In [61]:
def get_comp_data(permno, gvkey):

    company_fin_data = get_company_financials(gvkey)
    
    #forward fill
    company_fin_data = company_fin_data.ffill().dropna()
    company_stock_prcs = get_prices(permno)
    
    # Create new columns with just year and month
    company_fin_data['year_month'] = company_fin_data['date'].dt.strftime('%Y-%m')
    company_stock_prcs['year_month'] = company_stock_prcs['date'].dt.strftime('%Y-%m')
    
    company = pd.merge(company_fin_data, company_stock_prcs, on='year_month')

    company.drop(columns=['year_month', 'date_y'], axis=1, inplace=True)
    company.rename(columns={'date_x':'date'}, inplace=True)
    
    # company = pd.merge(company_fin_data, company_stock_prcs, on='date', how='inner')
    # company['date'] = company['reporting_date'] # backtest logic TODO
    return company

In [62]:
def get_company_financials(gvkey):
    company_fin_data = conn.raw_sql(f'''
    SELECT
        datadate AS date,
        gvkey AS gvkey,
        rdq AS reporting_date, -- Date of which information was reported
        atq AS total_assets,  -- Total Assets
        chq AS cash_holdings,  -- Cash and Short-Term Investments
        dlttq + dlcq AS total_debt,  -- Total Debt (long-term + short-term debt)
        ibq AS earnings,  -- Earnings before extraordinary items
        xrdq AS rd_expense,  -- R&D expense
        dvpq AS dividends_paid,  -- Dividends paid
        xintq AS interest_expense  -- Interest expense
    FROM
        comp.fundq
    WHERE
        gvkey = '{gvkey}'
        AND datadate >= '1990-01-01'  -- Ensure data is after the link start date
    ''')
    
    company_fin_data['date'] = pd.to_datetime(company_fin_data['date'])
    company_fin_data['reporting_date'] = pd.to_datetime(company_fin_data['reporting_date'])
    
    start_date = company_fin_data['date'].min()
    end_date = company_fin_data['date'].max()
    all_months = pd.date_range(start=start_date, end=end_date, freq='ME')
    full_range_df = pd.DataFrame({'date': all_months})
    company_fin_data = pd.merge(full_range_df, company_fin_data, on='date', how='left')
    
    company_fin_data['month_start'] = company_fin_data['date'] - pd.offsets.MonthBegin()
    company_fin_data['month_end'] = company_fin_data['date']
    
    return company_fin_data

In [63]:
def get_gvkey(permno):
    link = conn.raw_sql(f'''
    SELECT *
    FROM crsp.ccmxpf_linktable
    WHERE lpermno = {permno};
    ''')
    
    return link['gvkey'][0]

In [64]:
def get_prices(permno):
    company_stock_prcs = conn.raw_sql(f'''
    SELECT
        date,
        prc AS stock_price,
        ret AS stock_return,  -- Stock returns from CRSP
        prc * shrout AS market_cap  -- Market cap
    FROM
        crsp.msf
    WHERE
        permno = {permno}
    ''')
    
    company_stock_prcs['date'] = pd.to_datetime(company_stock_prcs['date'])
    
    return company_stock_prcs

In [65]:
#calculations for cash-hedging & returns
def calculate_returns(company, lower = 0.01, upper = 0.99):
    company['cash_share_weight'] = (company['cash_holdings'] / company['total_assets'])
    company['cash_holdings_t_minus_1'] = company['cash_holdings'].shift()
    
    calculate_b_it(company)
    calculate_e_it(company)
    company['cash_hedged_return'] = winsorize(company['cash_hedged_return'], lower, upper)

In [66]:
def calculate_b_it(company):
    data = company.copy()
    data['market_cap_t_minus_1'] = data['market_cap'].shift() # paper uses M_{t-1} for the denoms
    data['leverage'] = data['total_debt'] / (data['total_debt'] + data['market_cap'])
    
    # Y VALUES
    data['r_minus_R'] = data['stock_return'] - data['rf_rate']
    
    # REGRESSION VARIABLES
    data['gamma_1'] = (data['cash_holdings'].diff()) / data['market_cap_t_minus_1']
    data['gamma_2'] = (data['earnings'].diff()) / data['market_cap_t_minus_1']
    data['gamma_3'] = ((data['total_assets'] - data['cash_holdings']).diff()) / data['market_cap_t_minus_1']
    data['gamma_4'] = (data['rd_expense'].diff()) / data['market_cap_t_minus_1']
    data['gamma_5'] = (data['interest_expense'].diff()) / data['market_cap_t_minus_1']
    data['gamma_6'] = (data['dividends_paid'].diff()) / data['market_cap_t_minus_1']
    data['gamma_7']= data['cash_holdings_t_minus_1'] / data['market_cap_t_minus_1']
    data['gamma_8'] = data['leverage']
    data['gamma_9'] = (data['total_debt'].diff() + data['market_cap_t_minus_1'].diff()) / (data['total_debt'].shift() + data['market_cap_t_minus_1'].shift())
    data['gamma_10'] = (data['market_cap_t_minus_1'] * (data['cash_holdings'].diff())) / (data['market_cap'] ** 2)
    data['gamma_11'] = (data['leverage'] * (data['cash_holdings'].diff())) / data['market_cap']
    
    data = data.dropna()
    
    y = data['r_minus_R']
    X = data[['gamma_1', 'gamma_2', 'gamma_3', 'gamma_4', 'gamma_5', 'gamma_6', 'gamma_7', 'gamma_8', 'gamma_9', 'gamma_10', 'gamma_11']]
    
    X = sm.add_constant(X)
    
    model = sm.OLS(y, X).fit()
    
    #print(model.summary())
    
    data['marginal_cash_value'] = (
        model.params.iloc[0] +
        (model.params.iloc[10] * (data['cash_holdings_t_minus_1'] / data['market_cap_t_minus_1'])) +
        (model.params.iloc[11] * data['leverage'])
    )
    
    data['average_cash_value'] = data['marginal_cash_value'] * data['cash_holdings']
    
    company['b_it'] = data['average_cash_value'].pct_change()  # monthly cash return

In [67]:
def calculate_e_it(company):
    company['cash_hedged_return'] = (1 / (1 - company['cash_share_weight'])) * (company['stock_return'] - (company['cash_share_weight']  * company['b_it']))

In [68]:
def winsorize(series, lower=0.01, upper=0.99):
    lower_threshold = series.quantile(lower)
    upper_threshold = series.quantile(upper)
    return series.clip(lower=lower_threshold, upper=upper_threshold)

In [69]:
window = 65 # months

def calculate_rolling_beta(stock_return, market_return, window = 65):
    rolling_cov = stock_return.rolling(window).cov(market_return)
    rolling_var = market_return.rolling(window).var()
    return rolling_cov / rolling_var

# Getting Aggregate & Cleaned Data

In [70]:
permnos = [10107, 86580, 84788]
gvkeys = [get_gvkey(permno) for permno in permnos]

agg_fin_data = pd.DataFrame(columns=['date', 'gvkey', 'reporting_date', 'total_assets', 'cash_holdings',
       'total_debt', 'earnings', 'rd_expense', 'dividends_paid',
       'interest_expense', 'stock_price', 'stock_return', 'market_cap',
       'snp_return', 'rf_rate', 'cash_share_weight', 'cash_holdings_t_minus_1',
       'b_it', 'cash_hedged_return','company_beta','cash_hedged_beta'])

for i in range(len(permnos)):
    company = get_comp_data(permnos[i],gvkeys[i])
    
    def aggregate_monthly(start_date, end_date, daily_data, column, agg_func):
        mask = (daily_data['date'] >= start_date) & (daily_data['date'] <= end_date)
        return agg_func(daily_data.loc[mask, column])
    
    company['snp_return'] = company.apply(
        lambda row: aggregate_monthly(row['month_start'], row['month_end'], sp500_data, 'daily_return', 
                                        lambda x: np.prod(1 + x) - 1), axis=1
    )
    
    company['rf_rate'] = company.apply(
        lambda row: aggregate_monthly(row['month_start'], row['month_end'], risk_free_data, 'daily_rf_rate', 
                                        lambda x: np.prod(1 + x) - 1), axis=1
    )
    
    company.drop(columns=['month_start', 'month_end'], axis=1, inplace=True)
    
    calculate_returns(company)
    
    company['company_beta'] = calculate_rolling_beta(company['stock_return'], company['snp_return'], window)
    company['cash_hedged_beta'] = calculate_rolling_beta(company['cash_hedged_return'], company['snp_return'], window)
    
    if not agg_fin_data.empty:
        agg_fin_data = pd.concat([agg_fin_data, company], ignore_index=True)
    else:
        agg_fin_data = company
    
agg_fin_data = agg_fin_data.sort_values(by='date').reset_index(drop=True)
agg_fin_data

Unnamed: 0,date,gvkey,reporting_date,total_assets,cash_holdings,total_debt,earnings,rd_expense,dividends_paid,interest_expense,...,stock_return,market_cap,snp_return,rf_rate,cash_share_weight,cash_holdings_t_minus_1,b_it,cash_hedged_return,company_beta,cash_hedged_beta
0,2007-03-31,064768,2007-04-24,3661.000,748.000,1267.0,111.000,186.000,0.0,19.0,...,0.016607,1.627411e+07,0.009981,0.004188,0.204316,,,,,
1,2007-04-30,117768,2007-05-10,2800.868,678.951,0.0,132.259,158.321,0.0,0.0,...,0.142807,1.193621e+07,0.043292,0.004409,0.242407,,,,,
2,2007-04-30,064768,2007-04-24,3661.000,748.000,1267.0,111.000,186.000,0.0,19.0,...,0.541342,2.512678e+07,0.043292,0.004409,0.204316,748.000,,,,
3,2007-05-31,064768,2007-04-24,3661.000,748.000,1267.0,111.000,186.000,0.0,19.0,...,0.127344,2.832652e+07,0.032547,0.003967,0.204316,748.000,,,,
4,2007-05-31,117768,2007-05-10,2800.868,678.951,0.0,132.259,158.321,0.0,0.0,...,0.053177,1.257094e+07,0.032547,0.003967,0.242407,678.951,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,2023-11-30,064768,2023-10-26,486883.000,49605.000,161065.0,9879.000,21203.000,0.0,806.0,...,0.097678,1.509699e+09,0.089178,0.004419,0.101883,49605.000,-0.088986,0.118853,1.234687,1.635492
595,2023-11-30,117768,2023-11-21,54148.000,5519.000,11027.0,9243.000,2294.000,0.0,63.0,...,0.146886,1.155219e+09,0.089178,0.004419,0.101924,5519.000,-0.129501,0.178254,1.747544,0.537961
596,2023-12-31,012141,2024-01-30,470558.000,17305.000,111358.0,21870.000,7142.000,0.0,921.0,...,-0.007574,2.794828e+09,0.044230,0.004208,0.036775,80452.000,-0.771588,0.021596,0.896334,1.077218
597,2023-12-31,117768,2023-11-21,54148.000,5519.000,11027.0,9243.000,2294.000,0.0,63.0,...,0.058927,1.223193e+09,0.044230,0.004208,0.101924,5519.000,-0.055571,0.071922,1.747104,0.545106
