In [1]:
import wrds, datetime
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
pd.options.mode.chained_assignment = None  # default='warn'

conn = wrds.Connection()

WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [2]:
#Gets risk-free rates at EOM from Dec 1925 to Dec 2023. Data is from tfz_mth_rf table in the CRSP library.
rfr = conn.raw_sql('''
SELECT
    mcaldt AS date,
    tmytm AS risk_free_rate
FROM
    crsp.tfz_mth_rf
''')

rfr = rfr[:int(len(rfr)/2)] # the table has two RFRs for each date for some reason so just taking the first half.

rfr.tail(10)

Unnamed: 0,date,risk_free_rate
1167,2023-03-31,4.665815
1168,2023-04-28,4.169626
1169,2023-05-31,4.954459
1170,2023-06-30,5.162219
1171,2023-07-31,5.329893
1172,2023-08-31,5.356142
1173,2023-09-29,5.365935
1174,2023-10-31,5.370237
1175,2023-11-30,5.345954
1176,2023-12-29,5.381215


In [3]:
#Retrieves Fama-French monthly factor data and the risk-free rate (1-month Treasury bill rate) at BOM from WRDS.
factors = conn.raw_sql('''
    SELECT
        date,
        mktrf AS market_factor,  -- Market factor (excess return on the market)
        smb AS size_factor,  -- Size factor (Small Minus Big)
        hml AS value_factor,  -- Value factor (High Minus Low)
        umd AS momentum_factor,  -- Momentum factor
        rf AS risk_free_rate -- One Month Treasury Bill Rate (daily)
    FROM
        ff.factors_monthly
''')

factors

Unnamed: 0,date,market_factor,size_factor,value_factor,momentum_factor,risk_free_rate
0,1926-07-01,0.0296,-0.0256,-0.0243,,0.0022
1,1926-08-01,0.0264,-0.0117,0.0382,,0.0025
2,1926-09-01,0.0036,-0.0140,0.0013,,0.0023
3,1926-10-01,-0.0324,-0.0009,0.0070,,0.0032
4,1926-11-01,0.0253,-0.0010,-0.0051,,0.0031
...,...,...,...,...,...,...
1174,2024-05-01,0.0434,0.0078,-0.0166,-0.0002,0.0044
1175,2024-06-01,0.0277,-0.0306,-0.0331,0.0090,0.0041
1176,2024-07-01,0.0124,0.0680,0.0573,-0.0242,0.0045
1177,2024-08-01,0.0161,-0.0355,-0.0113,0.0479,0.0048


In [4]:
#gets ID (permno) and earliest recorded name for each company in CRSP database. There are 37,776 companies available to us.
conn.raw_sql( '''
SELECT permno, MIN(comnam) AS company_name
FROM crsp.stocknames
GROUP BY permno
''')

Unnamed: 0,permno,company_name
0,83264,GREIF BROTHERS CORP
1,63618,HINDERLITER ENERGY EQUIP CORP
2,10896,CAMILLE ST MORITZ INC
3,69906,SEIBELS BRUCE GROUP INC
4,79030,GREAT CENTRAL MINES LTD
...,...,...
37771,14886,ARK E T F TRUST
37772,79163,A M F M INC
37773,86036,BRENTWOOD INSTRUMENTS INC
37774,92970,CHINA EDUCATION ALLIANCE INC


In [5]:
#Using NVIDIA as example to figure out the algorithm.

conn.raw_sql('''
SELECT *
FROM crsp.ccmxpf_linktable
WHERE lpermno = 86580;
''')

Unnamed: 0,gvkey,linkprim,liid,linktype,lpermno,lpermco,usedflag,linkdt,linkenddt
0,117768,P,1,LU,86580.0,16382.0,1.0,1999-01-22,


In [13]:
#Getting NVIDIA time series data.
nvidia = conn.raw_sql('''
WITH comp_data AS (
    SELECT
        f.gvkey,
        f.datadate AS date,
        f.atq AS total_assets,  -- Total Assets
        f.chq AS cash_holdings,  -- Cash and Short-Term Investments
        f.dlttq + f.dlcq AS total_debt,  -- Total Debt (long-term + short-term debt)
        f.ibq AS earnings,  -- Earnings before extraordinary items
        f.xrdq AS rd_expense,  -- R&D expense
        f.dvpq AS dividends_paid,  -- Dividends paid
        f.xintq AS interest_expense,  -- Interest expense
        f.prccq * f.cshoq AS market_cap  -- Market cap (price * shares outstanding)
    FROM
        comp.fundq f
    WHERE
        gvkey = '117768'
        AND f.datadate >= '1986-10-29'  -- Ensure data is after the link start date
),
crsp_data AS (
    SELECT
        permno,
        date,
        ret AS stock_return,  -- Stock returns from CRSP
        prc AS stock_price,
        prc * shrout AS market_cap  -- Market cap
    FROM
        crsp.msf
    WHERE
        permno = 86580  -- Use the correct permno for gvkey 003243
)
SELECT
    c.gvkey,
    cr.permno,
    c.date,
    cr.stock_price,
    c.cash_holdings,
    c.total_assets,
    c.market_cap,
    c.earnings,
    c.total_debt,
    c.rd_expense,
    c.dividends_paid,
    c.interest_expense
FROM
    comp_data c
JOIN
    crsp_data cr ON c.date = cr.date
ORDER BY
    c.gvkey, c.date;

''')

nvidia

Unnamed: 0,gvkey,permno,date,stock_price,cash_holdings,total_assets,market_cap,earnings,total_debt,rd_expense,dividends_paid,interest_expense
0,117768,86580,1999-04-30,18.25000,,126.656,5.370610e+02,6.261,3.156,8.785,0.0,
1,117768,86580,2000-01-31,37.06250,,202.250,1.152644e+03,14.587,2.748,15.421,0.0,
2,117768,86580,2000-07-31,60.00000,,507.183,3.927240e+03,22.522,1.776,20.141,0.0,
3,117768,86580,2000-10-31,62.14063,,983.346,4.218727e+03,28.071,301.329,22.023,0.0,
4,117768,86580,2001-01-31,51.62500,,1016.427,3.534041e+03,31.057,300.966,26.444,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
65,117768,86580,2022-01-31,244.86000,1990.0,44187.000,6.136192e+05,3003.000,11831.000,1466.000,0.0,61.0
66,117768,86580,2022-10-31,134.97000,2800.0,40488.000,3.331060e+05,680.000,11904.000,1944.000,0.0,66.0
67,117768,86580,2023-01-31,195.37000,3389.0,41182.000,4.817824e+05,1414.000,12031.000,1952.000,0.0,64.0
68,117768,86580,2023-07-31,467.29001,5783.0,49555.000,1.153739e+06,6189.000,10954.000,2041.000,0.0,65.0


In [14]:
#adding cash share & quarterly returns
nvidia['cash_share_lagged'] = (nvidia['cash_holdings']/nvidia['total_assets']).shift(2)
nvidia['quarterly_return'] = nvidia['stock_price'].pct_change(fill_method=None)

In [15]:
#firm-specific cash return estimate, based on Faulkender and Wang (2006) model
data = nvidia.copy()

#side note: might want to implement a check that market_cap is never NaN or 0 before the following scaling computations

# scaled change in each var
data['delta_cash'] = (data['cash_holdings'].diff()) / data['market_cap']
data['delta_earnings'] = (data['earnings'].diff()) / data['market_cap']
data['delta_net_assets'] = ((data['total_assets'] - data['cash_holdings']).diff()) / data['market_cap']
data['delta_r_and_d'] = (data['rd_expense'].diff()) / data['market_cap']
data['delta_dividends'] = (data['dividends_paid'].diff()) / data['market_cap']
data['leverage'] = data['total_debt'] / (data['total_debt'] + data['market_cap'])
data['net_financing'] = (data['total_debt'].diff() + data['market_cap'].diff()) / data['market_cap']

#regression
X = data[['delta_cash', 'delta_earnings', 'delta_net_assets',
          'delta_r_and_d', 'delta_dividends', 'leverage', 'net_financing']]
y = data['quarterly_return']

# must drop NaN values for the regression, but we due to missing values we are left with very few data points (for nvidia at least)
X = X.dropna()
y = y.loc[X.index]

model = LinearRegression().fit(X, y)
nvidia_b_it = model.coef_[0]

In [16]:
# calculate the cash-hedged return e_it
epsilon = 1e-6  # Small constant to avoid division by zero or very small numbers
nvidia['cash_hedged_return'] = (1 / (1 - nvidia['cash_share_lagged'] + epsilon)) * (
    nvidia['quarterly_return'] - (nvidia['cash_share_lagged'] / (1 - nvidia['cash_share_lagged'] + epsilon)) * nvidia_b_it
)
nvidia

Unnamed: 0,gvkey,permno,date,stock_price,cash_holdings,total_assets,market_cap,earnings,total_debt,rd_expense,dividends_paid,interest_expense,cash_share_lagged,quarterly_return,cash_hedged_return
0,117768,86580,1999-04-30,18.25000,,126.656,5.370610e+02,6.261,3.156,8.785,0.0,,,,
1,117768,86580,2000-01-31,37.06250,,202.250,1.152644e+03,14.587,2.748,15.421,0.0,,,1.030822,
2,117768,86580,2000-07-31,60.00000,,507.183,3.927240e+03,22.522,1.776,20.141,0.0,,,0.618887,
3,117768,86580,2000-10-31,62.14063,,983.346,4.218727e+03,28.071,301.329,22.023,0.0,,,0.035677,
4,117768,86580,2001-01-31,51.62500,,1016.427,3.534041e+03,31.057,300.966,26.444,0.0,,,-0.169223,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,117768,86580,2022-01-31,244.86000,1990.0,44187.000,6.136192e+05,3003.000,11831.000,1466.000,0.0,61.0,0.130024,-0.592158,1.046363
66,117768,86580,2022-10-31,134.97000,2800.0,40488.000,3.331060e+05,680.000,11904.000,1944.000,0.0,66.0,0.031757,-0.448787,-0.122968
67,117768,86580,2023-01-31,195.37000,3389.0,41182.000,4.817824e+05,1414.000,12031.000,1952.000,0.0,64.0,0.045036,0.447507,0.965059
68,117768,86580,2023-07-31,467.29001,5783.0,49555.000,1.153739e+06,6189.000,10954.000,2041.000,0.0,65.0,0.069156,1.391821,2.297581


Seems like in some instances cash_hedged_returns is getting super over inflated. Ideally, that is what we want! But will need to make sure the raise in risk is also reflected.

Next steps:
* Calculate Beta for non-cash assets in NVIDIA
* Use above data to test returns in cash-hedged for NVIDIA over time (long-only) 
* (etc)

Calculating Stock Beta and Cash-Hedged Beta (PiT).

In [18]:
#getting market data
sp500_data = conn.raw_sql("""
SELECT
    date,
    vwretd AS market_return
FROM
    crsp.msi
WHERE
    date >= '1986-10-29'
ORDER BY
    date;
""")

sp500_data['date'] = pd.to_datetime(sp500_data['date'])
sp500_data.set_index('date', inplace=True)
sp500_data['market_quarterly_return'] = (
    sp500_data['market_return']
    .rolling(window=3, min_periods=3)
    .apply(lambda x: (1 + x).prod() - 1, raw=True)
)
sp500_data.reset_index(inplace=True)
sp500_data

Unnamed: 0,date,market_return,market_quarterly_return
0,1986-10-31,0.049305,
1,1986-11-28,0.015093,
2,1986-12-31,-0.026387,0.037036
3,1987-01-30,0.128499,0.115304
4,1987-02-27,0.047922,0.151374
...,...,...,...
442,2023-08-31,-0.020605,0.081413
443,2023-09-29,-0.047390,-0.033782
444,2023-10-31,-0.028867,-0.093951
445,2023-11-30,0.092057,0.010274


In [19]:
nvidia['date'] = pd.to_datetime(nvidia['date'])
combined_data = pd.merge(nvidia, sp500_data, on='date', how='inner')
combined_data.sort_values(by='date', inplace=True)

# can change this
rolling_window = 30

rolling_standard_betas = []
#rolling_cash_hedged_betas = []
dates = []

#SOMETHING IS WRONG HERE... cash-hedged betas are negative...
for i in range(rolling_window, len(combined_data)):
    window_data = combined_data.iloc[i - rolling_window:i]

    if window_data['quarterly_return'].isna().any():
         continue

    X_market = window_data[['market_quarterly_return']]
    y_standard = window_data['quarterly_return']
    #y_cash_hedged = window_data['cash_hedged_return']

    # for standard beta
    standard_model = LinearRegression().fit(X_market, y_standard)
    standard_beta = standard_model.coef_[0]

    # for cash-hedged beta
    #cash_hedged_model = LinearRegression().fit(X_market, y_cash_hedged)
    #cash_hedged_beta = cash_hedged_model.coef_[0]

    rolling_standard_betas.append(standard_beta)
    #rolling_cash_hedged_betas.append(cash_hedged_beta)
    dates.append(combined_data['date'].iloc[i])

beta_results = pd.DataFrame({
    'date': dates,
    'standard_beta': rolling_standard_betas,
    #'cash_hedged_beta': rolling_cash_hedged_betas
})

beta_results.head()

Unnamed: 0,date,standard_beta
0,2009-07-31,2.539127
1,2010-04-30,2.041624
2,2011-01-31,2.027899
3,2011-10-31,2.139724
4,2012-01-31,2.178785
