In [None]:
"""
This Script pulls the baseline data for our project. This data is then used for our baseline linear model outlined in: 
1. Stock returns and firm characteristics (inspired by the Fama French 5-factor model) are pulled from the Global Factor Database in WRDS (Wharton Research Data Services) 
    which combines data from the survivor bias free stock return dataset CRSP (Centre for Research in Securities Prices) with public company financial information from Compustat.
2. Macro variables from FRED including the slope of the yield curve, short and long term interest rates, and the unemployment rate
"""

import wrds
import pandas as pd
from fredapi import Fred

## Connect to WRDS
wrds_db = wrds.Connection()
## Get Ben's FRED api key
fred = Fred(api_key_file="C:/Users/biorio/Desktop/fredkey.txt")

Loading library list...
Done


In [37]:
## Documentation on all of these characteristics can be found here: https://jkpfactors.s3.amazonaws.com/documents/Documentation.pdf

# Take characteristics associated with fama-french factors - Size, B/M, Operating Profit, Asset Growth, Momentum (1-3 months - "randomly" chosen)
sql_query = f""" 
    SELECT id, eom, excntry, gvkey, permno, prc_local, ret_exc, ret_exc_lead1m, me, be_me, ope_be, at_gr1, ret_3_1
    FROM contrib.global_factor
    WHERE common=1 and exch_main=1 and primary_sec=1 and obs_main=1 and
    excntry='USA' and eom > '1985-01-01'
"""

data = wrds_db.raw_sql(sql_query)
data = data.sort_values(['id', 'eom'])
data

Unnamed: 0,id,eom,excntry,gvkey,permno,prc_local,ret_exc,ret_exc_lead1m,me,be_me,ope_be,at_gr1,ret_3_1
0,10730.0,1992-05-31,USA,017092,10730.0,17.5,-0.002758,0.039656,30.695,,,,0.147967
1,10730.0,1992-06-30,USA,017092,10730.0,18.25,0.039656,0.046423,32.0835,,,,0.103125
2,10730.0,1992-07-31,USA,017092,10730.0,19.0,0.046423,0.023711,33.402,,,,0.042857
3,10730.0,1992-08-31,USA,017092,10730.0,19.5,0.023711,0.029478,34.281,,,,0.094286
4,10730.0,1992-09-30,USA,017092,10730.0,20.125,0.029478,0.110757,35.42,,,,0.076929
...,...,...,...,...,...,...,...,...,...,...,...,...,...
284833,93436.0,2024-08-31,USA,184996,93436.0,214.110001,-0.081809,0.217323,684004.37235,0.094119,0.187362,0.257886,0.303178
284834,93436.0,2024-09-30,USA,184996,93436.0,261.630005,0.217323,-0.048932,839047.425659,0.076727,0.187362,0.257886,0.082019
284835,93436.0,2024-10-31,USA,184996,93436.0,249.850006,-0.048932,0.377514,802033.510593,0.082874,0.179876,0.24551,0.127375
284836,93436.0,2024-11-30,USA,184996,93436.0,345.160004,0.377514,0.166345,1107984.321356,0.05999,0.179876,0.24551,0.166924


In [39]:
data = data.sort_values(['id', 'eom'])

data.to_csv('base_variables.csv.gz', compression="gzip", index=False) 

In [24]:
## Macro Variables from FRED
## Pull 10Y rate, yield curve slope, and Fed Funds Rate (to use as Risk free)
series_ids = ['DGS10', 'T10Y2Y', 'FEDFUNDS', 'UNRATE']
df = pd.DataFrame()
for series_id in series_ids:
    s = fred.get_series(series_id)
    s.name = series_id # Assign the series ID as the column name
    if df.empty:
        df = s.to_frame() # Convert the first series to a DataFrame
    else:
        df = df.join(s, how='outer') # Join subsequent series

df = df[df.index>='1985-01-01'].resample('M').last()
## aggregate to monthly

df = df.reset_index(names = 'date')
df

  df = df[df.index>='1985-01-01'].resample('M').last()


Unnamed: 0,date,DGS10,T10Y2Y,FEDFUNDS,UNRATE
0,1985-01-31,11.17,1.27,8.35,7.3
1,1985-02-28,11.91,1.25,8.50,7.2
2,1985-03-31,11.65,1.22,8.58,7.2
3,1985-04-30,11.41,1.50,8.27,7.3
4,1985-05-31,10.28,1.36,7.97,7.2
...,...,...,...,...,...
486,2025-07-31,4.37,0.43,4.33,4.2
487,2025-08-31,4.23,0.64,4.33,4.3
488,2025-09-30,4.16,0.56,4.22,
489,2025-10-31,4.11,0.51,4.09,


In [23]:
df.to_csv('macro_variables.csv.gz', compression="gzip", index=False) 

In [36]:

sql_query = f''' SELECT gvkey, conm FROM comp_na_daily_all.fundq WHERE gvkey = '112780' '''

compustat_data = wrds_db.raw_sql(sql_query)
print(compustat_data.head())

    gvkey                 conm
0  112780  RHBT FINANCIAL CORP
1  112780  RHBT FINANCIAL CORP
2  112780  RHBT FINANCIAL CORP
3  112780  RHBT FINANCIAL CORP
4  112780  RHBT FINANCIAL CORP


In [44]:
data[data['gvkey']=='112780']
data[data['gvkey']=='185164']

Unnamed: 0,id,eom,excntry,gvkey,permno,prc_local,ret_exc,ret_exc_lead1m,me,be_me,ope_be,at_gr1,ret_3_1
129184,118516401.0,2014-02-28,USA,185164,,2.05,,-2.8e-05,614.21075,1.261805,-0.207278,-0.152357,
129236,118516401.0,2014-03-31,USA,185164,,2.05,-2.8e-05,0.146326,615.13325,1.259912,-0.207278,-0.152357,
129558,118516401.0,2014-04-30,USA,185164,,2.35,0.146326,0.008503,705.15275,1.135988,-0.086845,-0.220481,
129873,118516401.0,2014-05-31,USA,185164,,2.37,0.008503,-0.367109,711.07584,1.126525,-0.086845,-0.220481,0.146341
130200,118516401.0,2014-06-30,USA,185164,,1.5,-0.367109,0.232654,450.048,1.77991,-0.086845,-0.220481,0.156098
130541,118516401.0,2014-07-31,USA,185164,,1.849,0.232654,,554.759168,1.390663,-0.074118,-0.195713,-0.361702
67348,118516401.0,2014-08-31,USA,185164,,,,,,,-0.074118,-0.195713,-0.219831
112938,118516401.0,2014-09-30,USA,185164,,2.37,0.28177,-0.021109,710.94075,1.085158,-0.074118,-0.195713,
131467,118516401.0,2014-10-31,USA,185164,,2.32,-0.021109,-0.00043,695.942,1.083727,-0.066029,-0.188541,
131852,118516401.0,2014-11-30,USA,185164,,2.319,-0.00043,-0.003455,695.642025,1.084194,-0.066029,-0.188541,0.254732
