# Overview

The purpose of this notebook was to explore the Center for Research in Security Prices (CRSP) mutual fund dataset accessed through Wharton Research Data Services (WRDS) and select funds that were used in our portfolio.

# Setup

## Load Libraries

In [46]:
import pandas as pd
import numpy as np

#Widen the per-column display limit to 200 characters so long values (e.g. fund names) are not cut off when printed
pd.set_option('display.max_colwidth', 200)

## Set Path to Data

In [8]:
#Location of the CSV extract read by this notebook
#The file name is appended to this string by plain concatenation (no separator is inserted),
#so the value must end with a path separator
filepath = 'PATH_TO_DATA'

## Helper Function to Print Dataframe Properties in their Entirety

In [9]:
def print_full(x):
    """Print a pandas object with every row shown (no row truncation).

    The row limit is lifted only for the duration of the call. The caller's
    previous 'display.max_rows' setting is restored afterwards, including when
    printing raises, rather than being reset to the library default.

    Parameters
    ----------
    x : object
        Typically a pandas Series or DataFrame; anything accepted by print().
    """
    #max_rows=None means "no limit"; option_context restores the prior value on exit
    with pd.option_context('display.max_rows', None):
        print(x)

# Dataset Exploratory Data Analysis

## Year-end CRSP Mutual Fund Dataset EDA

In [49]:
#Load the CRSP mutual fund summary extract (latin-1 encoded CSV) into a dataframe
crsp_mf_since2016 = pd.read_csv(
    filepath + 'crsp_mutulfundsummary_since2016.csv',
    encoding='latin-1',
)

In [50]:
#Dataframe shape as (number of rows, number of columns)
crsp_mf_since2016.shape

(192911, 73)

In [51]:
#Preview the first five rows
display(crsp_mf_since2016.head(n=5))

Unnamed: 0,summary_period2,crsp_fundno,caldt,nav_latest,nav_latest_dt,tna_latest,tna_latest_dt,yield,div_ytd,cap_gains_ytd,...,accrual_fund,sales_restrict,wbrger_obj_cd,policy,lipper_class,lipper_class_name,lipper_obj_cd,lipper_obj_name,lipper_asset_cd,lipper_tax_cd
0,AQ,105,20161230,12.79,20161230,26.5,20161230.0,,,0.138945,...,N,N,,,GMLG,Global Multi-Cap Growth,GL,GLOBAL FUNDS,EQ,Taxable
1,AQ,105,20171229,14.96,20171229,29.6,20171229.0,,,1.157885,...,N,N,,,GMLG,Global Multi-Cap Growth,GL,GLOBAL FUNDS,EQ,Taxable
2,AQ,105,20181231,10.32,20181231,21.2,20181231.0,,,2.626225,...,N,N,,,GSME,Global Small-/Mid-Cap Funds,GL,GLOBAL FUNDS,EQ,Taxable
3,AQ,105,20191231,12.28,20191231,26.6,20191231.0,,,1.154015,...,N,N,,,GMLG,Global Multi-Cap Growth Fds,GL,GLOBAL FUNDS,EQ,Taxable
4,AQ,105,20201231,13.78,20201231,34.7,20201231.0,,,1.922754,...,N,N,,,GMLG,Global Multi-Cap Growth Fds,GL,GLOBAL FUNDS,EQ,Taxable


In [52]:
#See available fields (the dataframe's column labels)
display(crsp_mf_since2016.columns)

Index(['summary_period2', 'crsp_fundno', 'caldt', 'nav_latest',
       'nav_latest_dt', 'tna_latest', 'tna_latest_dt', 'yield', 'div_ytd',
       'cap_gains_ytd', 'nav_52w_h', 'nav_52w_h_dt', 'nav_52w_l',
       'nav_52w_l_dt', 'unrealized_app_dep', 'unrealized_app_dt', 'asset_dt',
       'per_com', 'per_pref', 'per_conv', 'per_corp', 'per_muni', 'per_govt',
       'per_oth', 'per_cash', 'per_bond', 'per_abs', 'per_mbs', 'per_eq_oth',
       'per_fi_oth', 'maturity', 'maturity_dt', 'cusip8', 'crsp_portno',
       'crsp_cl_grp', 'fund_name', 'ticker', 'ncusip', 'mgmt_name', 'mgmt_cd',
       'mgr_name', 'mgr_dt', 'adv_name', 'open_to_inv', 'retail_fund',
       'inst_fund', 'm_fund', 'index_fund_flag', 'vau_fund', 'et_flag',
       'delist_cd', 'first_offer_dt', 'end_dt', 'dead_flag', 'merge_fundno',
       'actual_12b1', 'max_12b1', 'mgmt_fee', 'exp_ratio', 'turn_ratio',
       'fiscal_yearend', 'crsp_obj_cd', 'si_obj_cd', 'accrual_fund',
       'sales_restrict', 'wbrger_obj_cd', 'poli

In [53]:
#Count missing values per column, then show only the columns that have at least one
null_counts = crsp_mf_since2016.isnull().sum()
print_full(null_counts.loc[null_counts.gt(0)])

tna_latest              6499
tna_latest_dt           4830
yield                  48378
div_ytd                47634
cap_gains_ytd         103628
unrealized_app_dep    192911
unrealized_app_dt     192911
asset_dt                8505
per_com                 8505
per_pref                8505
per_conv                8505
per_corp                8505
per_muni                8505
per_govt                8505
per_oth                 8505
per_cash                8505
per_bond                8505
per_abs                 8505
per_mbs                 8505
per_eq_oth              8505
per_fi_oth              8505
maturity               83835
maturity_dt            82956
cusip8                  2135
crsp_portno             8058
crsp_cl_grp             1035
fund_name               1009
ticker                 23130
ncusip                  2135
mgmt_name               1104
mgmt_cd                24430
mgr_name               24441
mgr_dt                 72172
adv_name                1010
open_to_inv   

In [54]:
#Print every field of the first row (positional index 0) without row truncation
print_full(crsp_mf_since2016.iloc[0])

summary_period2                                                                      AQ
crsp_fundno                                                                         105
caldt                                                                          20161230
nav_latest                                                                        12.79
nav_latest_dt                                                                  20161230
tna_latest                                                                         26.5
tna_latest_dt                                                                20161230.0
yield                                                                               NaN
div_ytd                                                                             NaN
cap_gains_ytd                                                                  0.138945
nav_52w_h                                                                          13.3
nav_52w_h_dt                    

In [55]:
#See available dates (the distinct values of the caldt column)
display(crsp_mf_since2016['caldt'].unique())

array([20161230, 20171229, 20181231, 20191231, 20201231, 20211231])

In [56]:
#See the distinct Lipper Asset Codes (a missing code shows up as nan)
#EQ - Equity Funds
#TX - Taxable Fixed Income Funds
#MB - Tax Free Fixed Income Funds
crsp_mf_since2016['lipper_asset_cd'].unique()

array(['EQ', 'TX', 'MB', nan], dtype=object)

In [57]:
#Number of rows per Lipper Asset Code across all available dates
#(a fund appears once per date, so these are fund-date records rather than distinct funds)
#dropna=False keeps the rows with a missing code in the tally
#There are far more equity-centric funds than fixed income funds
crsp_mf_since2016['lipper_asset_cd'].value_counts(dropna=False)

EQ     140083
TX      39746
MB      12374
NaN       708
Name: lipper_asset_cd, dtype: int64

## Selecting an Equity Fund

In [58]:
#List the distinct Lipper Classification Names among equity funds (lipper_asset_cd == 'EQ')
#The S&P 500 Index Funds classification was selected
display(
    crsp_mf_since2016
    .loc[crsp_mf_since2016['lipper_asset_cd'].eq('EQ'), 'lipper_class_name']
    .unique()
)

array(['Global Multi-Cap Growth', 'Global Small-/Mid-Cap Funds',
       'Global Multi-Cap Growth Fds', 'Small-Cap Core Funds',
       'Equity Income Funds', 'Small-Cap Growth Funds',
       'Mid-Cap Core Funds', 'Mid-Cap Value Funds',
       'Mixed-Asset Target Alloc Growth Funds', 'Large-Cap Growth Funds',
       'Mixed-Asset Target Alloc Moderate Funds',
       'Mixed-Asset Trgt Alloc Mod Fds', 'Mixed-Asset Target 2020 Funds',
       'Large-Cap Core Funds', 'Multi-Cap Value Funds',
       'Multi-Cap Growth Funds', 'Global Real Estate Funds',
       'Global Large-Cap Core', 'Global Multi-Cap Value Fds',
       'Global Multi-Cap Core Fds', 'International Small/Mid-Cap Core',
       'International Small/Mid-Cap Growth',
       'International Sm/Md-Cp Gro Fds',
       'Mixed-Asset Target Alloc Consv Funds',
       'Mixed-Asset Trgt Alloc Con Fds', 'International Multi-Cap Core',
       'International Multi-Cp Core Fds', 'Global Equity Income Funds',
       'International Equity Income Fd

Here we rank the funds within the 'S&P 500 Index Funds' Lipper classification as of the end of 2020 that launched before 2010, giving us at least 10 years' worth of data, and list the top 10. Note that the ranking uses `nav_latest` (the latest net asset value) rather than total net assets (`tna_latest`). We select the top-ranked fund, the **BlackRock Funds III: iShares S&P 500 Index Fund; Class K Shares**.

In [73]:
#Funds in the 'S&P 500 Index Funds' Lipper class, taken from the 20201231 observations,
#with first_offer_dt no later than 20100000 (i.e. before 2010), ordered by nav_latest, highest first
#reset_index() keeps the original row labels in an 'index' column and renumbers the rows from 0
crsp_eq_sp500_2020 = (
    crsp_mf_since2016
    .loc[
        crsp_mf_since2016['lipper_class_name'].eq('S&P 500 Index Funds')
        & crsp_mf_since2016['caldt'].eq(20201231)
        & crsp_mf_since2016['first_offer_dt'].le(20100000)
    ]
    .sort_values(by='nav_latest', ascending=False)
    .reset_index()
)

#Show the ten top-ranked funds
print_full(crsp_eq_sp500_2020.loc[:, ['fund_name', 'nav_latest', 'crsp_fundno']].head(10))

                                                                                         fund_name  \
0                                  BlackRock Funds III: iShares S&P 500 Index Fund; Class K Shares   
1                                                          iShares Trust: iShares Core S&P 500 ETF   
2                                                                           SPDR S&P 500 ETF Trust   
3                   Variable Insurance Products Fund II: Index 500 Portfolio; Initial Class Shares   
4                   Variable Insurance Products Fund II: Index 500 Portfolio; Service Class Shares   
5                 Variable Insurance Products Fund II: Index 500 Portfolio; Service Class 2 Shares   
6                                   Vanguard Index Funds: Vanguard 500 Index Fund; Investor Shares   
7                                    Vanguard Index Funds: Vanguard 500 Index Fund; Admiral Shares   
8  Vanguard Institutional Index Fund: Vanguard Institutional Index Fund; Instituti

In [79]:
#Top-ranked row (position 0) of the sorted S&P 500 index fund table: the selected equity fund
crsp_eq_sp500_2020[['fund_name', 'nav_latest', 'crsp_fundno']].iloc[0]

fund_name      BlackRock Funds III: iShares S&P 500 Index Fund; Class K Shares
nav_latest                                                              444.28
crsp_fundno                                                               5887
Name: 0, dtype: object

## Selecting a Fixed Income Fund

In [83]:
#List the distinct Lipper Classification Names among non-equity funds (lipper_asset_cd != 'EQ')
#Rows with a missing asset code also pass this filter
#No particular category was selected
display(
    crsp_mf_since2016
    .loc[crsp_mf_since2016['lipper_asset_cd'].ne('EQ'), 'lipper_class_name']
    .unique()
)

array(['Core Bond Funds', 'Loan Participation Funds',
       'International Income Funds', 'Global Income Funds',
       'Inflation Protected Bond Funds',
       'Intermediate U.S. Government Funds',
       'Alternative Credit Focus Funds',
       'Short Investment Grade Debt Funds', 'High Yield Funds',
       'U.S. Government Money Market Funds',
       'Instl U.S. Government Money Market Funds', 'Money Market Funds',
       'Specialty Fixed Income Funds', 'Corporate Debt Funds BBB-Rated',
       'U.S. Mortgage Funds', 'Short U.S. Government Funds',
       'Ultra-Short Obligations Funds',
       'Short-Intmdt Investment Grade Debt Funds',
       'Global High Yield Funds', 'Absolute Return Bond Funds',
       'Core Plus Bond Funds', 'General U.S. Treasury Funds',
       'Short U.S. Treasury Funds', 'GNMA Funds',
       'U.S. Treasury Money Market Funds',
       'General U.S. Government Funds',
       'Instl U.S. Treasury Money Market Funds', 'Flexible Income Funds',
       'Instl Money

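Here we rank the funds that are not equity funds as of the end of 2020 and list the top 15 by `nav_latest` (the latest net asset value, not total net assets). Funds are meant to have launched before 2010, giving us at least 10 years' worth of data; note, however, that the filter in the cell below uses a cutoff of 20120000 on `first_offer_dt`. We select another BlackRock fund that tracks treasury bonds, the **iShares Trust: iShares 7-10 Year Treasury Bond ETF**.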

In [98]:
#Non-equity funds (lipper_asset_cd != 'EQ', which also keeps rows with a missing asset code),
#taken from the 20201231 observations, with first_offer_dt no later than 20120000,
#ordered by nav_latest, highest first
#NOTE: despite the '_2021' suffix in the variable name, the rows are the 20201231 observations
crsp_mf_tx_bond_2021 = (
    crsp_mf_since2016
    .loc[
        crsp_mf_since2016['lipper_asset_cd'].ne('EQ')
        & crsp_mf_since2016['caldt'].eq(20201231)
        & crsp_mf_since2016['first_offer_dt'].le(20120000)
    ]
    .sort_values(by='nav_latest', ascending=False)
    .reset_index()
)
#Show the fifteen top-ranked funds
print_full(crsp_mf_tx_bond_2021.loc[:, ['fund_name', 'nav_latest', 'crsp_fundno']].head(15))

                                                                                fund_name  \
0      PIMCO ETF Trust: PIMCO 25+ Year Zero Coupon US Treasury Index Exchange-Traded Fund   
1                                     iShares Trust: iShares 10-20 Year Treasury Bond ETF   
2                                       iShares Trust: iShares 20+ Year Treasury Bond ETF   
3        Vanguard World Funds: Vanguard Extended Duration Treasury Index Fund; ETF Shares   
4                      iShares Trust: iShares iBoxx $ Investment Grade Corporate Bond ETF   
5                                       iShares Trust: iShares 3-7 Year Treasury Bond ETF   
6                                     Invesco CurrencyShares British Pound Sterling Trust   
7                                                    iShares Trust: iShares TIPS Bond ETF   
8                                       iShares Trust: iShares Government/Credit Bond ETF   
9                            Rydex Series Funds: High Yield Strategy F

In [100]:
#Row at position 11 of the sorted non-equity table: the selected fixed income fund
crsp_mf_tx_bond_2021[['fund_name', 'nav_latest', 'crsp_fundno']].iloc[11]

fund_name      iShares Trust: iShares 7-10 Year Treasury Bond ETF
nav_latest                                                 119.95
crsp_fundno                                                 16431
Name: 11, dtype: object

# Extracting Daily Returns

From here we go to WRDS and access daily returns by fund code: 5887, 16431.

Following the link below, you will be able to query the daily returns.

Data between 2010-2012 was extracted in a CSV format with dates formatted to MMDDYY10.

A WRDS account and access is required to access the platform.

https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/quarterly-update/mutual-funds/daily-returns/