In [1]:
import pandas as pd
import numpy as np

## Download stock monthly return data (MSF) over the period of 1990- 2010 from WRDS and complete the following tasks:
- Clean data: take absolute value of price; remove price<$5
- Compute the size (in $1000) of the stocks (price*shrout)
- Get the year and month for each date
- For each stock, get its month t+1 stock return
- Each month, rank all stocks into 10 portfolios by month t return
- Get the equal weighted and market capital weighted average returns at month t+1 for each of the 10 portfolios
- Get the average returns of the 10 portfolios over the entire time period.
- Get the standard deviation of the returns of each portfolio

In [2]:
data = pd.read_csv("monthly_2009to2020.csv")

In [3]:
#Clean data: take absolute value of price; remove price< 5−𝐶𝑜𝑚𝑝𝑢𝑡𝑒𝑡ℎ𝑒𝑠𝑖𝑧𝑒(𝑖𝑛 1000) of the stocks (price*shrout)
def float_data(x):
    try:
        return float(x)
    except:
        return np.nan

def clean_data(data):
    df = data.copy()
    df = df[df.EXCHCD.isin([1,2,3,4])]
    # keeping only the exchange codes 1,2,3,4 for NYSE, NYSE, NASDAQ and ARCA respectively
    df['PRC'] = df['PRC'].apply(lambda x:float_data(x))
    df['SHROUT'] = df['SHROUT'].apply(lambda x:float_data(x))
    df = df[df.PRC >= 5]
    df['size'] = round(df.PRC * df.SHROUT / 1000)
    return df

def month_year(data):
    df = data.copy()
    df['yyyymm'] = df['date'].apply(lambda x:round(x/100))
    return df

def next_month_return(data):
    df = data.copy()
    df['RET'] = df['RET'].apply(lambda x:float_data(x))
    df['RET_1'] = df.groupby('PERMNO')['RET'].shift(-1)
    return df.dropna()

def rank_port(data,var,number):
    df = data.copy()
    df['rank'] = df.groupby("yyyymm")[var].apply(lambda x:pd.qcut(x,number,
                                                                  labels = False,duplicates='drop'))
    # pd.qcut: rank and put variables in equal size bins
    return df

#value weighted returns
def vw_return(data,var):
    result = data.groupby('yyyymm').apply(lambda x: pd.Series(
                                                           np.average(x[[var]], 
                                                           weights=x["size"], 
                                                           axis=0), [var]))
    return result

#Equal weighted returns
def ew_return(data,var):
    result = data.groupby('yyyymm')[[var]].mean()
    return result

def returns_table(data,var,number,ew=True):
    result = pd.DataFrame()
    if ew:
        result['0'] = ew_return(data[data['rank'] == 0],var = var)
        for n in range(1,number):
            result['{}'.format(n)] = ew_return(data[data['rank'] == n],var = var)
    else:
        result['0'] = vw_return(data[data['rank'] == 0],var = var)
        for n in range(1,number):
            result['{}'.format(n)] = vw_return(data[data['rank'] == n],var = var)
    return result

In [4]:
x = rank_port(data = next_month_return(data = month_year(data = clean_data(data = data))),var='RET',number = 10)

In [5]:
#ew
df_ew = returns_table(data = x, var = 'RET_1',number = 10)
#vw
df_vw = returns_table(data = x, var = 'RET_1',number = 10,ew=False)

In [6]:
display(df_ew.describe().iloc[1:3])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
mean,0.024098,0.014074,0.011852,0.010141,0.009799,0.009294,0.009511,0.009546,0.009884,0.015316
std,0.062032,0.052858,0.047702,0.043913,0.042516,0.041821,0.040997,0.040845,0.042351,0.047886


In [7]:
display(df_vw.describe().iloc[1:3])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
mean,0.008845,0.009566,0.008661,0.009922,0.007634,0.009294,0.009379,0.007536,0.008075,0.008959
std,0.070089,0.057921,0.050077,0.046571,0.044106,0.044829,0.043067,0.043495,0.045831,0.052329
