In [28]:
import pandas as pd
import numpy as np

# Form portfolios using THREE factors (momentum + size and illiquidity)

- Portfolio restriction: exclude stocks with price less than $5 at time of stock ranking (portfolio formation).
- Adjust the sign of the variables so that they have positive relation with stock returns.
- Rank stocks by each sign-adjusted signal
- Sort stocks into decile portfolios each month based on the average of their ranks across the variables.

In [29]:
def load_month_data():
    """Load the monthly stock file and derive the columns used downstream.

    Reads ``stock_monthly_2005.csv`` and then:

    - replaces ``date`` with the integer made of the first six characters of
      its string form (presumably yyyymm -- confirm against the file);
    - adds ``yyyyq`` = year * 10 + quarter of the first day of that month;
    - converts ``RET`` with ``float_value`` (unparseable entries become NaN);
    - adds ``market_cap`` = ``PRC`` * ``SHROUT``;
    - adds ``RET_1``, the next row's ``RET`` within each ``PERMNO``.

    Returns the frame with every row containing any missing value removed.
    """
    monthly = pd.read_csv("stock_monthly_2005.csv", low_memory=False)

    def _first_six_digits(raw):
        return int(str(raw)[:6])

    def _year_quarter(yyyymm):
        # Anchor on the first day of the month to read off year and quarter.
        first_day = pd.to_datetime(str(int(yyyymm) * 100 + 1))
        return first_day.year * 10 + first_day.quarter

    monthly['date'] = monthly['date'].apply(_first_six_digits)
    monthly['yyyyq'] = monthly['date'].apply(_year_quarter)
    monthly['RET'] = monthly['RET'].apply(float_value)
    monthly['market_cap'] = monthly['PRC'] * monthly['SHROUT']
    # 1-month ahead returns: shift(-1) takes the following row of the same
    # PERMNO, so this relies on the file's row order within each PERMNO.
    monthly['RET_1'] = monthly.groupby("PERMNO")['RET'].shift(-1)
    return monthly.dropna()

def float_value(x):
    """Convert ``x`` to ``float``, returning ``np.nan`` when it cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert (e.g. a number or a string).

    Returns
    -------
    float
        ``float(x)``, or ``np.nan`` if the conversion raises ``TypeError``,
        ``ValueError`` or ``OverflowError``.
    """
    try:
        return float(x)
    # Catch only conversion failures; a bare ``except`` would also swallow
    # KeyboardInterrupt / SystemExit and make a long ``apply`` uninterruptible.
    except (TypeError, ValueError, OverflowError):
        return np.nan

def clean_data(data):
    """Return a filtered copy of ``data``; the input frame is not modified.

    Keeps only rows where ``PRC`` is at least 5 and ``EXCHCD`` is 1, 2 or 3.
    """
    price_ok = data.PRC >= 5
    exchange_ok = data.EXCHCD.isin([1, 2, 3])
    return data[price_ok & exchange_ok].copy()

def merge_factor(data,quarter_factor,month_factor):
    """Left-join the quarterly and monthly factor tables onto ``data``.

    ``quarter_factor`` is matched on (``PERMNO``, ``yyyyq``) and
    ``month_factor`` on (``PERMNO``, ``date``). Every row of ``data`` is kept;
    factor columns are NaN where no match exists. Inputs are not modified.
    """
    quarter_keys = ['PERMNO', 'yyyyq']
    month_keys = ['PERMNO', 'date']
    return (
        data
        .merge(quarter_factor, how='left', on=quarter_keys)
        .merge(month_factor, how='left', on=month_keys)
    )

In [30]:
def rank_variables(data,variables):
    """Rank the selected column(s) within each ``date`` group.

    ``variables`` is passed straight to the groupby column selection; the
    result holds, for every row, its rank among rows sharing the same date.
    """
    per_date = data.groupby('date')
    selected = per_date[variables]
    return selected.rank()

def rank_factor(data):
    """Add per-date signal ranks and their row-wise average to a copy of ``data``.

    ``SIZE`` is negated first (sign adjustment). ``Rank_1``, ``Rank_2`` and
    ``Rank_3`` are the within-date ranks of the negated ``SIZE``, ``illiq`` and
    ``mom`` respectively, and ``Overall_Rank`` is the mean of those three.
    The input frame is left untouched.
    """
    ranked = data.copy()
    ranked['SIZE'] = ranked['SIZE'].mul(-1)  # adjust sign

    rank_columns = []
    for position, signal in enumerate(['SIZE', 'illiq', 'mom'], start=1):
        column = 'Rank_{}'.format(position)
        ranked[column] = ranked.groupby('date')[signal].rank()
        rank_columns.append(column)

    # Overall_Rank = mean(Rank_1, Rank_2, Rank_3) for each row.
    ranked['Overall_Rank'] = ranked[rank_columns].mean(axis=1)
    return ranked

def rank_port(data,var,number):
    """Assign each row to a quantile bucket of ``var`` within its ``date``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``date`` column and the column named by ``var``.
    var : str
        Column to sort on.
    number : int
        Number of quantile buckets requested from ``pd.qcut``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with an added ``rank`` column holding the bucket code
        (0 = lowest values). Because ``duplicates='drop'`` is used, a date with
        repeated bin edges can end up with fewer than ``number`` buckets.
    """
    df = data.copy()
    # transform() returns a result aligned to df's own index. The previous
    # groupby(...).apply(...) form prepends the group key to the index on
    # pandas >= 2.0, so the assignment below would no longer line up.
    df['rank'] = df.groupby("date")[var].transform(
        lambda x: pd.qcut(x, number, labels=False, duplicates='drop'))
    return df

# Value-weighted returns
def vw_return(data,var):
    """Average ``var`` per ``date``, weighting each row by its ``market_cap``.

    The result has one entry per date, labelled with ``var``.
    """
    def _weighted_mean(group):
        averaged = np.average(group[[var]], weights=group["market_cap"], axis=0)
        return pd.Series(averaged, [var])

    return data.groupby('date').apply(_weighted_mean)

# Equal-weighted returns
def ew_return(data,var):
    """Return the plain (unweighted) mean of ``var`` for each ``date``.

    The result is a one-column frame named ``var`` and indexed by date.
    """
    per_date = data.groupby('date')
    return per_date[[var]].mean()

# The average one month ahead return for each portfolio bucket
def returns_table(data,var,number,ew=True):
    """Build a per-date table of portfolio returns, one column per bucket.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``date``, ``rank`` (bucket code 0..number-1) and ``var``;
        ``market_cap`` is also required when ``ew`` is False.
    var : str
        Return column to average.
    number : int
        Number of buckets; columns ``'0'`` .. ``str(number - 1)`` are produced.
    ew : bool, default True
        True for equal-weighted (``ew_return``), False for value-weighted
        (``vw_return``) averages.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, with one column per bucket plus ``'H-L'``: the highest
        bucket minus bucket ``'0'``.
    """
    portfolio_return = ew_return if ew else vw_return
    result = pd.DataFrame()
    for n in range(number):
        result['{}'.format(n)] = portfolio_return(data[data['rank'] == n], var=var)
    # The top bucket is number - 1; the previous hard-coded '9' only worked
    # for deciles and raised KeyError for any other bucket count.
    result['H-L'] = result['{}'.format(number - 1)] - result['0']
    return result

In [31]:
# Load and filter the monthly stock data, then attach the factor files created
# in the previous example (their unnamed CSV index column is dropped).
stock_clean = clean_data(data=load_month_data())
quarter_factor = pd.read_csv("Quarterly_Factor.csv").drop(columns=['Unnamed: 0'])
month_factor = pd.read_csv("Monthly_Mom_Factor.csv").drop(columns=['Unnamed: 0'])
df = merge_factor(
    data=stock_clean,
    quarter_factor=quarter_factor,
    month_factor=month_factor,
).dropna()

# Build the ranking variable and sort each month into 10 buckets on it.
x = rank_port(data=rank_factor(data=df), var='Overall_Rank', number=10)
x.head(10)

Unnamed: 0,PERMNO,date,SHRCD,EXCHCD,SICCD,TICKER,COMNAM,CUSIP,PRC,VOL,...,RET_1,SIZE,turnd,illiq,mom,Rank_1,Rank_2,Rank_3,Overall_Rank,rank
1395,10104,200501,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,13.77,10604413.0,...,-0.05955,-64164434.88,9.069597,-2.155613e-11,-0.660575,26.0,213.0,35.0,91.333333,0
1406,10104,200512,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,12.21,11151977.0,...,0.029484,-63028325.25,8.29871,3.772266e-12,-0.075802,33.0,141.0,88.0,87.333333,0
1407,10104,200601,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,12.57,10808415.0,...,-0.011933,-72721280.0,10.377902,1.110456e-11,-0.084967,31.0,103.0,49.0,61.0,0
1408,10104,200602,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,12.42,9196426.0,...,0.102254,-72721280.0,10.377902,1.110456e-11,-0.057142,31.0,104.0,63.0,66.0,0
1409,10104,200603,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,13.69,13605570.0,...,0.065741,-72721280.0,10.377902,1.110456e-11,0.007212,31.0,104.0,86.0,73.666667,0
1410,10104,200604,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,14.59,8854244.0,...,-0.02536,-75811680.0,7.640875,-1.958549e-11,0.074395,28.0,231.0,107.0,122.0,0
1411,10104,200605,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,14.22,7231803.0,...,0.018987,-75811680.0,7.640875,-1.958549e-11,0.069532,28.0,230.0,120.0,126.0,1
1412,10104,200606,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,14.49,9402460.0,...,0.033126,-75811680.0,7.640875,-1.958549e-11,0.105303,28.0,230.0,141.0,133.0,1
1413,10104,200607,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,14.97,7933511.0,...,0.046092,-92167708.76,8.472446,3.650453e-11,0.0479,24.0,124.0,155.0,101.0,0
1414,10104,200608,11.0,3.0,7370,ORCL,ORACLE CORP,68389X10,15.66,8166519.0,...,0.132822,-92167708.76,8.472446,3.650453e-11,0.115473,24.0,125.0,202.0,117.0,1


In [32]:
# Equal-weighted decile portfolios (10-year 2011 - 2020): rows 1-2 of
# describe() are the mean and standard deviation of each column.
returns_table(
    data=x.loc[x.date >= 201101],
    var='RET_1',
    number=10,
).describe().iloc[1:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,H-L
mean,0.004282,0.008148,0.009143,0.009718,0.011359,0.014503,0.015813,0.019751,0.022836,0.03039,0.026107
std,0.052447,0.051961,0.049664,0.046103,0.046456,0.044153,0.042446,0.042195,0.041317,0.043704,0.038671


In [33]:
# Value-weighted decile portfolios (10-year 2011 - 2020): rows 1-2 of
# describe() are the mean and standard deviation of each column.
returns_table(
    data=x.loc[x.date >= 201101],
    var='RET_1',
    number=10,
    ew=False,
).describe().iloc[1:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,H-L
mean,0.006289,0.011406,0.013879,0.011564,0.014555,0.016333,0.018848,0.02089,0.022137,0.027748,0.021459
std,0.04681,0.040011,0.040357,0.04376,0.041226,0.042892,0.041568,0.037914,0.038968,0.042666,0.035934


In [34]:
# Save the equal-weighted results table for later use (hw5 etc.).
returns_table(
    data=x.loc[x.date >= 201101],
    var='RET_1',
    number=10,
).to_csv("SIZE_Mom_Illiq_month_ew.csv")

In [35]:
# Save the value-weighted results table.
returns_table(
    data=x.loc[x.date >= 201101],
    var='RET_1',
    number=10,
    ew=False,
).to_csv("SIZE_Mom_Illiq_month_vw.csv")