# Assignment 4

In [1]:
import pandas as pd
from pandasql import sqldf
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn import linear_model
import statsmodels.api as sm
import scipy.stats as stats
from math import sqrt

warnings.filterwarnings('ignore')

In [2]:
# read raw data
all_monthly_data = pd.read_sas("CA.sas7bdat", encoding = 'ISO-8859-1')

In [3]:
all_monthly_data.head()

Unnamed: 0,permno,date,ret,ret_t1,TICKER,COMNAM,PRC,SHROUT,datadate,fyearq,...,bk2mkt_winsorized_zscore,ep1_winsorized_zscore,ep2_winsorized_zscore,gvkey,beta,ivol,mom,beta_winsorized,ivol_winsorized,mom_winsorized
0,10107.0,1997-01-31,0.234493,-0.04411,MSFT,MICROSOFT CORP,102.0,1198000.0,1996-09-30,1997.0,...,-1.044416,-0.066718,0.274438,12141,,0.012851,,,0.012851,
1,10107.0,1997-02-28,-0.04411,-0.05961,MSFT,MICROSOFT CORP,97.5,1198000.0,1996-09-30,1997.0,...,-1.052875,0.001182,0.33749,12141,,0.014933,,,0.014933,
2,10107.0,1997-03-31,-0.05961,0.325153,MSFT,MICROSOFT CORP,91.6875,1191000.0,1996-09-30,1997.0,...,-1.048655,-0.001393,0.309762,12141,,0.016597,,,0.016597,
3,10107.0,1997-04-30,0.325153,0.020576,MSFT,MICROSOFT CORP,121.5,1191000.0,1996-12-31,1997.0,...,-1.080912,0.003298,-0.021655,12141,,0.022905,,,0.022905,
4,10107.0,1997-05-30,0.020576,0.019153,MSFT,MICROSOFT CORP,124.0,1191000.0,1996-12-31,1997.0,...,-1.023077,-0.078655,-0.149326,12141,,0.011146,,,0.011146,


***
## Task 1 - [m, n, l] Quitile Portfolios
- We wish to use the [m,n,l] month rule to construct a quantile portfolio for each of the winsorized factors from A3 for Jan 2000-Nov 2021
- We will actually calculate all returns from Jan **1997** and then truncate when displaying, because it will help us in part b
- We forecast one month ahead returns
- **Our m,n,l parameters have n = 0, l = 1 so we assume no waiting period and a holding period of 1 month**
- Our estimation window for each time t is the data that we have before, and at time t

In [4]:
winsorized_factors = all_monthly_data[["permno", "date", "ret", "ret_t1", "lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]]
# winsorized_factors = winsorized_factors.loc[winsorized_factors["date"] >= "1999-12-01"]
winsorized_factors.reset_index(inplace=True)
winsorized_factors.drop("index", inplace=True, axis=1)
winsorized_factors

Unnamed: 0,permno,date,ret,ret_t1,lnSize_winsorized,bk2mkt_winsorized,ep1_winsorized,beta_winsorized,ivol_winsorized,mom_winsorized
0,10107.0,1997-01-31,0.234493,-0.044110,25.528892,0.059552,0.005025,,0.012851,
1,10107.0,1997-02-28,-0.044110,-0.059610,25.483772,0.062300,0.005257,,0.014933,
2,10107.0,1997-03-31,-0.059610,0.325153,25.416445,0.066639,0.005623,,0.016597,
3,10107.0,1997-04-30,0.325153,0.020576,25.697973,0.059859,0.005121,,0.022905,
4,10107.0,1997-05-30,0.020576,0.019153,25.718341,0.058652,0.005017,,0.011146,
...,...,...,...,...,...,...,...,...,...,...
22292,93436.0,2021-07-30,0.011034,0.070605,27.245856,0.033832,0.000644,2.042673,0.017695,1.401589
22293,93436.0,2021-08-31,0.070605,0.054042,27.325881,0.031230,0.000594,2.057498,0.017366,0.476442
22294,93436.0,2021-09-30,0.054042,0.436530,27.380740,0.029563,0.000563,1.996046,0.011945,0.807639
22295,93436.0,2021-10-29,0.436530,0.027612,27.743234,0.022171,0.001021,2.169457,0.026497,1.870866


### Quintile Sorting Function
- We create a function, as suggested from the tutorial, to help us sort our factors into quintile portfolios

In [5]:
# As suggested from the tutorial, we will create a function to help us create our sorted quantile portfolios
permnos = set(winsorized_factors.permno)
months = list(winsorized_factors[winsorized_factors["permno"] == 10107.0]["date"])
reporting_months = [m.strftime("%Y-%m-%d") for m in months[1:]] + ["2021-12-31"] # shifted by 1 month

# General function name (can do any type of quantile) but we actually sorting into quintiles for this assignment
# This function sorts by factor at the end of every month and then equally holds through the next month and reports the returns portfolio return
def quantile_sort(factor: str, num_portfolios=5, name="Quintile"):
    factor_df = winsorized_factors[["permno", "date", "ret", "ret_t1", factor]]
    monthly_portfolio_returns = pd.DataFrame(columns = ["data_date", "return date"] + [f"{name} {i}" for i in range(1, num_portfolios+1)]) 

    # have two date reporting names to illustrate the use of the [n,m,l] method
    monthly_portfolio_returns["data_date"] = months
    monthly_portfolio_returns["return date"] = reporting_months
    monthly_portfolio_returns.set_index("data_date", inplace=True)
    
    # Every month we sort stocks based on the factor
    for month in months:
        month_df = factor_df[factor_df["date"] == month]
        month_df.sort_values(by=factor, inplace=True, ascending=True) # Sorted in ascending order, smallest quantile starts at index 0

        permno_quantiles = np.array_split(list(month_df["permno"]), num_portfolios) # Splits into 5 equal chunks
        for (i, quantile) in enumerate(permno_quantiles):

            # note that we use ret_t1 avoiding look-ahead bias (we only have the factors available at time t)
            quantile_returns = factor_df[(factor_df["date"] == month) & (factor_df["permno"].isin(quantile))]["ret_t1"]
            
            monthly_portfolio_returns.loc[month, f"{name} {i+1}"] = np.nanmean(quantile_returns) # in case of nan values
            
    # Generate returns
    return monthly_portfolio_returns

## Quantile Portfolios
- Using our function, we are able to sort our portfolios into quantiles by their factor values

The way our code handles the data avoids look-ahead bias for return prediction because we sort using the factor results from time t and only report the t+1 returns (from `ret_t1`. This is equivalent to us forming an equally-weighted index at time t using the factor data we have access to, then holding for a month until time t+1 and noting down returns during the period. This way, we don't make any decisions using future data, so there is no look-ahead bias.

**This is illustrated below**

Note the difference between `data_date` (the date where factors were sorted) and `return_date` (returns date)

Note that we have data from 1997 - this is to help in our part b

In [6]:
quantile_sort("lnSize_winsorized")

Unnamed: 0_level_0,return date,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
data_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-01-31,1997-02-28,0.001083,-0.046318,-0.017539,-0.014411,-0.037853
1997-02-28,1997-03-31,-0.063589,-0.113885,-0.034161,0.017856,-0.043479
1997-03-31,1997-04-30,0.016221,0.025494,0.027538,0.010283,0.113056
1997-04-30,1997-05-30,0.206196,0.189984,0.119348,0.079507,0.082713
1997-05-30,1997-06-30,0.0074,0.002987,0.014039,0.017325,-0.003483
...,...,...,...,...,...,...
2021-07-30,2021-08-31,0.027984,0.046413,0.031173,0.006084,0.044354
2021-08-31,2021-09-30,-0.041886,-0.024441,-0.069276,-0.052418,-0.050429
2021-09-30,2021-10-29,0.091362,0.043535,0.083113,0.029173,0.0564
2021-10-29,2021-11-30,-0.063331,0.009399,-0.05942,0.026617,0.013328


In [7]:
## Next, we need to save our results to excel
factors = ["lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]

quintile_portfolios = {}

for factor in factors:
    quintile_portfolio = quantile_sort(factor, num_portfolios=5, name="Quintile")

    # Rename some columns to match assignment format requirement
    quintile_portfolio.reset_index(inplace=True)
    quintile_portfolio.rename(columns = {"return date": "date"}, inplace=True)
    quintile_portfolio.drop(["data_date"], axis=1, inplace=True)

    # Save
    quintile_portfolios[factor] = quintile_portfolio

with pd.ExcelWriter('datasets-A3.xlsx') as writer:
    # Write each DataFrame to its own sheet
    for sheet_name, df in quintile_portfolios.items():
        df[df["date"] >= "2000-01-01"].to_excel(writer, sheet_name=sheet_name.split("_")[0] + " quintile returns", index=False)


### Average quintile portfolio returns

In [8]:
print("-----------------------------")
for factor in factors:
    print(factor)
    factor_df_2000 = quintile_portfolios[factor]
    factor_df_2000 = factor_df_2000[factor_df_2000["date"] >= "2000-01-01"]
    factor_df_2000.set_index("date", inplace=True)
    print(factor_df_2000.mean().astype(float).round(4))
    print("-----------------------------")

-----------------------------
lnSize_winsorized
Quintile 1    0.0365
Quintile 2    0.0216
Quintile 3    0.0149
Quintile 4    0.0102
Quintile 5    0.0102
dtype: float64
-----------------------------
bk2mkt_winsorized
Quintile 1    0.0220
Quintile 2    0.0157
Quintile 3    0.0182
Quintile 4    0.0188
Quintile 5    0.0199
dtype: float64
-----------------------------
ep1_winsorized
Quintile 1    0.0274
Quintile 2    0.0166
Quintile 3    0.0142
Quintile 4    0.0168
Quintile 5    0.0193
dtype: float64
-----------------------------
beta_winsorized
Quintile 1    0.0164
Quintile 2    0.0182
Quintile 3    0.0192
Quintile 4    0.0175
Quintile 5    0.0234
dtype: float64
-----------------------------
ivol_winsorized
Quintile 1    0.0141
Quintile 2    0.0124
Quintile 3    0.0183
Quintile 4    0.0209
Quintile 5    0.0294
dtype: float64
-----------------------------
mom_winsorized
Quintile 1    0.0219
Quintile 2    0.0175
Quintile 3    0.0153
Quintile 4    0.0136
Quintile 5    0.0263
dtype: float64
--

### Q1 b) 
- Now we form the hedge portfolio using quantiles 1 and 5 for each of the factors

**Which ones we short and long**

Basically, we want to **short** the lesser-value stocks and **long** the higher-value factor. By value I mean does a higher value in the factor net more returns?

Note that our quintiles are shorted from smallest to highest (factor value)

- **lnSize_winsorized**
    - We know from class that small stocks tend to outperform.
    - So we long quintile 1 and short quintile 5.
- **bk2mkt_winsorized**
    - We know from class that value firms (high book to market) tend to outperform grown firms (low book to market).
    - So we long quintile 5 and short quintile 1.
- **ep1_winsorized**
    - Note ep1 is IBQ (Income before extraordinary items) / Market equity (from Assignment 2)
    - ep1 is a measure of the company's income (earnings) per dollar valuation (market cap)
    - In theory, a company with a higher earnings to valuation ratio should perform better (is undervalued) compared to a company with a low ratio (overvalued)
    - So we long quintile 5 and short quintile 1
- **beta_winsorized**
    - Frazzini and Pedersen argue that high-beta stocks are overbought due to the inherent leverage they offer
    - Therefore high beta stocks generate proportionally lower non-leveraged returns 
    - So we Long low beta and short high beta
    - This means we long quintile 1 and short quintile 5
- **ivol_winsorized**
    - Idiosyncratic risk is risk that is associated with the stock itself, not to the market
    - Ang. et al. found that high idiosyncratic volatility have lower returns
    - Hou and Loh argue that investors' lottery preferences, market frictions, etc add excess demand for high ivol stocks, bidding up prices and reducing average returns
    - Therefore we want to short high ivol and long low ivol
    - So we long portfolio 1 short portfolio 5
- **mom_winsorized**
    - We learned in class that stocks high momentum stocks tend to perform well, and due to the short 1 month timeframe it can be realized
    - We long portfolio 5 (high momentum) and short portfolio 1 (low momentum)

In [9]:
hedge_portfolios = pd.DataFrame(columns=["date", "lnSize", "bk2mkt", "ep1", "beta", "ivol", "mom"])
hedge_portfolios["date"] = reporting_months
hedge_portfolios.set_index("date", inplace=True)

# lnSize - Long small short large
hedge_portfolios["lnSize"] = list(quintile_portfolios["lnSize_winsorized"]["Quintile 1"] - quintile_portfolios["lnSize_winsorized"]["Quintile 5"])

# bk2mkt - Long large short small
hedge_portfolios["bk2mkt"] = list(quintile_portfolios["bk2mkt_winsorized"]["Quintile 5"] - quintile_portfolios["bk2mkt_winsorized"]["Quintile 1"])

# ep1 - long large short small
hedge_portfolios["ep1"] = list(quintile_portfolios["ep1_winsorized"]["Quintile 5"] - quintile_portfolios["ep1_winsorized"]["Quintile 1"])

# beta - long small short large
hedge_portfolios["beta"] = list(quintile_portfolios["beta_winsorized"]["Quintile 1"] - quintile_portfolios["beta_winsorized"]["Quintile 5"])

# ivol - long small short large
hedge_portfolios["ivol"] = list(quintile_portfolios["ivol_winsorized"]["Quintile 1"] - quintile_portfolios["ivol_winsorized"]["Quintile 5"])

# mom - long large short small
hedge_portfolios["mom"] = list(quintile_portfolios["mom_winsorized"]["Quintile 5"] - quintile_portfolios["mom_winsorized"]["Quintile 1"])

In [10]:
hedge_portfolios.head()

Unnamed: 0_level_0,lnSize,bk2mkt,ep1,beta,ivol,mom
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1997-02-28,0.038935,0.057986,0.080561,-0.015846,0.017745,0.015846
1997-03-31,-0.02011,0.011559,0.045457,-0.018148,0.09513,0.018148
1997-04-30,-0.096835,-0.20395,-0.069026,-0.028493,-0.091376,0.028493
1997-05-30,0.123484,-0.024017,-0.008043,-0.074106,-0.170252,0.074106
1997-06-30,0.010884,0.015767,0.053533,-0.027975,0.06186,0.027975


For each of these portfolios, we want to now know the following for each hedged portfolio
- Overall return
- Excess return over the market
- CAPM alpha
- Fama-french four factor alpha
- Sharpe ratio

We also want to know the significance of each of these statistics (whether they differ significantly from zero)

### CAPM and FF4 Alpha
- **We will use the technique employed during Assignment 2, utilizing a 36-month lookback for factor data to generate our betas**
- Therefore for our first explained return for each portfolio (2000-01-31) we need to download data up to 36 months back, which is 1997-01-31

In [11]:
# read raw data for factors
ff3_factors = pd.read_sas("ff3+mom.sas7bdat", encoding = 'ISO-8859-1') # Has no null values so we are good

In [12]:
ff3_factors

Unnamed: 0,DATEFF,SMB,HML,MKTRF,RF,UMD
0,1997-01-31,-0.0195,-0.0142,0.0499,0.0045,0.0196
1,1997-02-28,-0.0322,0.0567,-0.0049,0.0039,-0.0213
2,1997-03-31,-0.0036,0.0339,-0.0503,0.0043,0.0090
3,1997-04-30,-0.0577,0.0007,0.0404,0.0043,0.0484
4,1997-05-30,0.0519,-0.0413,0.0674,0.0049,-0.0517
...,...,...,...,...,...,...
295,2021-08-31,-0.0042,-0.0015,0.0291,0.0000,0.0245
296,2021-09-30,0.0071,0.0508,-0.0437,0.0000,0.0149
297,2021-10-29,-0.0235,-0.0049,0.0665,0.0000,0.0319
298,2021-11-30,-0.0132,-0.0045,-0.0155,0.0000,0.0088


In [36]:
# return dates are the dates that we returns data for (array)
def analyze_returns(portfolio: str, portfolio_df, return_dates, since="2000-01-01"):
    portfolio_returns = portfolio_df[portfolio]
    # Fit fama-french and CAPM models using 36-month lookback
    beta = {'date':[], 'capm_beta': [], 'capm_alpha': [], 'ff4_beta':[], 'smb_beta':[], 'hml_beta':[], 'ff4_alpha': []}

    # Find first index where date is past since
    analysis_start_date_idx = 0
    
    for (i, date) in enumerate(return_dates):
        if type(date) != str:
            date = date.strftime("%Y-%m-%d")
        if date >= since:
            analysis_start_date_idx = i
            break
    
    # We dont have a full 36 months for the first iteration, only 35 months because of [m,n,l] rule so we just use 35 for first iteration
    for i in range(analysis_start_date_idx, len(return_dates)):
        date = return_dates[i]
        returns = None
        factors = None
        if i - 37 < 0:
            returns = portfolio_returns.iloc[0:i]
            factors = ff3_factors[1:i+1]
        else:
            returns = portfolio_returns.iloc[i-37:i]
            factors = ff3_factors[i-36:i+1]

        # Run the regressions
        CAPMmodel = linear_model.LinearRegression().fit(factors[["MKTRF"]], returns)
        ff4model = linear_model.LinearRegression(n_jobs=3).fit(factors[["MKTRF", "SMB", "HML"]], returns)

        beta['date'].append(date)
        beta['capm_beta'].append(CAPMmodel.coef_[0])
        beta['capm_alpha'].append(CAPMmodel.intercept_)
        beta['ff4_beta'].append(ff4model.coef_[0])
        beta['smb_beta'].append(ff4model.coef_[1])
        beta['hml_beta'].append(ff4model.coef_[2])
        beta['ff4_alpha'].append(ff4model.intercept_)

    beta_df = pd.DataFrame.from_dict(beta)

    ####################################################################################################################################
    # Now we calculate the performance characteristics and aggregate into a 1-d result
    analysis_dates = set(beta_df["date"]) # speedup
    
    portfolio_returns = portfolio_returns.reset_index()
    portfolio_returns = portfolio_returns[portfolio_returns["date"].isin(analysis_dates)]
    portfolio_returns = portfolio_returns.reset_index()
    
    factor_data = ff3_factors[ff3_factors["DATEFF"].isin(analysis_dates)].reset_index()
    
    summary_results = {}
    # Overall return (monthly)
    net_return = []
    returns = portfolio_returns[portfolio]
    net_return.append(returns.mean() * 100) # Average

    # Significance results
    ttest = stats.ttest_ind(list(returns), np.zeros(len(returns)))
    net_return.append(abs(ttest[0])) # abs(t stat)
    net_return.append(ttest[1]) # p value

    summary_results["Net Return (%)"] = net_return
    
    # Excess return (monthly)
    excess_returns = []
    xret = (returns - factor_data["MKTRF"])
    excess_returns.append(xret.mean() * 100) # Average

    # Significance results
    ttest = stats.ttest_ind(list(xret), np.zeros(len(xret)))
    excess_returns.append(abs(ttest[0])) # abs(t stat)
    excess_returns.append(ttest[1]) # p value

    summary_results["Excess Return (%)"] = excess_returns
    
    # CAPM alpha (monthly)
    capm_alpha_result = []
    capm_alpha = beta_df["capm_alpha"]
    capm_alpha_result.append(capm_alpha.mean() * 100) # Average

    # Significance results
    ttest = stats.ttest_ind(capm_alpha, np.zeros(len(capm_alpha)))
    capm_alpha_result.append(abs(ttest[0])) # abs(t stat)
    capm_alpha_result.append(ttest[1]) # p value
    
    summary_results["CAPM Alpha (%)"] = capm_alpha_result

    
    # FF4 alpha (monthly)
    ff4_alpha_result = []
    ff4_alpha = beta_df["ff4_alpha"]
    ff4_alpha_result.append(ff4_alpha.mean() * 100) # Average

    # Significance results
    ttest = stats.ttest_ind(ff4_alpha, np.zeros(len(ff4_alpha)))
    ff4_alpha_result.append(abs(ttest[0])) # abs(t stat)
    ff4_alpha_result.append(ttest[1]) # p value
    
    summary_results["FF4 Alpha (%)"] = ff4_alpha_result

    
    # Sharpe = r_p = r_f / vol
    # * Note we will take volatility as volatility of portfolio returns
    monthly_volatility = returns.std()
    sharpe = []
    sharpe_monthly = (returns - factor_data["RF"]) / monthly_volatility
    sharpe.append(sharpe_monthly.mean())
    
    # Significance results
    ttest = stats.ttest_ind(list(sharpe_monthly), np.zeros(len(sharpe_monthly))) # Compare against H_0 = 0 (worst sharpe)
    sharpe.append(abs(ttest[0])) # abs(t stat)
    sharpe.append(ttest[1]) # p value
    
    summary_results["Sharpe"] = sharpe


    return summary_results


In [14]:
# Based on my interpretation of the question, I will just run this analysis on the all portfolios
quin_portfolios = {}

hedged_portfolios = {"portfolio": [], 
                      "Overall Return (%)": [], "ret |t|": [], "ret p-value": [], 
                      "Excess Return (%)": [], "xret |t|": [], "xret p-value": [],
                      "CAPM Alpha (%)": [], "CAPM |t|": [], "CAPM p-value": [],
                      "FF4 Alpha (%)": [], "FF4 |t|": [], "FF4 p-value": [],
                      "Sharpe": [], "sharpe |t|": [], "sharpe p-value": []}

# Hedged Portfolios first
for portfolio in ["lnSize", "bk2mkt", "ep1", "beta", "ivol", "mom"]:
    results = analyze_returns(portfolio, hedge_portfolios, hedge_portfolios.index)

    # Append into hedged_portfolios
    hedged_portfolios['portfolio'].append(portfolio)
    hedged_portfolios["Overall Return (%)"].append(results["Net Return (%)"][0])
    hedged_portfolios["ret |t|"].append(results["Net Return (%)"][1])
    hedged_portfolios["ret p-value"].append(results["Net Return (%)"][2])

    hedged_portfolios["Excess Return (%)"].append(results["Excess Return (%)"][0])
    hedged_portfolios["xret |t|"].append(results["Excess Return (%)"][1])
    hedged_portfolios["xret p-value"].append(results["Excess Return (%)"][2])

    hedged_portfolios["CAPM Alpha (%)"].append(results["CAPM Alpha (%)"][0])
    hedged_portfolios["CAPM |t|"].append(results["CAPM Alpha (%)"][1])
    hedged_portfolios["CAPM p-value"].append(results["CAPM Alpha (%)"][2])

    hedged_portfolios["FF4 Alpha (%)"].append(results["FF4 Alpha (%)"][0])
    hedged_portfolios["FF4 |t|"].append(results["FF4 Alpha (%)"][1])
    hedged_portfolios["FF4 p-value"].append(results["FF4 Alpha (%)"][2])

    hedged_portfolios["Sharpe"].append(results["Sharpe"][0])
    hedged_portfolios["sharpe |t|"].append(results["Sharpe"][1])
    hedged_portfolios["sharpe p-value"].append(results["Sharpe"][2])

# Quintile Portfolios Next
for portfolio in ["lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]:
    portfolio_quintiles = {"quintile": [], 
                          "Overall Return (%)": [], "ret |t|": [], "ret p-value": [], 
                          "Excess Return (%)": [], "xret |t|": [], "xret p-value": [],
                          "CAPM Alpha (%)": [], "CAPM |t|": [], "CAPM p-value": [],
                          "FF4 Alpha (%)": [], "FF4 |t|": [], "FF4 p-value": [],
                          "Sharpe": [], "sharpe |t|": [], "sharpe p-value": []}
    
    for quintile in [f"Quintile {i+1}" for i in range(5)]:
        results = analyze_returns(quintile, quintile_portfolios[portfolio].set_index("date"), quintile_portfolios[portfolio]["date"])

        # Append into quin_portfolios
        portfolio_quintiles["quintile"].append(quintile)
        portfolio_quintiles["Overall Return (%)"].append(results["Net Return (%)"][0])
        portfolio_quintiles["ret |t|"].append(results["Net Return (%)"][1])
        portfolio_quintiles["ret p-value"].append(results["Net Return (%)"][2])
    
        portfolio_quintiles["Excess Return (%)"].append(results["Excess Return (%)"][0])
        portfolio_quintiles["xret |t|"].append(results["Excess Return (%)"][1])
        portfolio_quintiles["xret p-value"].append(results["Excess Return (%)"][2])
    
        portfolio_quintiles["CAPM Alpha (%)"].append(results["CAPM Alpha (%)"][0])
        portfolio_quintiles["CAPM |t|"].append(results["CAPM Alpha (%)"][1])
        portfolio_quintiles["CAPM p-value"].append(results["CAPM Alpha (%)"][2])
    
        portfolio_quintiles["FF4 Alpha (%)"].append(results["FF4 Alpha (%)"][0])
        portfolio_quintiles["FF4 |t|"].append(results["FF4 Alpha (%)"][1])
        portfolio_quintiles["FF4 p-value"].append(results["FF4 Alpha (%)"][2])
    
        portfolio_quintiles["Sharpe"].append(results["Sharpe"][0])
        portfolio_quintiles["sharpe |t|"].append(results["Sharpe"][1])
        portfolio_quintiles["sharpe p-value"].append(results["Sharpe"][2])
    quin_portfolios[portfolio] = portfolio_quintiles

### Hedged Portfolio Returns

In [15]:
hedged_result = pd.DataFrame.from_dict(hedged_portfolios).round(2).set_index("portfolio").T
hedged_result

portfolio,lnSize,bk2mkt,ep1,beta,ivol,mom
Overall Return (%),2.63,-0.21,-0.8,-0.7,-1.53,0.45
ret |t|,6.3,0.55,1.73,1.35,2.86,0.89
ret p-value,0.0,0.58,0.08,0.18,0.0,0.38
Excess Return (%),2.02,-0.81,-1.41,-1.31,-2.14,-0.16
xret |t|,4.6,1.59,2.26,1.85,3.06,0.26
xret p-value,0.0,0.11,0.02,0.07,0.0,0.79
CAPM Alpha (%),2.47,-0.12,-0.7,-0.64,-1.72,0.58
CAPM |t|,30.11,1.7,13.67,7.25,22.05,7.91
CAPM p-value,0.0,0.09,0.0,0.0,0.0,0.0
FF4 Alpha (%),2.17,-0.26,-0.55,-0.67,-1.6,0.3


## All Quintile Portfolios
`["lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]`

### lnSize (winsorized)

In [16]:
pd.DataFrame.from_dict(quin_portfolios["lnSize_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),3.65,2.16,1.49,1.02,1.02
ret |t|,6.39,4.9,3.92,2.86,3.15
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),3.04,1.56,0.89,0.42,0.42
xret |t|,7.07,5.8,4.0,2.21,2.79
xret p-value,0.0,0.0,0.0,0.03,0.01
CAPM Alpha (%),3.08,1.61,1.07,0.57,0.61
CAPM |t|,31.89,21.4,19.39,14.63,18.74
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),2.75,1.56,1.05,0.53,0.58


### bk2mkt (winsorized)

In [17]:
pd.DataFrame.from_dict(quin_portfolios["bk2mkt_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),2.2,1.57,1.82,1.88,1.99
ret |t|,4.6,3.94,4.45,4.54,5.05
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),1.59,0.96,1.22,1.28,1.39
xret |t|,5.09,3.89,5.54,4.67,5.37
xret p-value,0.0,0.0,0.0,0.0,0.0
CAPM Alpha (%),1.73,1.03,1.25,1.42,1.62
CAPM |t|,31.4,35.39,20.4,16.19,20.72
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),1.69,0.94,1.19,1.31,1.42


### ep1 (winsorized)

In [18]:
pd.DataFrame.from_dict(quin_portfolios["ep1_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),2.74,1.66,1.42,1.68,1.93
ret |t|,4.39,3.64,4.2,5.1,5.63
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),2.13,1.05,0.81,1.08,1.33
xret |t|,4.58,3.65,4.45,6.23,6.48
xret p-value,0.0,0.0,0.0,0.0,0.0
CAPM Alpha (%),2.16,1.2,0.95,1.26,1.45
CAPM |t|,29.62,25.31,20.99,19.57,23.1
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),1.92,1.05,0.94,1.26,1.37


### beta (winsorized)

In [19]:
pd.DataFrame.from_dict(quin_portfolios["beta_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),1.64,1.82,1.92,1.75,2.34
ret |t|,6.15,5.83,4.58,3.34,3.79
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),1.03,1.21,1.32,1.14,1.73
xret |t|,5.36,7.19,4.96,3.19,3.95
xret p-value,0.0,0.0,0.0,0.0,0.0
CAPM Alpha (%),1.19,1.35,1.41,1.28,1.83
CAPM |t|,44.67,44.63,21.19,14.54,19.66
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),1.07,1.27,1.28,1.22,1.74


### ivol (winsorized)

In [20]:
pd.DataFrame.from_dict(quin_portfolios["ivol_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),1.41,1.24,1.83,2.09,2.94
ret |t|,5.2,3.82,4.58,4.54,4.53
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),0.8,0.63,1.22,1.49,2.33
xret |t|,5.39,3.96,5.42,5.09,4.71
xret p-value,0.0,0.0,0.0,0.0,0.0
CAPM Alpha (%),0.87,0.76,1.43,1.45,2.59
CAPM |t|,35.33,17.16,18.17,24.17,29.81
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),0.76,0.66,1.37,1.46,2.36


### mom (winsorized)

In [21]:
pd.DataFrame.from_dict(quin_portfolios["mom_winsorized"]).round(2).set_index("quintile").T

quintile,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
Overall Return (%),2.19,1.75,1.53,1.36,2.63
ret |t|,3.85,4.42,3.97,3.97,5.58
ret p-value,0.0,0.0,0.0,0.0,0.0
Excess Return (%),1.58,1.14,0.92,0.75,2.03
xret |t|,3.87,4.85,3.9,3.78,5.99
xret p-value,0.0,0.0,0.0,0.0,0.0
CAPM Alpha (%),1.7,1.14,1.02,0.92,2.27
CAPM |t|,20.1,23.89,17.16,16.92,38.21
CAPM p-value,0.0,0.0,0.0,0.0,0.0
FF4 Alpha (%),1.74,1.06,0.91,0.81,2.03


***
## Q2 - betting against beta strategy

We create our Betting-Against-Beta (BAB) factor in the same style as Frazinni and Pederson

$$BAB_{t+1} = \dfrac{r_{L,t+1} - r_f}{\beta_{L,t}} - \dfrac{r_{H,t+1} - r_f}{\beta{H,t}}$$

Where
- $\beta_H$ is our high beta (quintile 5) portfolio beta
- $\beta_L$ is our low beta (quintile 1) portfolio beta

Since our quintile portfolios are equally-weighted, we can just take the average beta for all stocks in each respective portfolio to get out two beta measures

In [22]:
permnos = set(all_monthly_data.permno)
months = list(all_monthly_data[all_monthly_data["permno"] == 10107.0]["date"])
reporting_months = [m.strftime("%Y-%m-%d") for m in months[1:]] + ["2021-12-31"] # shifted by 1 month

# General function name (can do any type of quantile) but we actually sorting into quintiles for this assignment
# Given a factor and a quantile, this function returns the average value of the factor accross all stocks in the quantile
def quantize(factor: str, q: int, num_portfolios=5, name="Quintile"):
    factor_df = winsorized_factors[["permno", "date", "ret", "ret_t1", factor]]
    monthly_portfolio_returns = pd.DataFrame(columns = ["data_date", "return date", "ret", factor]) 

    # have two date reporting names to illustrate the use of the [n,m,l] method
    monthly_portfolio_returns["data_date"] = months
    monthly_portfolio_returns["return date"] = reporting_months
    monthly_portfolio_returns.set_index("data_date", inplace=True)
    
    # Every month we sort stocks based on the factor
    for month in months:
        month_df = factor_df[factor_df["date"] == month]
        month_df.sort_values(by=factor, inplace=True, ascending=True) # Sorted in ascending order, smallest quantile starts at index 0

        permno_quantiles = np.array_split(list(month_df["permno"]), num_portfolios) # Splits into 5 equal chunks


        # note that we use ret_t1 avoiding look-ahead bias (we only have the factors available at time t)
        quantile = permno_quantiles[q - 1]
        quantile_returns = factor_df[(factor_df["date"] == month) & (factor_df["permno"].isin(quantile))]["ret_t1"]

        quantile_factors = factor_df[(factor_df["date"] == month) & (factor_df["permno"].isin(quantile))][factor] # using time t
        
        monthly_portfolio_returns.loc[month, "ret"] = np.nanmean(quantile_returns) # in case of nan values
        monthly_portfolio_returns.loc[month, factor] = np.nanmean(quantile_factors) # in case of nan values
            
    # Generate returns
    return monthly_portfolio_returns

In [23]:
# Construct BAB factor

q1 = quantize("beta_winsorized", 1)
q5 = quantize("beta_winsorized", 5)

In [24]:
q1

Unnamed: 0_level_0,return date,ret,beta_winsorized
data_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1997-01-31,1997-02-28,-0.009215,
1997-02-28,1997-03-31,-0.01241,
1997-03-31,1997-04-30,0.083213,
1997-04-30,1997-05-30,0.07759,
1997-05-30,1997-06-30,0.002997,
...,...,...,...
2021-07-30,2021-08-31,0.000353,0.421021
2021-08-31,2021-09-30,-0.055445,0.420494
2021-09-30,2021-10-29,0.023769,0.445176
2021-10-29,2021-11-30,-0.046726,0.412406


In [25]:
ff3_factors

Unnamed: 0,DATEFF,SMB,HML,MKTRF,RF,UMD
0,1997-01-31,-0.0195,-0.0142,0.0499,0.0045,0.0196
1,1997-02-28,-0.0322,0.0567,-0.0049,0.0039,-0.0213
2,1997-03-31,-0.0036,0.0339,-0.0503,0.0043,0.0090
3,1997-04-30,-0.0577,0.0007,0.0404,0.0043,0.0484
4,1997-05-30,0.0519,-0.0413,0.0674,0.0049,-0.0517
...,...,...,...,...,...,...
295,2021-08-31,-0.0042,-0.0015,0.0291,0.0000,0.0245
296,2021-09-30,0.0071,0.0508,-0.0437,0.0000,0.0149
297,2021-10-29,-0.0235,-0.0049,0.0665,0.0000,0.0319
298,2021-11-30,-0.0132,-0.0045,-0.0155,0.0000,0.0088


In [27]:
# Our returns in each period is equal to the BAB
bab_portfolio_returns = pd.DataFrame(columns = ["date", "bab"])
bab_portfolio_returns["date"] = q1.index
rf = ff3_factors[["DATEFF", "RF"]].set_index("DATEFF").iloc[:-1]
bab_portfolio_returns.set_index("date", inplace=True)
bab_portfolio_returns["bab"] = ((q1["ret"] - rf["RF"]) / q1["beta_winsorized"]) - ((q5["ret"] - rf["RF"]) / q5["beta_winsorized"])
bab_portfolio_returns.dropna(inplace=True)

In [31]:
# Recreate Q1 b) reporting
bab_portfolio_summary = {"portfolio": [], 
                      "Overall Return (%)": [], "ret |t|": [], "ret p-value": [], 
                      "Excess Return (%)": [], "xret |t|": [], "xret p-value": [],
                      "CAPM Alpha (%)": [], "CAPM |t|": [], "CAPM p-value": [],
                      "FF4 Alpha (%)": [], "FF4 |t|": [], "FF4 p-value": [],
                      "Sharpe": [], "sharpe |t|": [], "sharpe p-value": []}

# Hedged Portfolios first
portfolio = "bab"
results = analyze_returns(portfolio, bab_portfolio_returns, bab_portfolio_returns.index)

# Append into hedged_portfolios
bab_portfolio_summary['portfolio'].append(portfolio)
bab_portfolio_summary["Overall Return (%)"].append(results["Net Return (%)"][0])
bab_portfolio_summary["ret |t|"].append(results["Net Return (%)"][1])
bab_portfolio_summary["ret p-value"].append(results["Net Return (%)"][2])

bab_portfolio_summary["Excess Return (%)"].append(results["Excess Return (%)"][0])
bab_portfolio_summary["xret |t|"].append(results["Excess Return (%)"][1])
bab_portfolio_summary["xret p-value"].append(results["Excess Return (%)"][2])

bab_portfolio_summary["CAPM Alpha (%)"].append(results["CAPM Alpha (%)"][0])
bab_portfolio_summary["CAPM |t|"].append(results["CAPM Alpha (%)"][1])
bab_portfolio_summary["CAPM p-value"].append(results["CAPM Alpha (%)"][2])

bab_portfolio_summary["FF4 Alpha (%)"].append(results["FF4 Alpha (%)"][0])
bab_portfolio_summary["FF4 |t|"].append(results["FF4 Alpha (%)"][1])
bab_portfolio_summary["FF4 p-value"].append(results["FF4 Alpha (%)"][2])

bab_portfolio_summary["Sharpe"].append(results["Sharpe"][0])
bab_portfolio_summary["sharpe |t|"].append(results["Sharpe"][1])
bab_portfolio_summary["sharpe p-value"].append(results["Sharpe"][2])

In [32]:
bab_portfolio_result = pd.DataFrame.from_dict(bab_portfolio_summary).round(2).set_index("portfolio").T

In [33]:
compare_df = pd.merge(bab_portfolio_result.reset_index(), hedged_result[["beta"]].reset_index())
compare_df.set_index("index", inplace=True)
compare_df

portfolio,bab,beta
index,Unnamed: 1_level_1,Unnamed: 2_level_1
Overall Return (%),2.46,-0.7
ret |t|,3.64,1.35
ret p-value,0.0,0.18
Excess Return (%),1.86,-1.31
xret |t|,2.61,1.85
xret p-value,0.01,0.07
CAPM Alpha (%),0.91,-0.64
CAPM |t|,2.33,7.25
CAPM p-value,0.02,0.0
FF4 Alpha (%),0.77,-0.67


Discussion about the comparison included in report

***
## Q3 - Factor-mimicking ETF

In [41]:
low_ivol = quintile_portfolios["ivol_winsorized"][["date", "Quintile 1"]]
low_ivol.columns = ["date", "low_ivol"] # rename q1 (low ivol) to low_ivol
low_ivol.set_index("date", inplace=True)

In [42]:
analyze_returns("low_ivol", low_ivol, low_ivol.index)

{'Net Return (%)': [1.4069851511822546,
  5.198299804946291,
  2.883549130411241e-07],
 'Excess Return (%)': [0.8013790905761939,
  5.389076201954244,
  1.0705671335580683e-07],
 'CAPM Alpha (%)': [0.8676658641316266,
  35.329236760083155,
  5.64584352513782e-141],
 'FF4 Alpha (%)': [0.7580827231051993,
  30.800915689025476,
  7.709650154588035e-120],
 'Sharpe': [0.2916646240844747, 4.728725180692668, 2.9056550174395243e-06]}

In [44]:
# Recreate Q1 b) reporting with long leg of hedge portfolio
# That is, we long low-volatility
low_ivol_portfolio_summary = {"portfolio": [], 
                          "Overall Return (%)": [], "ret |t|": [], "ret p-value": [], 
                          "Excess Return (%)": [], "xret |t|": [], "xret p-value": [],
                          "CAPM Alpha (%)": [], "CAPM |t|": [], "CAPM p-value": [],
                          "FF4 Alpha (%)": [], "FF4 |t|": [], "FF4 p-value": [],
                          "Sharpe": [], "sharpe |t|": [], "sharpe p-value": []}

# Hedged Portfolios first
portfolio = "low_ivol"
results = analyze_returns("low_ivol", low_ivol, low_ivol.index)

# Append into hedged_portfolios
low_ivol_portfolio_summary['portfolio'].append(portfolio)
low_ivol_portfolio_summary["Overall Return (%)"].append(results["Net Return (%)"][0])
low_ivol_portfolio_summary["ret |t|"].append(results["Net Return (%)"][1])
low_ivol_portfolio_summary["ret p-value"].append(results["Net Return (%)"][2])

low_ivol_portfolio_summary["Excess Return (%)"].append(results["Excess Return (%)"][0])
low_ivol_portfolio_summary["xret |t|"].append(results["Excess Return (%)"][1])
low_ivol_portfolio_summary["xret p-value"].append(results["Excess Return (%)"][2])

low_ivol_portfolio_summary["CAPM Alpha (%)"].append(results["CAPM Alpha (%)"][0])
low_ivol_portfolio_summary["CAPM |t|"].append(results["CAPM Alpha (%)"][1])
low_ivol_portfolio_summary["CAPM p-value"].append(results["CAPM Alpha (%)"][2])

low_ivol_portfolio_summary["FF4 Alpha (%)"].append(results["FF4 Alpha (%)"][0])
low_ivol_portfolio_summary["FF4 |t|"].append(results["FF4 Alpha (%)"][1])
low_ivol_portfolio_summary["FF4 p-value"].append(results["FF4 Alpha (%)"][2])

low_ivol_portfolio_summary["Sharpe"].append(results["Sharpe"][0])
low_ivol_portfolio_summary["sharpe |t|"].append(results["Sharpe"][1])
low_ivol_portfolio_summary["sharpe p-value"].append(results["Sharpe"][2])

In [45]:
low_ivol_portfolio_result = pd.DataFrame.from_dict(low_ivol_portfolio_summary).round(2).set_index("portfolio").T

In [46]:
low_ivol_portfolio_result

portfolio,low_ivol
Overall Return (%),1.41
ret |t|,5.2
ret p-value,0.0
Excess Return (%),0.8
xret |t|,5.39
xret p-value,0.0
CAPM Alpha (%),0.87
CAPM |t|,35.33
CAPM p-value,0.0
FF4 Alpha (%),0.76


Analysis on excess performance for investors included in report
- Discuss Excess return and confidence test (p value), and talk about how monthly transactions and management fees could eat away at the excess return

### b) Annual ETF Turnover

## Q6 - Have to dowload alot of stocks