# Assignment 4

In [1]:
import pandas as pd
from pandasql import sqldf
import matplotlib.pyplot as plt
import numpy as np
import warnings
from sklearn import linear_model
import statsmodels.api as sm
import scipy.stats as stats
from math import sqrt

warnings.filterwarnings('ignore')

In [2]:
# read raw data
all_monthly_data = pd.read_sas("CA.sas7bdat", encoding = 'ISO-8859-1')

In [3]:
all_monthly_data.head()

Unnamed: 0,permno,date,ret,ret_t1,TICKER,COMNAM,PRC,SHROUT,datadate,fyearq,...,bk2mkt_winsorized_zscore,ep1_winsorized_zscore,ep2_winsorized_zscore,gvkey,beta,ivol,mom,beta_winsorized,ivol_winsorized,mom_winsorized
0,10107.0,1997-01-31,0.234493,-0.04411,MSFT,MICROSOFT CORP,102.0,1198000.0,1996-09-30,1997.0,...,-1.044416,-0.066718,0.274438,12141,,0.012851,,,0.012851,
1,10107.0,1997-02-28,-0.04411,-0.05961,MSFT,MICROSOFT CORP,97.5,1198000.0,1996-09-30,1997.0,...,-1.052875,0.001182,0.33749,12141,,0.014933,,,0.014933,
2,10107.0,1997-03-31,-0.05961,0.325153,MSFT,MICROSOFT CORP,91.6875,1191000.0,1996-09-30,1997.0,...,-1.048655,-0.001393,0.309762,12141,,0.016597,,,0.016597,
3,10107.0,1997-04-30,0.325153,0.020576,MSFT,MICROSOFT CORP,121.5,1191000.0,1996-12-31,1997.0,...,-1.080912,0.003298,-0.021655,12141,,0.022905,,,0.022905,
4,10107.0,1997-05-30,0.020576,0.019153,MSFT,MICROSOFT CORP,124.0,1191000.0,1996-12-31,1997.0,...,-1.023077,-0.078655,-0.149326,12141,,0.011146,,,0.011146,


***
## Task 1 - [m, n, l] Quitile Portfolios
- We wish to use the [m,n,l] month rule to construct a quantile portfolio for each of the winsorized factors from A3 for Jan 2000-Nov 2021
- We forecast one month ahead returns
- **Our m,n,l parameters have n = 0, l = 1 so we assume no waiting period and a holding period of 1 month**
- Our estimation window for each time t is the data that we have before, and at time t

In [4]:
winsorized_factors = all_monthly_data[["permno", "date", "ret", "ret_t1", "lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]]
winsorized_factors = winsorized_factors.loc[winsorized_factors["date"] >= "1999-12-01"]
winsorized_factors.reset_index(inplace=True)
winsorized_factors.drop("index", inplace=True, axis=1)
winsorized_factors

Unnamed: 0,permno,date,ret,ret_t1,lnSize_winsorized,bk2mkt_winsorized,ep1_winsorized,beta_winsorized,ivol_winsorized,mom_winsorized
0,10107.0,1999-12-31,0.282306,-0.161670,27.124242,0.045579,0.003655,1.396267,0.022570,0.683690
1,10107.0,2000-01-31,-0.161670,-0.086840,26.947898,0.060114,0.004338,1.431619,0.023474,0.118604
2,10107.0,2000-02-29,-0.086840,0.188811,26.857048,0.065832,0.004751,1.403349,0.017743,0.190712
3,10107.0,2000-03-31,0.188811,-0.343520,27.045764,0.054510,0.003934,1.429971,0.031291,0.185533
4,10107.0,2000-04-28,-0.343520,-0.103040,26.628771,0.094234,0.006637,1.599790,0.037307,-0.142166
...,...,...,...,...,...,...,...,...,...,...
20453,93436.0,2021-07-30,0.011034,0.070605,27.245856,0.033832,0.000644,2.042673,0.017695,1.401589
20454,93436.0,2021-08-31,0.070605,0.054042,27.325881,0.031230,0.000594,2.057498,0.017366,0.476442
20455,93436.0,2021-09-30,0.054042,0.436530,27.380740,0.029563,0.000563,1.996046,0.011945,0.807639
20456,93436.0,2021-10-29,0.436530,0.027612,27.743234,0.022171,0.001021,2.169457,0.026497,1.870866


### Quintile Sorting Function
- We create a function, as suggested from the tutorial, to help us sort our factors into quintile portfolios

In [5]:
# As suggested from the tutorial, we will create a function to help us create our sorted quantile portfolios
permnos = set(winsorized_factors.permno)
months = list(winsorized_factors[winsorized_factors["permno"] == 10107.0]["date"])
reporting_months = [m.strftime("%Y-%m-%d") for m in months[1:]] + ["2021-12-31"] # shifted by 1 month

# General function name (can do any type of quantile) but we actually sorting into quintiles for this assignment
# This function sorts by factor at the end of every month and then equally holds through the next month and reports the returns portfolio return
def quantile_sort(factor: str, num_portfolios=5, name="Quintile"):
    factor_df = winsorized_factors[["permno", "date", "ret", "ret_t1", factor]]
    monthly_portfolio_returns = pd.DataFrame(columns = ["data_date", "return date"] + [f"{name} {i}" for i in range(1, num_portfolios+1)]) 

    # have two date reporting names to illustrate the use of the [n,m,l] method
    monthly_portfolio_returns["data_date"] = months
    monthly_portfolio_returns["return date"] = reporting_months
    monthly_portfolio_returns.set_index("data_date", inplace=True)
    
    # Every month we sort stocks based on the factor
    for month in months:
        month_df = factor_df[factor_df["date"] == month]
        month_df.sort_values(by=factor, inplace=True, ascending=True) # Sorted in ascending order, smallest quantile starts at index 0

        permno_quantiles = np.array_split(list(month_df["permno"]), num_portfolios) # Splits into 5 equal chunks
        for (i, quantile) in enumerate(permno_quantiles):

            # note that we use ret_t1 avoiding look-ahead bias (we only have the factors available at time t)
            quantile_returns = factor_df[(factor_df["date"] == month) & (factor_df["permno"].isin(quantile))]["ret_t1"]
            
            monthly_portfolio_returns.loc[month, f"{name} {i+1}"] = np.nanmean(quantile_returns) # in case of nan values
            
    # Generate returns
    return monthly_portfolio_returns

## Quantile Portfolios
- Using our function, we are able to sort our portfolios into quantiles by their factor values

The way our code handles the data avoids look-ahead bias for return prediction because we sort using the factor results from time t and only report the t+1 returns (from `ret_t1`. This is equivalent to us forming an equally-weighted index at time t using the factor data we have access to, then holding for a month until time t+1 and noting down returns during the period. This way, we don't make any decisions using future data, so there is no look-ahead bias.

**This is illustrated below**

Note the difference between `data_date` (the date where factors were sorted) and `return_date` (returns date)

In [6]:
quantile_sort("lnSize_winsorized")

Unnamed: 0_level_0,return date,Quintile 1,Quintile 2,Quintile 3,Quintile 4,Quintile 5
data_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-12-31,2000-01-31,-0.081791,0.042169,-0.04583,-0.004732,-0.029213
2000-01-31,2000-02-29,0.694619,0.120178,0.055713,0.27543,0.101911
2000-02-29,2000-03-31,-0.011919,0.118736,0.063907,0.053541,0.071465
2000-03-31,2000-04-28,-0.020628,-0.059524,0.010407,-0.063702,-0.037518
2000-04-28,2000-05-31,0.02102,0.014569,-0.029671,-0.142351,-0.065683
...,...,...,...,...,...,...
2021-07-30,2021-08-31,0.027984,0.046413,0.031173,0.006084,0.044354
2021-08-31,2021-09-30,-0.041886,-0.024441,-0.069276,-0.052418,-0.050429
2021-09-30,2021-10-29,0.091362,0.043535,0.083113,0.029173,0.0564
2021-10-29,2021-11-30,-0.063331,0.009399,-0.05942,0.026617,0.013328


In [13]:
## Next, we need to save our results to excel
factors = ["lnSize_winsorized", "bk2mkt_winsorized", "ep1_winsorized", "beta_winsorized", "ivol_winsorized", "mom_winsorized"]

quintile_portfolios = {}

for factor in factors:
    quintile_portfolio = quantile_sort(factor, num_portfolios=5, name="Quintile")

    # Rename some columns to match assignment format requirement
    quintile_portfolio.reset_index(inplace=True)
    quintile_portfolio.rename(columns = {"return date": "date"}, inplace=True)
    quintile_portfolio.drop(["data_date"], axis=1, inplace=True)

    # Save
    quintile_portfolios[factor] = quintile_portfolio

with pd.ExcelWriter('datasets-A3.xlsx') as writer:
    # Write each DataFrame to its own sheet
    for sheet_name, df in quintile_portfolios.items():
        df.to_excel(writer, sheet_name=sheet_name.split("_")[0] + " quintile returns", index=False)


### Q1 b) 
- Now we form the hedge portfolio using quantiles 1 and 5 for each of the factors

**Which ones we short and long**

Basically, we want to **short** the lesser-value stocks and **long** the higher-value factor. By value I mean does a higher value in the factor net more returns?

Note that our quintiles are shorted from smallest to highest (factor value)

- **lnSize_winsorized**
    - We know from class that small stocks tend to outperform.
    - So we long quintile 1 and short quintile 5.
- **bk2mkt_winsorized**
    - We know from class that value firms (high book to market) tend to outperform grown firms (low book to market).
    - So we long quintile 5 and short quintile 1.
- **ep1_winsorized**
    - Note ep1 is IBQ (Income before extraordinary items) / Market equity (from Assignment 2)
    - ep1 is a measure of the company's income (earnings) per dollar valuation (market cap)
    - In theory, a company with a higher earnings to valuation ratio should perform better (is undervalued) compared to a company with a low ratio (overvalued)
    - So we long quintile 5 and short quintile 1
- **beta_winsorized**
    - Frazzini and Pedersen argue that high-beta stocks are overbought due to the inherent leverage they offer
    - Therefore high beta stocks generate proportionally lower non-leveraged returns 
    - So we Long low beta and short high beta
    - This means we long quintile 1 and short quintile 5
- **ivol_winsorized**
    - Idiosyncratic risk is risk that is associated with the stock itself, not to the market
    - Ang. et al. found that high idiosyncratic volatility have lower returns
    - Hou and Loh argue that investors' lottery preferences, market frictions, etc add excess demand for high ivol stocks, bidding up prices and reducing average returns
    - Therefore we want to short high ivol and long low ivol
    - So we short portfolio 5 and long portfolio 1
- **mom_winsorized**
    - 

In [25]:
print("-----------------------------")
for factor in factors:
    print(factor)
    print(quintile_portfolios[factor].set_index("date").mean())
    print("-----------------------------")

-----------------------------
lnSize_winsorized
Quintile 1    0.036485
Quintile 2    0.021628
Quintile 3    0.014933
Quintile 4    0.010212
Quintile 5    0.010225
dtype: object
-----------------------------
bk2mkt_winsorized
Quintile 1    0.021986
Quintile 2     0.01566
Quintile 3    0.018216
Quintile 4    0.018835
Quintile 5    0.019911
dtype: object
-----------------------------
ep1_winsorized
Quintile 1    0.027387
Quintile 2    0.016552
Quintile 3      0.0142
Quintile 4     0.01682
Quintile 5    0.019343
dtype: object
-----------------------------
beta_winsorized
Quintile 1     0.01638
Quintile 2     0.01818
Quintile 3    0.019231
Quintile 4    0.017455
Quintile 5    0.023376
dtype: object
-----------------------------
ivol_winsorized
Quintile 1     0.01407
Quintile 2    0.012358
Quintile 3    0.018267
Quintile 4    0.020936
Quintile 5    0.029406
dtype: object
-----------------------------
mom_winsorized
Quintile 1    0.021867
Quintile 2    0.017472
Quintile 3    0.015288
Quintile