In [32]:
import pandas as pd
pd.options.display.float_format = '{:.3f}'.format
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression 

In [33]:
equities = pd.read_excel("/Users/michelleyang/Desktop/proshares_analysis_data.xlsx", sheet_name='hedge_fund_series')

In [34]:
# Read the excel file for this homework

#Descriptions sheet. Note the use of .rename to correct column names that are not correctely named in the source file
desc = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','descriptions').rename(columns={'Unnamed: 0':'Symbol'}).set_index('Symbol')

#Hedge Fund Data
hf = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','hedge_fund_series').set_index('date')

#Merrill Lynch Factor Data
mf = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','merrill_factors').set_index('date')

#Other Data
od = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','other_data').set_index('date')

In [35]:
hf.head()

Unnamed: 0_level_0,HFRIFWI Index,MLEIFCTR Index,MLEIFCTX Index,HDG US Equity,QAI US Equity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-08-31,-0.032,-0.026,-0.026,-0.027,-0.006
2011-09-30,-0.039,-0.032,-0.033,-0.032,-0.022
2011-10-31,0.027,0.044,0.043,0.051,0.025
2011-11-30,-0.013,-0.012,-0.012,-0.029,-0.008
2011-12-31,-0.004,0.002,0.002,0.013,0.002


In [36]:
mf.head()

Unnamed: 0_level_0,SPY US Equity,USGG3M Index,EEM US Equity,EFA US Equity,EUO US Equity,IWM US Equity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-08-31,-0.055,0.0,-0.093,-0.088,-0.006,-0.089
2011-09-30,-0.069,0.0,-0.179,-0.108,0.142,-0.112
2011-10-31,0.109,-0.0,0.163,0.096,-0.07,0.151
2011-11-30,-0.004,0.0,-0.02,-0.022,0.055,-0.004
2011-12-31,0.01,0.0,-0.043,-0.022,0.076,0.005


# Question 2) 1.(a)(b)(c)

In [37]:
def mean_volatality_stat(hf, date_index = True, annualize = 12):
    if(date_index == True):
        mean_vol_table = hf.describe().loc[['mean','std']].transpose()
    if annualize:
        mean_vol_table['Annual_Mean'], mean_vol_table['Annual_Std']  = mean_vol_table['mean'] * annualize, mean_vol_table['std'] * np.sqrt(annualize)
        mean_vol_table['Sharpe Ratio'] = mean_vol_table['Annual_Mean'] / mean_vol_table['Annual_Std']
    return(mean_vol_table[['Annual_Mean','Annual_Std', 'Sharpe Ratio']])

display(mean_volatality_stat(hf))

Unnamed: 0,Annual_Mean,Annual_Std,Sharpe Ratio
HFRIFWI Index,0.051,0.062,0.826
MLEIFCTR Index,0.039,0.054,0.721
MLEIFCTX Index,0.037,0.054,0.695
HDG US Equity,0.028,0.056,0.498
QAI US Equity,0.025,0.045,0.56


# Question 2) 2.(a)

In [38]:
hf.skew()

HFRIFWI Index    -1.109
MLEIFCTR Index   -0.187
MLEIFCTX Index   -0.172
HDG US Equity    -0.180
QAI US Equity    -0.545
dtype: float64

# Question 2) 2.(b)

In [39]:
hf.kurtosis() - 3

HFRIFWI Index     3.730
MLEIFCTR Index   -0.571
MLEIFCTX Index   -0.624
HDG US Equity    -0.418
QAI US Equity    -0.330
dtype: float64

# Question 2) 2.(c)

In [40]:
hf.quantile(0.05)

HFRIFWI Index    -0.025
MLEIFCTR Index   -0.026
MLEIFCTX Index   -0.026
HDG US Equity    -0.029
QAI US Equity    -0.016
Name: 0.05, dtype: float64

# Question 2) 2.(d)

In [41]:
(hf[hf < hf.quantile(0.05)]).mean()

HFRIFWI Index    -0.039
MLEIFCTR Index   -0.033
MLEIFCTX Index   -0.033
HDG US Equity    -0.035
QAI US Equity    -0.026
dtype: float64

# Question 2) 2.(e)

In [42]:
#Maximum Drawdown
def maximumDrawdown(returns):
    cum_returns = (1 + returns).cumprod()
    rolling_max = cum_returns.cummax()
    drawdown = (cum_returns - rolling_max) / rolling_max

    max_drawdown = drawdown.min()
    end_date = drawdown.idxmin()
    summary = pd.DataFrame({'Max Drawdown': max_drawdown, 'Bottom': end_date})

    for col in drawdown:
        summary.loc[col,'Peak'] = (rolling_max.loc[:end_date[col],col]).idxmax()
        recovery = (drawdown.loc[end_date[col]:,col])
        try:
            summary.loc[col,'Recover'] = pd.to_datetime(recovery[recovery >= 0].index[0])
        except:
            summary.loc[col,'Recover'] = pd.to_datetime(None)

        summary['Peak'] = pd.to_datetime(summary['Peak'])
        try:
            summary['Duration (to Recover)'] = (summary['Recover'] - summary['Peak'])
        except:
            summary['Duration (to Recover)'] = None
            
        summary = summary[['Max Drawdown','Peak','Bottom','Recover','Duration (to Recover)']]

    return summary  

In [43]:
maximumDrawdown(hf)

Unnamed: 0,Max Drawdown,Peak,Bottom,Recover,Duration (to Recover)
HFRIFWI Index,-0.115,2019-12-31,2020-03-31,2020-08-31,244 days
MLEIFCTR Index,-0.084,2019-12-31,2020-03-31,2020-11-30,335 days
MLEIFCTX Index,-0.084,2019-12-31,2020-03-31,2020-11-30,335 days
HDG US Equity,-0.088,2020-01-31,2020-03-31,2020-11-30,304 days
QAI US Equity,-0.076,2019-12-31,2020-03-31,2020-07-31,213 days


# Question 2) 3.(a)(b)(c)

In [44]:
# Again, define what is the regressor (x or exogenous variable(s)) 
# and what is the regressand (y or endogenous variable) in the regression model
# In the default setting, Scikit-Learn includes an intercept. So no need to explicitly define an intercept

exog = pd.DataFrame(mf['SPY US Equity'])
endog = pd.DataFrame(hf['HFRIFWI Index'])

# Initalize the Model

model = LinearRegression()

# Fit the model, notice the difference in syntax from the statsmodels

fitted_model = model.fit(exog, endog)

In [45]:
# To get the coefficients:

alpha = fitted_model.intercept_
beta = fitted_model.coef_[0]

# For R-squared, call the .score method:

rsq = fitted_model.score(exog, endog)

# sklearn does not return the residuals, so we need to build them
yfit = model.predict(exog)
residuals = endog - yfit
var_resid = residuals.std()

In [46]:
def get_ols_metrics(regressors, targets, annualization=1, ignorenan=True):
    # ensure regressors and targets are pandas dataframes, as expected
    if not isinstance(regressors, pd.DataFrame):
        regressors = regressors.to_frame()
    if not isinstance(targets, pd.DataFrame):
        targets = targets.to_frame()

    # align the targets and regressors on the same dates
    df_aligned = targets.join(regressors, how='inner', lsuffix='y ')
    Y = df_aligned[targets.columns]
    Xset = df_aligned[regressors.columns]

    reg = pd.DataFrame(index=targets.columns)
    for col in Y.columns:
        y = Y[col]
        
        if ignorenan:
            # ensure we use only non-NaN dates
            alldata = Xset.join(y,lsuffix='X')
            mask = alldata.notnull().all(axis=1)
            y = y[mask]
            X = Xset[mask]
        else:
            X = Xset

        model = LinearRegression().fit(X, y)
        reg.loc[col, 'alpha'] = model.intercept_ * annualization
        reg.loc[col, regressors.columns] = model.coef_
        reg.loc[col, 'r-squared'] = model.score(X, y)
        reg.loc[col, 'beta'] = model.coef_[0]

        # sklearn does not return the residuals, so we need to build them
        yfit = model.predict(X)
        residuals = y - yfit

        # Treynor Ratio is only defined for univariate regression
        if Xset.shape[1] == 1:
            reg.loc[col,'Treynor Ratio'] = (y.mean() / model.coef_) * annualization

        
        # if intercept =0, numerical roundoff will nonetheless show nonzero Info Ratio
        num_roundoff = 1e-12
        if np.abs(model.intercept_) < num_roundoff:
            reg.loc[col, 'Info Ratio'] = None
        else:
            reg.loc[col, 'Info Ratio'] = (model.intercept_ / residuals.std()) * np.sqrt(annualization)

    return reg

In [47]:
get_ols_metrics(mf, hf, annualization=1, ignorenan=True)

Unnamed: 0,alpha,SPY US Equity,USGG3M Index,EEM US Equity,EFA US Equity,EUO US Equity,IWM US Equity,r-squared,beta,Info Ratio
HFRIFWI Index,0.001,0.072,-0.401,0.072,0.106,0.022,0.131,0.856,0.072,0.17
MLEIFCTR Index,0.0,0.039,0.536,0.069,0.14,0.036,0.112,0.948,0.039,0.026
MLEIFCTX Index,-0.0,0.037,0.51,0.069,0.14,0.036,0.111,0.948,0.037,-0.0
HDG US Equity,-0.001,0.041,0.575,0.066,0.144,0.035,0.116,0.91,0.041,-0.186
QAI US Equity,-0.001,0.142,0.568,0.067,0.049,-0.003,0.024,0.788,0.142,-0.112
