In [69]:
import pandas as pd
pd.options.display.float_format = '{:.3f}'.format
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression 

In [70]:
equities = pd.read_excel("/Users/michelleyang/Desktop/proshares_analysis_data.xlsx", sheet_name='hedge_fund_series')

In [71]:
# Read the excel file for this homework

#Descriptions sheet. Note the use of .rename to correct column names that are not correctely named in the source file
desc = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','descriptions').rename(columns={'Unnamed: 0':'Symbol'}).set_index('Symbol')

#Hedge Fund Data
hf = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','hedge_fund_series').set_index('date')

#Merrill Lynch Factor Data
mf = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','merrill_factors').set_index('date')

#Other Data
od = pd.read_excel('/Users/michelleyang/Desktop/proshares_analysis_data.xlsx','other_data').set_index('date')

In [72]:
hf.head()

Unnamed: 0_level_0,HFRIFWI Index,MLEIFCTR Index,MLEIFCTX Index,HDG US Equity,QAI US Equity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-08-31,-0.032,-0.026,-0.026,-0.027,-0.006
2011-09-30,-0.039,-0.032,-0.033,-0.032,-0.022
2011-10-31,0.027,0.044,0.043,0.051,0.025
2011-11-30,-0.013,-0.012,-0.012,-0.029,-0.008
2011-12-31,-0.004,0.002,0.002,0.013,0.002


In [73]:
mf.head()

Unnamed: 0_level_0,SPY US Equity,USGG3M Index,EEM US Equity,EFA US Equity,EUO US Equity,IWM US Equity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-08-31,-0.055,0.0,-0.093,-0.088,-0.006,-0.089
2011-09-30,-0.069,0.0,-0.179,-0.108,0.142,-0.112
2011-10-31,0.109,-0.0,0.163,0.096,-0.07,0.151
2011-11-30,-0.004,0.0,-0.02,-0.022,0.055,-0.004
2011-12-31,0.01,0.0,-0.043,-0.022,0.076,0.005


# Question 2) 1.(a)(b)(c)

In [74]:
def mean_volatality_stat(hf, date_index = True, annualize = 12):
    if(date_index == True):
        mean_vol_table = hf.describe().loc[['mean','std']].transpose()
    if annualize:
        mean_vol_table['Annual_Mean'], mean_vol_table['Annual_Std']  = mean_vol_table['mean'] * annualize, mean_vol_table['std'] * np.sqrt(annualize)
        mean_vol_table['Sharpe Ratio'] = mean_vol_table['Annual_Mean'] / mean_vol_table['Annual_Std']
    return(mean_vol_table[['Annual_Mean','Annual_Std', 'Sharpe Ratio']])

display(mean_volatality_stat(hf))

Unnamed: 0,Annual_Mean,Annual_Std,Sharpe Ratio
HFRIFWI Index,0.051,0.062,0.826
MLEIFCTR Index,0.039,0.054,0.721
MLEIFCTX Index,0.037,0.054,0.695
HDG US Equity,0.028,0.056,0.498
QAI US Equity,0.025,0.045,0.56


# Question 2) 2.(a)

In [75]:
hf.skew()

HFRIFWI Index    -1.109
MLEIFCTR Index   -0.187
MLEIFCTX Index   -0.172
HDG US Equity    -0.180
QAI US Equity    -0.545
dtype: float64

# Question 2) 2.(b)

In [76]:
hf.kurtosis() - 3

HFRIFWI Index     3.730
MLEIFCTR Index   -0.571
MLEIFCTX Index   -0.624
HDG US Equity    -0.418
QAI US Equity    -0.330
dtype: float64

# Question 2) 2.(c)

In [77]:
hf.quantile(0.05)

HFRIFWI Index    -0.025
MLEIFCTR Index   -0.026
MLEIFCTX Index   -0.026
HDG US Equity    -0.029
QAI US Equity    -0.016
Name: 0.05, dtype: float64

# Question 2) 2.(d)

In [78]:
(hf[hf < hf.quantile(0.05)]).mean()

HFRIFWI Index    -0.039
MLEIFCTR Index   -0.033
MLEIFCTX Index   -0.033
HDG US Equity    -0.035
QAI US Equity    -0.026
dtype: float64

# Question 2) 2.(e)

In [79]:
#Maximum Drawdown
def maximumDrawdown(returns):
    cum_returns = (1 + returns).cumprod()
    rolling_max = cum_returns.cummax()
    drawdown = (cum_returns - rolling_max) / rolling_max

    max_drawdown = drawdown.min()
    end_date = drawdown.idxmin()
    summary = pd.DataFrame({'Max Drawdown': max_drawdown, 'Bottom': end_date})

    for col in drawdown:
        summary.loc[col,'Peak'] = (rolling_max.loc[:end_date[col],col]).idxmax()
        recovery = (drawdown.loc[end_date[col]:,col])
        try:
            summary.loc[col,'Recover'] = pd.to_datetime(recovery[recovery >= 0].index[0])
        except:
            summary.loc[col,'Recover'] = pd.to_datetime(None)

        summary['Peak'] = pd.to_datetime(summary['Peak'])
        try:
            summary['Duration (to Recover)'] = (summary['Recover'] - summary['Peak'])
        except:
            summary['Duration (to Recover)'] = None
            
        summary = summary[['Max Drawdown','Peak','Bottom','Recover','Duration (to Recover)']]

    return summary  

In [80]:
maximumDrawdown(hf)

Unnamed: 0,Max Drawdown,Peak,Bottom,Recover,Duration (to Recover)
HFRIFWI Index,-0.115,2019-12-31,2020-03-31,2020-08-31,244 days
MLEIFCTR Index,-0.084,2019-12-31,2020-03-31,2020-11-30,335 days
MLEIFCTX Index,-0.084,2019-12-31,2020-03-31,2020-11-30,335 days
HDG US Equity,-0.088,2020-01-31,2020-03-31,2020-11-30,304 days
QAI US Equity,-0.076,2019-12-31,2020-03-31,2020-07-31,213 days


# Question 2) 3.(a)(b)(c)

In [81]:
# We first need to define what is the regressor (x or exogenous variable(s)) 
# and what is the regressand (y or endogenous variable) in the regression model

exog = pd.DataFrame(mf['SPY US Equity'])
endog = pd.DataFrame(hf['HFRIFWI Index'])

# We will run this regression with an intercept. 
# In-case you don't want to include an intercept (exercise for extension) you can skip this step
# sm.add_constant adds a constant unit vector to the exogenous vector

exog = sm.add_constant(exog)
display(exog.head())

Unnamed: 0_level_0,const,SPY US Equity
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-08-31,1.0,-0.055
2011-09-30,1.0,-0.069
2011-10-31,1.0,0.109
2011-11-30,1.0,-0.004
2011-12-31,1.0,0.01


In [82]:
model = sm.regression.linear_model.OLS(endog, exog)

# Fit the model to the data

fitted_model = model.fit()

# Display and analyze the summary of the model

print(fitted_model.summary())

                            OLS Regression Results                            
Dep. Variable:          HFRIFWI Index   R-squared:                       0.753
Model:                            OLS   Adj. R-squared:                  0.751
Method:                 Least Squares   F-statistic:                     366.0
Date:                Mon, 27 Jun 2022   Prob (F-statistic):           2.98e-38
Time:                        22:53:48   Log-Likelihood:                 404.50
No. Observations:                 122   AIC:                            -805.0
Df Residuals:                     120   BIC:                            -799.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -0.0006      0.001     -0.733

In [83]:
# To access the coefficients of the regression, use the following command
# params[j] where j = 0 for intercept coefficient and {1,2...,N} for Nth Beta Coefficient

alpha = fitted_model.params[0]
beta1 = fitted_model.params[1]

#For the R-squared, use:

rsq = fitted_model.rsquared

#To calculate the residuals (errors) use the .resid command. Variance of residuals is used to calculate information ratio

residuals = fitted_model.resid
var_resid = residuals.std()

In [84]:
def regressionAnalysis(exog,endog_df,intercept=False,prediction=False,displaySummary=[],displayRatios=False):
    
    if isinstance(exog,pd.Series):
        exog = pd.DataFrame(exog)
    if isinstance(endog_df,pd.Series):
        endog_df = pd.DataFrame(endog_df)
    if displaySummary==True:
        displaySummary = endog_df.columns

    if intercept:
        exog = sm.tools.add_constant(exog)

    ratios = []
    
    # loop through each column in data (y-variable DataFrame)
    
    for col,endog in pd.DataFrame(endog_df).iteritems():
        model = sm.regression.linear_model.OLS(endog,exog).fit()
        
        fitted_val = model.predict(exog).mean()
        betas = []
        treynor_ratios = []

        if intercept:
            exogCols = exog.columns[1:]
        else:
            exogCols = exog.columns
        for x in exogCols:
            betas.append('{:.3f}'.format(model.params[x]))
            treynor_ratios.append('{:.3f}'.format((endog.mean()/model.params[x])*12))
        if len(betas)==1:
            betas = betas[0]
            treynor_ratios = treynor_ratios[0]
        if intercept:
            alpha = model.params['const']
            info_ratio = (alpha/model.resid.std())*np.sqrt(12)
        else:
            alpha = None
            info_ratio = None
        r_squared = model.rsquared
        tracking_err = model.resid.std()
        ratios.append([fitted_val,alpha,info_ratio,betas,treynor_ratios,r_squared,tracking_err])

        if col in displaySummary:
            print(model.summary())
    
    regRatios = pd.DataFrame(list(zip(*ratios)),
                             columns=endog_df.columns,
                             index=['avg_fitted_val',
                                    'alpha','information_ratio',
                                    'beta','treynor_ratio',
                                    'r_squared','tracking_error'])
    
    if prediction:
        return model.predict(exog)

    if displayRatios:
        return regRatios

    
regressionAnalysis(mf['SPY US Equity'],hf,intercept=True,displayRatios=True)

Unnamed: 0,HFRIFWI Index,MLEIFCTR Index,MLEIFCTX Index,HDG US Equity,QAI US Equity
avg_fitted_val,0.004,0.003,0.003,0.002,0.002
alpha,-0.001,-0.001,-0.001,-0.002,-0.001
information_ratio,-0.242,-0.615,-0.671,-1.011,-0.687
beta,0.394,0.359,0.358,0.369,0.285
treynor_ratio,0.129,0.108,0.104,0.076,0.089
r_squared,0.753,0.816,0.815,0.786,0.719
tracking_error,0.009,0.007,0.007,0.008,0.007
