# Group Project
#### **FINN43815 - Python for Finance**

In [138]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import kurtosis, skew
import yfinance as yf

### HML Strategy

#### 1. Import Data

In [139]:
data = pd.read_csv('cleaned_data.csv')


Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.



In [140]:
# drop unnecessary columns
data = data[["date", "permno", "price", "p2b"]]
data

Unnamed: 0,date,permno,price,p2b
0,1990-01-31,0111145D UN Equity,20.6875,
1,1990-02-28,0111145D UN Equity,22.0000,
2,1990-03-30,0111145D UN Equity,19.5000,
3,1990-04-30,0111145D UN Equity,17.6250,1.5612
4,1990-05-31,0111145D UN Equity,19.8125,1.7550
...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386
514692,2022-05-31,J UN Equity,140.0900,2.9712
514693,2022-06-30,J UN Equity,127.1300,2.6964
514694,2022-07-29,J UN Equity,137.3000,2.9522


#### 2. Calculate monthly returns

In [141]:
data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)
data.head()





Unnamed: 0,date,permno,price,p2b,rets
0,1990-01-31,0111145D UN Equity,20.6875,,
1,1990-02-28,0111145D UN Equity,22.0,,0.063444
2,1990-03-30,0111145D UN Equity,19.5,,-0.113636
3,1990-04-30,0111145D UN Equity,17.625,1.5612,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1.755,0.124113


#### 3. Sort all stocks into 10 equal groups by size

In [142]:
# Filter out null values fro return and cap
data = data.loc[(~data['rets'].isnull()) & ~(data['p2b'].isnull())]
data 

Unnamed: 0,date,permno,price,p2b,rets
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217
...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997


In [143]:
# sort all stocks into 10 different groups
data.loc[:,'port'] = data.groupby('date')['p2b'].transform(pd.qcut, q=10, labels=range(1,11)).astype(str)
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,permno,price,p2b,rets,port
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154,5
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113,5
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164,5
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304,5
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217,6
...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370,5
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115,5
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512,5
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997,5


#### 4. Calculate the trading period returns for each portfolio

In [144]:
# add columns for future retuns
data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,permno,price,p2b,rets,port,rets_t1
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154,5,0.124113
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113,5,-0.044164
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164,5,0.036304
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304,5,0.038217
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217,6,0.067485
...,...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370,5,0.011115
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115,5,-0.092512
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512,5,0.079997
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997,5,-0.093664


In [145]:
# calculate the achieved mean return return for each month and portfolio
port_df = data.loc[~data['port'].isnull()].groupby(['date','port'])['rets_t1'].mean().reset_index()
port_df

Unnamed: 0,date,port,rets_t1
0,1990-02-28,1,0.058373
1,1990-02-28,10,0.032882
2,1990-02-28,2,0.058850
3,1990-02-28,3,0.016039
4,1990-02-28,4,0.069584
...,...,...,...
3905,2022-08-30,5,
3906,2022-08-30,6,
3907,2022-08-30,7,
3908,2022-08-30,8,


#### 5. Calculate the returns on HML strategy

In [146]:
port_df = port_df.pivot(index = 'date', columns = 'port', values = 'rets_t1').reset_index()

In [147]:
port_df

port,date,1,10,2,3,4,5,6,7,8,9
0,1990-02-28,0.058373,0.032882,0.058850,0.016039,0.069584,0.067306,0.064337,0.034685,0.080269,0.072513
1,1990-03-30,-0.022048,-0.006201,-0.050032,-0.029563,-0.034384,-0.014393,-0.034759,-0.030832,-0.012337,-0.038187
2,1990-04-30,0.083160,0.134593,0.090239,0.087674,0.086855,0.098119,0.090896,0.096373,0.124065,0.142879
3,1990-05-31,-0.018901,0.037670,-0.015339,-0.007718,-0.028059,-0.021199,-0.015391,0.007216,-0.000568,0.030464
4,1990-06-29,-0.039871,-0.028269,-0.032141,-0.024673,-0.011727,-0.017465,-0.011508,-0.030474,-0.008174,-0.042653
...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.037972,0.006857,0.043861,0.026903,0.028772,0.011848,0.012112,-0.012093,0.003952,-0.025084
387,2022-05-31,-0.141133,-0.070887,-0.119767,-0.102332,-0.120173,-0.101374,-0.105185,-0.105989,-0.091582,-0.078717
388,2022-06-30,0.096028,0.113822,0.087731,0.066327,0.066367,0.076868,0.072401,0.086043,0.112084,0.107559
389,2022-07-29,-0.021782,-0.037529,-0.011224,-0.009498,-0.020434,-0.021681,-0.028096,-0.018516,-0.050197,-0.039783


In [148]:
port_df['HML_rets'] = port_df['1']-port_df['10']
port_df['HML_rets'] = port_df['HML_rets'].transform('shift', periods=1)
port_df

port,date,1,10,2,3,4,5,6,7,8,9,HML_rets
0,1990-02-28,0.058373,0.032882,0.058850,0.016039,0.069584,0.067306,0.064337,0.034685,0.080269,0.072513,
1,1990-03-30,-0.022048,-0.006201,-0.050032,-0.029563,-0.034384,-0.014393,-0.034759,-0.030832,-0.012337,-0.038187,0.025491
2,1990-04-30,0.083160,0.134593,0.090239,0.087674,0.086855,0.098119,0.090896,0.096373,0.124065,0.142879,-0.015846
3,1990-05-31,-0.018901,0.037670,-0.015339,-0.007718,-0.028059,-0.021199,-0.015391,0.007216,-0.000568,0.030464,-0.051434
4,1990-06-29,-0.039871,-0.028269,-0.032141,-0.024673,-0.011727,-0.017465,-0.011508,-0.030474,-0.008174,-0.042653,-0.056571
...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.037972,0.006857,0.043861,0.026903,0.028772,0.011848,0.012112,-0.012093,0.003952,-0.025084,0.013869
387,2022-05-31,-0.141133,-0.070887,-0.119767,-0.102332,-0.120173,-0.101374,-0.105185,-0.105989,-0.091582,-0.078717,0.031115
388,2022-06-30,0.096028,0.113822,0.087731,0.066327,0.066367,0.076868,0.072401,0.086043,0.112084,0.107559,-0.070246
389,2022-07-29,-0.021782,-0.037529,-0.011224,-0.009498,-0.020434,-0.021681,-0.028096,-0.018516,-0.050197,-0.039783,-0.017794


In [149]:
# calculate cumulative returns
port_df['HML_cum_rets'] = (1+port_df['HML_rets']).cumprod()

In [150]:
port_df

port,date,1,10,2,3,4,5,6,7,8,9,HML_rets,HML_cum_rets
0,1990-02-28,0.058373,0.032882,0.058850,0.016039,0.069584,0.067306,0.064337,0.034685,0.080269,0.072513,,
1,1990-03-30,-0.022048,-0.006201,-0.050032,-0.029563,-0.034384,-0.014393,-0.034759,-0.030832,-0.012337,-0.038187,0.025491,1.025491
2,1990-04-30,0.083160,0.134593,0.090239,0.087674,0.086855,0.098119,0.090896,0.096373,0.124065,0.142879,-0.015846,1.009241
3,1990-05-31,-0.018901,0.037670,-0.015339,-0.007718,-0.028059,-0.021199,-0.015391,0.007216,-0.000568,0.030464,-0.051434,0.957332
4,1990-06-29,-0.039871,-0.028269,-0.032141,-0.024673,-0.011727,-0.017465,-0.011508,-0.030474,-0.008174,-0.042653,-0.056571,0.903175
...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.037972,0.006857,0.043861,0.026903,0.028772,0.011848,0.012112,-0.012093,0.003952,-0.025084,0.013869,0.253647
387,2022-05-31,-0.141133,-0.070887,-0.119767,-0.102332,-0.120173,-0.101374,-0.105185,-0.105989,-0.091582,-0.078717,0.031115,0.261539
388,2022-06-30,0.096028,0.113822,0.087731,0.066327,0.066367,0.076868,0.072401,0.086043,0.112084,0.107559,-0.070246,0.243167
389,2022-07-29,-0.021782,-0.037529,-0.011224,-0.009498,-0.020434,-0.021681,-0.028096,-0.018516,-0.050197,-0.039783,-0.017794,0.238840


In [151]:
px.line(port_df, y='HML_cum_rets', x='date', labels={'HML_cum_rets': 'Cumulative returns'}, title = 'High-minus-Low').show()

In [152]:
px.histogram(port_df[['HML_rets']]).show()

In [153]:
port_df["date"]= pd.to_datetime(port_df.date)

In [154]:
monthly_rets = port_df[["date", "HML_rets"]].resample('M', on='date').mean() # Calculate average daily return for each month
monthly_rets['Colour']='red' # Add column with 'red' value
monthly_rets.loc[monthly_rets['HML_rets']>=0, 'Colour']='green' # Replace 'red' with 'green' where return is non-negative
px.bar(monthly_rets,x=monthly_rets.index,y="HML_rets").update_traces(marker_color=monthly_rets["Colour"]).show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [155]:
mean_ret = port_df['HML_rets'].mean()
volatility = port_df['HML_rets'].std()
print(mean_ret, volatility)

-0.00128236929210746 0.06866328287837034


In [156]:
print('Annualised expected return is: '+str(round(100*mean_ret*252,2)) +'%')
print('Annualised volatility is: '+str(round(100*volatility*np.sqrt(252),2)) +'%')

Annualised expected return is: -32.32%
Annualised volatility is: 109.0%


In [157]:
(port_df['HML_rets']*100).describe()

count    390.000000
mean      -0.128237
std        6.866328
min      -33.848031
25%       -3.383808
50%       -0.515036
75%        2.882070
max       43.122856
Name: HML_rets, dtype: float64

In [158]:
print(skew(port_df['HML_rets'].dropna()))

0.5303804865189324


In [159]:
print(kurtosis(port_df["HML_rets"].dropna()))

6.12636283319347


In [160]:
VaR = port_df['HML_rets'].quantile(0.005)
VaR

-0.2082706337966099

In [161]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['HML_rets'].quantile(q)
    print('There is a '+str(round(100*q,3))+'% chance that my losses will be '+str(round(VaR*100, 2))+'% or worse over the next day.')

There is a 10.0% chance that my losses will be -7.32% or worse over the next day.
There is a 5.0% chance that my losses will be -10.04% or worse over the next day.
There is a 1.0% chance that my losses will be -16.5% or worse over the next day.
There is a 0.1% chance that my losses will be -31.07% or worse over the next day.
There is a 0.01% chance that my losses will be -33.57% or worse over the next day.


In [162]:
VaR = port_df['HML_rets'].quantile(0.005)
rets_tail = port_df.loc[port_df['HML_rets']<=VaR,["date", "HML_rets", "HML_cum_rets"]]
rets_tail

port,date,HML_rets,HML_cum_rets
120,2000-02-29,-0.33848,0.090761
361,2020-03-31,-0.267133,0.126105


In [163]:
ES = rets_tail['HML_rets'].mean()
ES

-0.30280670585424874

In [164]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['HML_rets'].quantile(q)
    rets_tail = port_df.loc[port_df['HML_rets']<=VaR,:]
    ES = rets_tail['HML_rets'].mean()
    print('I expect to lose on average '+ str(round(ES*100, 2))+'% over 1-day period given that I have exceeded my VaR for '+str(round(100*q,3))+'%.')

I expect to lose on average -11.75% over 1-day period given that I have exceeded my VaR for 10.0%.
I expect to lose on average -14.82% over 1-day period given that I have exceeded my VaR for 5.0%.
I expect to lose on average -24.84% over 1-day period given that I have exceeded my VaR for 1.0%.
I expect to lose on average -33.85% over 1-day period given that I have exceeded my VaR for 0.1%.
I expect to lose on average -33.85% over 1-day period given that I have exceeded my VaR for 0.01%.


In [165]:
# according to bloomberg the risk free rate (treasury yields) 4.76 % (last 30 years)
# calculation of sharpe ratio

rf = 0.0476
sharpe_ratio = (mean_ret-rf)/volatility
sharpe_ratio

-0.7119142464932438

In [166]:
# get SP500 data from 1990-01-31 to 2022-08-30	
sp500_ticker = yf.Ticker("^GSPC")
sp500 = sp500_ticker.history(start="1990-02-28", end="2022-09-30", interval="1mo")
sp500 = sp500[["Close"]]
sp500["Rets"] = sp500.Close.pct_change()

In [167]:
sp500

Unnamed: 0_level_0,Close,Rets
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-03-01 00:00:00-05:00,339.940002,
1990-04-01 00:00:00-05:00,330.799988,-0.026887
1990-05-01 00:00:00-04:00,361.230011,0.091989
1990-06-01 00:00:00-04:00,358.019989,-0.008886
1990-07-01 00:00:00-04:00,356.149994,-0.005223
...,...,...
2022-05-01 00:00:00-04:00,4132.149902,0.000053
2022-06-01 00:00:00-04:00,3785.379883,-0.083920
2022-07-01 00:00:00-04:00,4130.290039,0.091116
2022-08-01 00:00:00-04:00,3955.000000,-0.042440


In [168]:
# Calculate the average return of the market
average_market_return = sp500.Rets.mean()

# Calculate the covariance between portfolio and market returns
covariance = port_df['HML_rets'].cov(sp500['Rets'].reset_index(drop=True))

# Calculate the variance of the market returns
market_variance = sp500['Rets'].var()

# Calculate the portfolio's beta
portfolio_beta = covariance / market_variance

# Calculate Jensen's Alpha
jensens_alpha = mean_ret - (rf + portfolio_beta * (average_market_return - rf))

print("Portfolio Beta:", portfolio_beta)
print("Jensen's Alpha:", jensens_alpha)

Portfolio Beta: -0.09615824896771613
Jensen's Alpha: -0.0527876284407975


In [169]:
# treynor ratio
treynor_ratio = (mean_ret - rf)/portfolio_beta
treynor_ratio

0.5083533635114246

In [170]:
# maximum drawdown
peak = port_df.loc[1, "HML_cum_rets"]
max_drawdown = 0
for ret in port_df["HML_cum_rets"].dropna():
    if ret >= peak:
        peak = ret
    else:
        drawdown = (peak - ret)/peak
        max_drawdown = max(max_drawdown, drawdown)

print(max_drawdown)

0.9114953622751586


In [171]:
# calmar-ratio
annualised_exp_return = round(mean_ret*12,2)
calmar_ratio = (annualised_exp_return - rf)/max_drawdown
calmar_ratio

-0.07416384415962962