# Group Project
#### **FINN43815 - Python for Finance**

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import kurtosis, skew
import yfinance as yf

### HML Strategy (Q=100)

#### 1. Import Data

In [2]:
data = pd.read_csv('cleaned_data.csv')

  data = pd.read_csv('cleaned_data.csv')


In [3]:
# drop unnecessary columns
data = data[["date", "permno", "price", "p2b"]]
data

Unnamed: 0,date,permno,price,p2b
0,1990-01-31,0111145D UN Equity,20.6875,
1,1990-02-28,0111145D UN Equity,22.0000,
2,1990-03-30,0111145D UN Equity,19.5000,
3,1990-04-30,0111145D UN Equity,17.6250,1.5612
4,1990-05-31,0111145D UN Equity,19.8125,1.7550
...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386
514692,2022-05-31,J UN Equity,140.0900,2.9712
514693,2022-06-30,J UN Equity,127.1300,2.6964
514694,2022-07-29,J UN Equity,137.3000,2.9522


#### 2. Calculate monthly returns

In [4]:
data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)
data.head()

  data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)


Unnamed: 0,date,permno,price,p2b,rets
0,1990-01-31,0111145D UN Equity,20.6875,,
1,1990-02-28,0111145D UN Equity,22.0,,0.063444
2,1990-03-30,0111145D UN Equity,19.5,,-0.113636
3,1990-04-30,0111145D UN Equity,17.625,1.5612,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1.755,0.124113


#### 3. Sort all stocks into 10 equal groups by size

In [5]:
# Filter out null values fro return and cap
data = data.loc[(~data['rets'].isnull()) & ~(data['p2b'].isnull())]
data 

Unnamed: 0,date,permno,price,p2b,rets
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217
...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997


In [6]:
# sort all stocks into 10 different groups
data.loc[:,'port'] = data.groupby('date')['p2b'].transform(pd.qcut, q=100, labels=range(1,101)).astype(str)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:,'port'] = data.groupby('date')['p2b'].transform(pd.qcut, q=100, labels=range(1,101)).astype(str)


Unnamed: 0,date,permno,price,p2b,rets,port
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154,46
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113,48
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164,47
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304,50
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217,58
...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370,48
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115,48
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512,49
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997,49


#### 4. Calculate the trading period returns for each portfolio

In [7]:
# add columns for future retuns
data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)


Unnamed: 0,date,permno,price,p2b,rets,port,rets_t1
3,1990-04-30,0111145D UN Equity,17.6250,1.5612,-0.096154,46,0.124113
4,1990-05-31,0111145D UN Equity,19.8125,1.7550,0.124113,48,-0.044164
5,1990-06-29,0111145D UN Equity,18.9375,1.6775,-0.044164,47,0.036304
6,1990-07-31,0111145D UN Equity,19.6250,1.7262,0.036304,50,0.038217
7,1990-08-31,0111145D UN Equity,20.3750,1.7922,0.038217,58,0.067485
...,...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,2.9386,0.005370,48,0.011115
514692,2022-05-31,J UN Equity,140.0900,2.9712,0.011115,48,-0.092512
514693,2022-06-30,J UN Equity,127.1300,2.6964,-0.092512,49,0.079997
514694,2022-07-29,J UN Equity,137.3000,2.9522,0.079997,49,-0.093664


In [8]:
# calculate the achieved mean return return for each month and portfolio
port_df = data.loc[~data['port'].isnull()].groupby(['date','port'])['rets_t1'].mean().reset_index()
port_df

Unnamed: 0,date,port,rets_t1
0,1990-02-28,1,0.033242
1,1990-02-28,10,-0.126866
2,1990-02-28,100,-0.156561
3,1990-02-28,11,0.179321
4,1990-02-28,12,-0.064691
...,...,...,...
39088,2022-08-30,95,
39089,2022-08-30,96,
39090,2022-08-30,97,
39091,2022-08-30,98,


#### 5. Calculate the returns on HML strategy

In [9]:
port_df = port_df.pivot(index = 'date', columns = 'port', values = 'rets_t1').reset_index()

In [10]:
port_df

port,date,1,10,100,11,12,13,14,15,16,...,90,91,92,93,94,95,96,97,98,99
0,1990-02-28,0.033242,-0.126866,-0.156561,0.179321,-0.064691,0.035088,0.082098,-0.029750,0.000000,...,-0.057118,0.044940,-0.028270,0.159240,0.150790,0.038678,-0.074283,,0.121464,0.065419
1,1990-03-30,-0.051843,-0.051948,0.004823,0.131632,-0.079424,-0.131436,-0.142921,-0.019674,-0.078623,...,-0.140303,-0.015280,0.172814,-0.009038,0.022134,-0.117182,-0.019656,0.047458,-0.064597,0.011029
2,1990-04-30,0.067118,0.065093,0.147206,0.089874,0.019866,0.075480,0.163417,0.192063,0.093354,...,0.152722,0.070459,0.131843,0.163947,0.176467,0.114783,0.133280,0.178196,0.185923,0.014659
3,1990-05-31,0.030538,-0.045115,-0.029486,-0.061293,-0.078423,0.048612,0.019393,-0.013329,-0.021832,...,0.008645,0.027255,0.041228,0.005195,0.056063,0.052660,0.091324,0.120535,-0.006898,0.021891
4,1990-06-29,0.021787,-0.024645,-0.059288,-0.044636,-0.099055,-0.036241,-0.055929,0.012651,0.023414,...,-0.082498,-0.028524,-0.002546,0.000209,-0.068353,0.044413,-0.031074,0.009013,-0.090258,-0.062963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.105623,-0.055662,0.025402,0.044583,0.055018,0.062394,0.010615,0.064913,0.044247,...,-0.050976,0.031649,-0.001340,-0.027785,-0.035516,0.016096,-0.005314,-0.025591,0.036190,0.063377
387,2022-05-31,-0.164150,-0.141159,-0.048654,-0.124567,-0.117195,-0.119350,-0.121270,-0.122420,-0.114944,...,-0.083564,-0.114077,-0.044625,-0.069244,-0.074745,-0.096837,-0.052114,-0.080950,-0.055073,-0.061737
388,2022-06-30,0.156580,0.115007,0.063567,0.105947,0.111160,0.092972,0.059768,0.083085,0.065666,...,0.046000,0.122856,0.070052,0.178872,0.114934,0.101067,0.075926,0.112517,0.115665,0.184268
389,2022-07-29,0.014708,-0.046845,-0.041762,0.006201,-0.011484,-0.032647,0.003154,-0.026105,0.010720,...,-0.019048,-0.056473,-0.022023,-0.102442,0.003677,0.019909,-0.016221,-0.034079,-0.043464,-0.087413


In [11]:
port_df['HML_rets'] = port_df['1']-port_df['100']
port_df['HML_rets'] = port_df['HML_rets'].transform('shift', periods=1)
port_df

port,date,1,10,100,11,12,13,14,15,16,...,91,92,93,94,95,96,97,98,99,HML_rets
0,1990-02-28,0.033242,-0.126866,-0.156561,0.179321,-0.064691,0.035088,0.082098,-0.029750,0.000000,...,0.044940,-0.028270,0.159240,0.150790,0.038678,-0.074283,,0.121464,0.065419,
1,1990-03-30,-0.051843,-0.051948,0.004823,0.131632,-0.079424,-0.131436,-0.142921,-0.019674,-0.078623,...,-0.015280,0.172814,-0.009038,0.022134,-0.117182,-0.019656,0.047458,-0.064597,0.011029,0.189803
2,1990-04-30,0.067118,0.065093,0.147206,0.089874,0.019866,0.075480,0.163417,0.192063,0.093354,...,0.070459,0.131843,0.163947,0.176467,0.114783,0.133280,0.178196,0.185923,0.014659,-0.056666
3,1990-05-31,0.030538,-0.045115,-0.029486,-0.061293,-0.078423,0.048612,0.019393,-0.013329,-0.021832,...,0.027255,0.041228,0.005195,0.056063,0.052660,0.091324,0.120535,-0.006898,0.021891,-0.080088
4,1990-06-29,0.021787,-0.024645,-0.059288,-0.044636,-0.099055,-0.036241,-0.055929,0.012651,0.023414,...,-0.028524,-0.002546,0.000209,-0.068353,0.044413,-0.031074,0.009013,-0.090258,-0.062963,0.060024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.105623,-0.055662,0.025402,0.044583,0.055018,0.062394,0.010615,0.064913,0.044247,...,0.031649,-0.001340,-0.027785,-0.035516,0.016096,-0.005314,-0.025591,0.036190,0.063377,-0.015806
387,2022-05-31,-0.164150,-0.141159,-0.048654,-0.124567,-0.117195,-0.119350,-0.121270,-0.122420,-0.114944,...,-0.114077,-0.044625,-0.069244,-0.074745,-0.096837,-0.052114,-0.080950,-0.055073,-0.061737,0.080221
388,2022-06-30,0.156580,0.115007,0.063567,0.105947,0.111160,0.092972,0.059768,0.083085,0.065666,...,0.122856,0.070052,0.178872,0.114934,0.101067,0.075926,0.112517,0.115665,0.184268,-0.115497
389,2022-07-29,0.014708,-0.046845,-0.041762,0.006201,-0.011484,-0.032647,0.003154,-0.026105,0.010720,...,-0.056473,-0.022023,-0.102442,0.003677,0.019909,-0.016221,-0.034079,-0.043464,-0.087413,0.093013


In [12]:
# calculate cumulative returns
port_df['HML_cum_rets'] = (1+port_df['HML_rets']).cumprod()

In [13]:
port_df

port,date,1,10,100,11,12,13,14,15,16,...,92,93,94,95,96,97,98,99,HML_rets,HML_cum_rets
0,1990-02-28,0.033242,-0.126866,-0.156561,0.179321,-0.064691,0.035088,0.082098,-0.029750,0.000000,...,-0.028270,0.159240,0.150790,0.038678,-0.074283,,0.121464,0.065419,,
1,1990-03-30,-0.051843,-0.051948,0.004823,0.131632,-0.079424,-0.131436,-0.142921,-0.019674,-0.078623,...,0.172814,-0.009038,0.022134,-0.117182,-0.019656,0.047458,-0.064597,0.011029,0.189803,1.189803
2,1990-04-30,0.067118,0.065093,0.147206,0.089874,0.019866,0.075480,0.163417,0.192063,0.093354,...,0.131843,0.163947,0.176467,0.114783,0.133280,0.178196,0.185923,0.014659,-0.056666,1.122382
3,1990-05-31,0.030538,-0.045115,-0.029486,-0.061293,-0.078423,0.048612,0.019393,-0.013329,-0.021832,...,0.041228,0.005195,0.056063,0.052660,0.091324,0.120535,-0.006898,0.021891,-0.080088,1.032492
4,1990-06-29,0.021787,-0.024645,-0.059288,-0.044636,-0.099055,-0.036241,-0.055929,0.012651,0.023414,...,-0.002546,0.000209,-0.068353,0.044413,-0.031074,0.009013,-0.090258,-0.062963,0.060024,1.094467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,0.105623,-0.055662,0.025402,0.044583,0.055018,0.062394,0.010615,0.064913,0.044247,...,-0.001340,-0.027785,-0.035516,0.016096,-0.005314,-0.025591,0.036190,0.063377,-0.015806,1.846244
387,2022-05-31,-0.164150,-0.141159,-0.048654,-0.124567,-0.117195,-0.119350,-0.121270,-0.122420,-0.114944,...,-0.044625,-0.069244,-0.074745,-0.096837,-0.052114,-0.080950,-0.055073,-0.061737,0.080221,1.994352
388,2022-06-30,0.156580,0.115007,0.063567,0.105947,0.111160,0.092972,0.059768,0.083085,0.065666,...,0.070052,0.178872,0.114934,0.101067,0.075926,0.112517,0.115665,0.184268,-0.115497,1.764011
389,2022-07-29,0.014708,-0.046845,-0.041762,0.006201,-0.011484,-0.032647,0.003154,-0.026105,0.010720,...,-0.022023,-0.102442,0.003677,0.019909,-0.016221,-0.034079,-0.043464,-0.087413,0.093013,1.928086


In [14]:
px.line(port_df, y='HML_cum_rets', x='date', labels={'HML_cum_rets': 'Cumulative returns'}, title = 'High-minus-Low').show()

In [15]:
px.histogram(port_df[['HML_rets']]).show()

In [16]:
port_df["date"]= pd.to_datetime(port_df.date)

In [17]:
monthly_rets = port_df[["date", "HML_rets"]].resample('M', on='date').mean() # Calculate average daily return for each month
monthly_rets['Colour']='red' # Add column with 'red' value
monthly_rets.loc[monthly_rets['HML_rets']>=0, 'Colour']='green' # Replace 'red' with 'green' where return is non-negative
px.bar(monthly_rets,x=monthly_rets.index,y="HML_rets").update_traces(marker_color=monthly_rets["Colour"]).show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [18]:
mean_ret = port_df['HML_rets'].mean()
volatility = port_df['HML_rets'].std()
print(mean_ret, volatility)

0.009702666334142303 0.1257847676399627


In [19]:
print('Annualised expected return is: '+str(round(100*mean_ret*252,2)) +'%')
print('Annualised volatility is: '+str(round(100*volatility*np.sqrt(252),2)) +'%')

Annualised expected return is: 244.51%
Annualised volatility is: 199.68%


In [20]:
(port_df['HML_rets']*100).describe()

count    390.000000
mean       0.970267
std       12.578477
min      -58.373477
25%       -5.161440
50%        0.326586
75%        6.128422
max       56.679331
Name: HML_rets, dtype: float64

In [21]:
print(skew(port_df['HML_rets'].dropna()))

0.733612055345065


In [22]:
print(kurtosis(port_df["HML_rets"].dropna()))

5.761734374791724


In [23]:
VaR = port_df['HML_rets'].quantile(0.005)
VaR

-0.3504055771828437

In [24]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['HML_rets'].quantile(q)
    print('There is a '+str(round(100*q,3))+'% chance that my losses will be '+str(round(VaR*100, 2))+'% or worse over the next day.')

There is a 10.0% chance that my losses will be -11.2% or worse over the next day.
There is a 5.0% chance that my losses will be -15.32% or worse over the next day.
There is a 1.0% chance that my losses will be -32.35% or worse over the next day.
There is a 0.1% chance that my losses will be -56.46% or worse over the next day.
There is a 0.01% chance that my losses will be -58.18% or worse over the next day.


In [25]:
VaR = port_df['HML_rets'].quantile(0.005)
rets_tail = port_df.loc[port_df['HML_rets']<=VaR,["date", "HML_rets", "HML_cum_rets"]]
rets_tail

port,date,HML_rets,HML_cum_rets
100,1998-06-30,-0.534518,0.568058
120,2000-02-29,-0.583735,0.132963


In [26]:
ES = rets_tail['HML_rets'].mean()
ES

-0.5591265542584181

In [27]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['HML_rets'].quantile(q)
    rets_tail = port_df.loc[port_df['HML_rets']<=VaR,:]
    ES = rets_tail['HML_rets'].mean()
    print('I expect to lose on average '+ str(round(ES*100, 2))+'% over 1-day period given that I have exceeded my VaR for '+str(round(100*q,3))+'%.')

I expect to lose on average -19.1% over 1-day period given that I have exceeded my VaR for 10.0%.
I expect to lose on average -25.13% over 1-day period given that I have exceeded my VaR for 5.0%.
I expect to lose on average -44.77% over 1-day period given that I have exceeded my VaR for 1.0%.
I expect to lose on average -58.37% over 1-day period given that I have exceeded my VaR for 0.1%.
I expect to lose on average -58.37% over 1-day period given that I have exceeded my VaR for 0.01%.


In [28]:
# according to bloomberg the risk free rate (treasury yields) 4.76 % (last 30 years)
# calculation of sharpe ratio

rf = 0.0476
sharpe_ratio = (mean_ret-rf)/volatility
sharpe_ratio

-0.3012871461060556

In [29]:
# get SP500 data from 1990-01-31 to 2022-08-30	
sp500_ticker = yf.Ticker("^GSPC")
sp500 = sp500_ticker.history(start="1990-02-28", end="2022-09-30", interval="1mo")
sp500 = sp500[["Close"]]
sp500["Rets"] = sp500.Close.pct_change()

In [30]:
sp500

Unnamed: 0_level_0,Close,Rets
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-03-01 00:00:00-05:00,339.940002,
1990-04-01 00:00:00-05:00,330.799988,-0.026887
1990-05-01 00:00:00-04:00,361.230011,0.091989
1990-06-01 00:00:00-04:00,358.019989,-0.008886
1990-07-01 00:00:00-04:00,356.149994,-0.005223
...,...,...
2022-05-01 00:00:00-04:00,4132.149902,0.000053
2022-06-01 00:00:00-04:00,3785.379883,-0.083920
2022-07-01 00:00:00-04:00,4130.290039,0.091116
2022-08-01 00:00:00-04:00,3955.000000,-0.042440


In [31]:
# Calculate the average return of the market
average_market_return = sp500.Rets.mean()

# Calculate the covariance between portfolio and market returns
covariance = port_df['HML_rets'].cov(sp500['Rets'].reset_index(drop=True))

# Calculate the variance of the market returns
market_variance = sp500['Rets'].var()

# Calculate the portfolio's beta
portfolio_beta = covariance / market_variance

# Calculate Jensen's Alpha
jensens_alpha = mean_ret - (rf + portfolio_beta * (average_market_return - rf))

print("Portfolio Beta:", portfolio_beta)
print("Jensen's Alpha:", jensens_alpha)

Portfolio Beta: -0.1866899349161033
Jensen's Alpha: -0.04547934128482169


In [32]:
# treynor ratio
treynor_ratio = (mean_ret - rf)/portfolio_beta
treynor_ratio

0.20299612661436947

In [33]:
# maximum drawdown
peak = port_df.loc[1, "HML_cum_rets"]
max_drawdown = 0
for ret in port_df["HML_cum_rets"].dropna():
    if ret >= peak:
        peak = ret
    else:
        drawdown = (peak - ret)/peak
        max_drawdown = max(max_drawdown, drawdown)

print(max_drawdown)

0.9095600417508357


In [34]:
# calmar-ratio
annualised_exp_return = round(mean_ret*12,2)
calmar_ratio = (annualised_exp_return - rf)/max_drawdown
calmar_ratio

0.0795989232999235