# Group Project
#### **FINN43815 - Python for Finance**

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import kurtosis, skew
import yfinance as yf

### SMB Strategy (Q=100)

#### 1. Import Data

In [2]:
data = pd.read_csv('cleaned_data.csv')

  data = pd.read_csv('cleaned_data.csv')


In [3]:
# drop unnecessary columns
data = data[["date", "permno", "price", "cap"]]
data

Unnamed: 0,date,permno,price,cap
0,1990-01-31,0111145D UN Equity,20.6875,1226.3674
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362
...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731
514692,2022-05-31,J UN Equity,140.0900,18019.3940
514693,2022-06-30,J UN Equity,127.1300,16352.3846
514694,2022-07-29,J UN Equity,137.3000,17660.5239


#### 2. Calculate monthly returns

In [4]:
data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)
data.head()

  data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)


Unnamed: 0,date,permno,price,cap,rets
0,1990-01-31,0111145D UN Equity,20.6875,1226.3674,
1,1990-02-28,0111145D UN Equity,22.0,1303.3636,0.063444
2,1990-03-30,0111145D UN Equity,19.5,1155.2541,-0.113636
3,1990-04-30,0111145D UN Equity,17.625,1021.1925,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113


#### 3. Sort all stocks into 10 equal groups by size

In [5]:
# Filter out null values fro return and cap
data = data.loc[(~data['rets'].isnull()) & ~(data['cap'].isnull())]
data 

Unnamed: 0,date,permno,price,cap,rets
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164
...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997


In [7]:
# sort all stocks into 10 different groups
data.loc[:,'port'] = data.groupby('date')['cap'].transform(pd.qcut, q=100, labels=range(1,101)).astype(str)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:,'port'] = data.groupby('date')['cap'].transform(pd.qcut, q=100, labels=range(1,101)).astype(str)


Unnamed: 0,date,permno,price,cap,rets,port
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444,47
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636,41
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154,37
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113,38
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164,37
...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370,40
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115,40
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512,40
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997,41


#### 4. Calculate the trading period returns for each portfolio

In [8]:
# add columns for future retuns
data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)


Unnamed: 0,date,permno,price,cap,rets,port,rets_t1
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444,47,-0.113636
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636,41,-0.096154
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154,37,0.124113
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113,38,-0.044164
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164,37,0.036304
...,...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370,40,0.011115
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115,40,-0.092512
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512,40,0.079997
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997,41,-0.093664


In [9]:
# calculate the achieved mean return return for each month and port 
port_df = data.loc[~data['port'].isnull()].groupby(['date','port'])['rets_t1'].mean().reset_index()
port_df

Unnamed: 0,date,port,rets_t1
0,1990-02-28,1,0.163373
1,1990-02-28,10,0.051086
2,1990-02-28,100,0.024544
3,1990-02-28,11,0.074192
4,1990-02-28,12,0.089466
...,...,...,...
39095,2022-08-30,95,
39096,2022-08-30,96,
39097,2022-08-30,97,
39098,2022-08-30,98,


#### 5. Calculate the returns on SMB strategy

In [10]:
port_df = port_df.pivot(index = 'date', columns = 'port', values = 'rets_t1').reset_index()

In [11]:
port_df

port,date,1,10,100,11,12,13,14,15,16,...,90,91,92,93,94,95,96,97,98,99
0,1990-02-28,0.163373,0.051086,0.024544,0.074192,0.089466,-0.018941,0.065403,0.077017,0.017527,...,0.013982,-0.011265,0.004934,0.062131,0.046729,0.008501,0.037549,0.036032,0.046161,0.014614
1,1990-03-30,0.298690,-0.030818,-0.002841,0.006307,0.000206,-0.115443,-0.043391,0.032265,-0.041392,...,-0.042241,-0.047951,0.025458,-0.045906,-0.040711,0.007734,-0.028092,-0.015353,-0.017842,0.008510
2,1990-04-30,0.157492,0.003123,0.067412,0.098035,-0.017042,0.046292,0.114019,0.136590,0.102370,...,0.153475,0.107278,0.051848,0.123254,0.088769,0.115392,0.055386,0.128733,0.077517,0.106376
3,1990-05-31,0.031771,0.006995,0.005653,0.042784,-0.006678,-0.034741,-0.003484,0.019890,0.042697,...,-0.007914,-0.001726,-0.000867,0.007429,-0.009930,-0.020153,-0.011213,0.002758,-0.001713,0.015414
4,1990-06-29,-0.184523,-0.029952,0.010237,-0.062854,-0.122559,-0.110461,0.006353,0.022096,-0.001009,...,0.005229,-0.068007,-0.037727,-0.001814,-0.083093,-0.001572,0.007015,-0.024019,0.049611,0.020725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.056612,0.030597,-0.023808,0.036436,0.012554,0.053729,0.015053,0.009379,-0.110540,...,0.076668,0.029231,0.008972,0.045140,0.036232,-0.023666,-0.023127,0.044546,-0.017178,-0.039759
387,2022-05-31,-0.119514,-0.150425,-0.056971,-0.164130,-0.150231,-0.099078,-0.150350,-0.099206,-0.224766,...,-0.059742,-0.029930,-0.135599,-0.093367,-0.059701,-0.041054,-0.026180,-0.082665,-0.099762,-0.109882
388,2022-06-30,0.216503,0.076941,0.109278,0.101573,0.079974,0.088983,0.149325,0.124988,0.105389,...,0.086495,0.095931,0.014851,0.041351,0.042683,0.099549,-0.007408,0.053511,0.109607,0.141671
389,2022-07-29,0.421900,-0.066542,-0.050784,-0.034366,-0.026428,-0.013226,0.005024,-0.038185,-0.002953,...,-0.025276,-0.076172,-0.049986,-0.025877,-0.029667,-0.046520,-0.024897,-0.033840,-0.062302,-0.052228


In [12]:
port_df['SMB_rets'] = port_df['1']-port_df['100']
port_df['SMB_rets'] = port_df['SMB_rets'].transform('shift', periods=1)
port_df

port,date,1,10,100,11,12,13,14,15,16,...,91,92,93,94,95,96,97,98,99,SMB_rets
0,1990-02-28,0.163373,0.051086,0.024544,0.074192,0.089466,-0.018941,0.065403,0.077017,0.017527,...,-0.011265,0.004934,0.062131,0.046729,0.008501,0.037549,0.036032,0.046161,0.014614,
1,1990-03-30,0.298690,-0.030818,-0.002841,0.006307,0.000206,-0.115443,-0.043391,0.032265,-0.041392,...,-0.047951,0.025458,-0.045906,-0.040711,0.007734,-0.028092,-0.015353,-0.017842,0.008510,0.138829
2,1990-04-30,0.157492,0.003123,0.067412,0.098035,-0.017042,0.046292,0.114019,0.136590,0.102370,...,0.107278,0.051848,0.123254,0.088769,0.115392,0.055386,0.128733,0.077517,0.106376,0.301531
3,1990-05-31,0.031771,0.006995,0.005653,0.042784,-0.006678,-0.034741,-0.003484,0.019890,0.042697,...,-0.001726,-0.000867,0.007429,-0.009930,-0.020153,-0.011213,0.002758,-0.001713,0.015414,0.090080
4,1990-06-29,-0.184523,-0.029952,0.010237,-0.062854,-0.122559,-0.110461,0.006353,0.022096,-0.001009,...,-0.068007,-0.037727,-0.001814,-0.083093,-0.001572,0.007015,-0.024019,0.049611,0.020725,0.026117
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.056612,0.030597,-0.023808,0.036436,0.012554,0.053729,0.015053,0.009379,-0.110540,...,0.029231,0.008972,0.045140,0.036232,-0.023666,-0.023127,0.044546,-0.017178,-0.039759,0.076807
387,2022-05-31,-0.119514,-0.150425,-0.056971,-0.164130,-0.150231,-0.099078,-0.150350,-0.099206,-0.224766,...,-0.029930,-0.135599,-0.093367,-0.059701,-0.041054,-0.026180,-0.082665,-0.099762,-0.109882,-0.032804
388,2022-06-30,0.216503,0.076941,0.109278,0.101573,0.079974,0.088983,0.149325,0.124988,0.105389,...,0.095931,0.014851,0.041351,0.042683,0.099549,-0.007408,0.053511,0.109607,0.141671,-0.062543
389,2022-07-29,0.421900,-0.066542,-0.050784,-0.034366,-0.026428,-0.013226,0.005024,-0.038185,-0.002953,...,-0.076172,-0.049986,-0.025877,-0.029667,-0.046520,-0.024897,-0.033840,-0.062302,-0.052228,0.107224


In [13]:
# calculate cumulative returns
port_df['SMB_cum_rets'] = (1+port_df['SMB_rets']).cumprod()

In [14]:
port_df

port,date,1,10,100,11,12,13,14,15,16,...,92,93,94,95,96,97,98,99,SMB_rets,SMB_cum_rets
0,1990-02-28,0.163373,0.051086,0.024544,0.074192,0.089466,-0.018941,0.065403,0.077017,0.017527,...,0.004934,0.062131,0.046729,0.008501,0.037549,0.036032,0.046161,0.014614,,
1,1990-03-30,0.298690,-0.030818,-0.002841,0.006307,0.000206,-0.115443,-0.043391,0.032265,-0.041392,...,0.025458,-0.045906,-0.040711,0.007734,-0.028092,-0.015353,-0.017842,0.008510,0.138829,1.138829
2,1990-04-30,0.157492,0.003123,0.067412,0.098035,-0.017042,0.046292,0.114019,0.136590,0.102370,...,0.051848,0.123254,0.088769,0.115392,0.055386,0.128733,0.077517,0.106376,0.301531,1.482222
3,1990-05-31,0.031771,0.006995,0.005653,0.042784,-0.006678,-0.034741,-0.003484,0.019890,0.042697,...,-0.000867,0.007429,-0.009930,-0.020153,-0.011213,0.002758,-0.001713,0.015414,0.090080,1.615740
4,1990-06-29,-0.184523,-0.029952,0.010237,-0.062854,-0.122559,-0.110461,0.006353,0.022096,-0.001009,...,-0.037727,-0.001814,-0.083093,-0.001572,0.007015,-0.024019,0.049611,0.020725,0.026117,1.657938
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.056612,0.030597,-0.023808,0.036436,0.012554,0.053729,0.015053,0.009379,-0.110540,...,0.008972,0.045140,0.036232,-0.023666,-0.023127,0.044546,-0.017178,-0.039759,0.076807,215.823172
387,2022-05-31,-0.119514,-0.150425,-0.056971,-0.164130,-0.150231,-0.099078,-0.150350,-0.099206,-0.224766,...,-0.135599,-0.093367,-0.059701,-0.041054,-0.026180,-0.082665,-0.099762,-0.109882,-0.032804,208.743402
388,2022-06-30,0.216503,0.076941,0.109278,0.101573,0.079974,0.088983,0.149325,0.124988,0.105389,...,0.014851,0.041351,0.042683,0.099549,-0.007408,0.053511,0.109607,0.141671,-0.062543,195.687987
389,2022-07-29,0.421900,-0.066542,-0.050784,-0.034366,-0.026428,-0.013226,0.005024,-0.038185,-0.002953,...,-0.049986,-0.025877,-0.029667,-0.046520,-0.024897,-0.033840,-0.062302,-0.052228,0.107224,216.670467


### Evaluation

In [15]:
px.line(port_df, y='SMB_cum_rets', x='date', labels={'SMB_cum_rets': 'Cumulative returns'}, title = 'Small-minus-Big').show()

In [16]:
px.histogram(port_df[['SMB_rets']]).show()

In [17]:
port_df["date"]= pd.to_datetime(port_df.date)

In [18]:
monthly_rets = port_df[["date", "SMB_rets"]].resample('M', on='date').mean() # Calculate average daily return for each month
monthly_rets['Colour']='red' # Add column with 'red' value
monthly_rets.loc[monthly_rets['SMB_rets']>=0, 'Colour']='green' # Replace 'red' with 'green' where return is non-negative
px.bar(monthly_rets,x=monthly_rets.index,y="SMB_rets").update_traces(marker_color=monthly_rets["Colour"]).show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



### Performance Metrics

In [19]:
mean_ret = port_df['SMB_rets'].mean()
volatility = port_df['SMB_rets'].std()
print(mean_ret, volatility)

0.022195523161254935 0.12587744308107873


In [20]:
print('Annualised expected return is: '+str(round(100*mean_ret*12,2)) +'%')
print('Annualised volatility is: '+str(round(100*volatility*np.sqrt(12),2)) +'%')

Annualised expected return is: 26.63%
Annualised volatility is: 43.61%


In [21]:
(port_df['SMB_rets']*100).describe()

count    390.000000
mean       2.219552
std       12.587744
min      -40.118455
25%       -5.535791
50%        1.059488
75%        8.026951
max       57.889406
Name: SMB_rets, dtype: float64

In [22]:
print(skew(port_df['SMB_rets'].dropna()))

1.0804934092780791


In [23]:
print(kurtosis(port_df["SMB_rets"].dropna()))

2.8358614301534333


In [24]:
VaR = port_df['SMB_rets'].quantile(0.005)
VaR

-0.20198024796655661

In [25]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['SMB_rets'].quantile(q)
    print('There is a '+str(round(100*q,3))+'% chance that my losses will be '+str(round(VaR*100, 2))+'% or worse over the next day.')

There is a 10.0% chance that my losses will be -11.09% or worse over the next day.
There is a 5.0% chance that my losses will be -15.32% or worse over the next day.
There is a 1.0% chance that my losses will be -19.85% or worse over the next day.
There is a 0.1% chance that my losses will be -32.4% or worse over the next day.
There is a 0.01% chance that my losses will be -39.35% or worse over the next day.


In [26]:
VaR = port_df['SMB_rets'].quantile(0.005)
rets_tail = port_df.loc[port_df['SMB_rets']<=VaR,["date", "SMB_rets", "SMB_cum_rets"]]
rets_tail

port,date,SMB_rets,SMB_cum_rets
86,1997-04-30,-0.202647,10.138008
361,2020-03-31,-0.401185,60.678075


In [27]:
ES = rets_tail['SMB_rets'].mean()
ES

-0.30191554459679737

In [28]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['SMB_rets'].quantile(q)
    rets_tail = port_df.loc[port_df['SMB_rets']<=VaR,:]
    ES = rets_tail['SMB_rets'].mean()
    print('I expect to lose on average '+ str(round(ES*100, 2))+'% over 1-day period given that I have exceeded my VaR for '+str(round(100*q,3))+'%.')

I expect to lose on average -16.2% over 1-day period given that I have exceeded my VaR for 10.0%.
I expect to lose on average -19.09% over 1-day period given that I have exceeded my VaR for 5.0%.
I expect to lose on average -25.11% over 1-day period given that I have exceeded my VaR for 1.0%.
I expect to lose on average -40.12% over 1-day period given that I have exceeded my VaR for 0.1%.
I expect to lose on average -40.12% over 1-day period given that I have exceeded my VaR for 0.01%.


In [29]:
# according to bloomberg the risk free rate (treasury yields) 4.76 % (last 30 years)
# calculation of sharpe ratio

rf = 0.0476
sharpe_ratio = (mean_ret-rf)/volatility
sharpe_ratio

-0.20181913627195167

In [30]:
# get SP500 data from 1990-01-31 to 2022-08-30	
sp500_ticker = yf.Ticker("^GSPC")
sp500 = sp500_ticker.history(start="1990-02-28", end="2022-09-30", interval="1mo")
sp500 = sp500[["Close"]]
sp500["Rets"] = sp500.Close.pct_change()

In [31]:
sp500

Unnamed: 0_level_0,Close,Rets
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-03-01 00:00:00-05:00,339.940002,
1990-04-01 00:00:00-05:00,330.799988,-0.026887
1990-05-01 00:00:00-04:00,361.230011,0.091989
1990-06-01 00:00:00-04:00,358.019989,-0.008886
1990-07-01 00:00:00-04:00,356.149994,-0.005223
...,...,...
2022-05-01 00:00:00-04:00,4132.149902,0.000053
2022-06-01 00:00:00-04:00,3785.379883,-0.083920
2022-07-01 00:00:00-04:00,4130.290039,0.091116
2022-08-01 00:00:00-04:00,3955.000000,-0.042440


In [32]:
# Calculate the average return of the market
average_market_return = sp500.Rets.mean()

# Calculate the covariance between portfolio and market returns
covariance = port_df['SMB_rets'].cov(sp500['Rets'].reset_index(drop=True))

# Calculate the variance of the market returns
market_variance = sp500['Rets'].var()

# Calculate the portfolio's beta
portfolio_beta = covariance / market_variance

# Calculate Jensen's Alpha
jensens_alpha = mean_ret - (rf + portfolio_beta * (average_market_return - rf))

print("Portfolio Beta:", portfolio_beta)
print("Jensen's Alpha:", jensens_alpha)

Portfolio Beta: -0.033557971340326065
Jensen's Alpha: -0.026767361208990675


In [33]:
# treynor ratio
treynor_ratio = (mean_ret - rf)/portfolio_beta
treynor_ratio

0.7570325566199207

In [34]:
# maximum drawdown
peak = port_df.loc[1, "SMB_cum_rets"]
max_drawdown = 0
for ret in port_df["SMB_cum_rets"].dropna():
    if ret >= peak:
        peak = ret
    else:
        drawdown = (peak - ret)/peak
        max_drawdown = max(max_drawdown, drawdown)

print(max_drawdown)

0.8895932964294685


In [35]:
# calmar-ratio
annualised_exp_return = round(mean_ret*12,2)
calmar_ratio = (annualised_exp_return - rf)/max_drawdown
calmar_ratio

0.25000188388631034