# Group Project
#### **FINN43815 - Python for Finance**

In [129]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import kurtosis, skew
import yfinance as yf

### SMB Strategy

#### 1. Import Data

In [130]:
data = pd.read_csv('cleaned_data.csv')


Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.



In [131]:
# drop unnecessary columns
data = data[["date", "permno", "price", "cap"]]
data

Unnamed: 0,date,permno,price,cap
0,1990-01-31,0111145D UN Equity,20.6875,1226.3674
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362
...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731
514692,2022-05-31,J UN Equity,140.0900,18019.3940
514693,2022-06-30,J UN Equity,127.1300,16352.3846
514694,2022-07-29,J UN Equity,137.3000,17660.5239


#### 2. Calculate monthly returns

In [132]:
data['rets'] = data.groupby('permno')['price'].transform(pd.Series.pct_change)
data.head()





Unnamed: 0,date,permno,price,cap,rets
0,1990-01-31,0111145D UN Equity,20.6875,1226.3674,
1,1990-02-28,0111145D UN Equity,22.0,1303.3636,0.063444
2,1990-03-30,0111145D UN Equity,19.5,1155.2541,-0.113636
3,1990-04-30,0111145D UN Equity,17.625,1021.1925,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113


#### 3. Sort all stocks into 10 equal groups by size

In [133]:
# Filter out null values fro return and cap
data = data.loc[(~data['rets'].isnull()) & ~(data['cap'].isnull())]
data 

Unnamed: 0,date,permno,price,cap,rets
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164
...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997


In [134]:
# sort all stocks into 10 different groups
data.loc[:,'port'] = data.groupby('date')['cap'].transform(pd.qcut, q=10, labels=range(1,11)).astype(str)
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,permno,price,cap,rets,port
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444,5
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636,5
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154,4
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113,4
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164,4
...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370,4
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115,4
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512,4
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997,5


#### 4. Calculate the trading period returns for each portfolio

In [135]:
# add columns for future retuns
data.loc[:,'rets_t1']=data.groupby('permno')['rets'].transform('shift', periods=-1)
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,permno,price,cap,rets,port,rets_t1
1,1990-02-28,0111145D UN Equity,22.0000,1303.3636,0.063444,5,-0.113636
2,1990-03-30,0111145D UN Equity,19.5000,1155.2541,-0.113636,5,-0.096154
3,1990-04-30,0111145D UN Equity,17.6250,1021.1925,-0.096154,4,0.124113
4,1990-05-31,0111145D UN Equity,19.8125,1147.9362,0.124113,4,-0.044164
5,1990-06-29,0111145D UN Equity,18.9375,1097.2387,-0.044164,4,0.036304
...,...,...,...,...,...,...,...
514691,2022-04-29,J UN Equity,138.5500,17902.9731,0.005370,4,0.011115
514692,2022-05-31,J UN Equity,140.0900,18019.3940,0.011115,4,-0.092512
514693,2022-06-30,J UN Equity,127.1300,16352.3846,-0.092512,4,0.079997
514694,2022-07-29,J UN Equity,137.3000,17660.5239,0.079997,5,-0.093664


In [136]:
# calculate the achieved mean return return for each month and port 
port_df = data.loc[~data['port'].isnull()].groupby(['date','port'])['rets_t1'].mean().reset_index()
port_df

Unnamed: 0,date,port,rets_t1
0,1990-02-28,1,0.079803
1,1990-02-28,10,0.027325
2,1990-02-28,2,0.061317
3,1990-02-28,3,0.031334
4,1990-02-28,4,0.028990
...,...,...,...
3905,2022-08-30,5,
3906,2022-08-30,6,
3907,2022-08-30,7,
3908,2022-08-30,8,


#### 5. Calculate the returns on SMB strategy

In [137]:
port_df = port_df.pivot(index = 'date', columns = 'port', values = 'rets_t1').reset_index()

In [138]:
port_df

port,date,1,10,2,3,4,5,6,7,8,9
0,1990-02-28,0.079803,0.027325,0.061317,0.031334,0.028990,0.032949,0.036013,0.015437,0.022438,0.032901
1,1990-03-30,0.034176,-0.016299,-0.024953,-0.033827,-0.056571,-0.062599,-0.033857,-0.045643,-0.042175,-0.030118
2,1990-04-30,0.143240,0.093527,0.095036,0.100996,0.109279,0.106995,0.096825,0.098511,0.090184,0.096043
3,1990-05-31,0.034336,-0.001430,0.018967,-0.013480,-0.018185,-0.012922,-0.005931,-0.006232,-0.016742,-0.013181
4,1990-06-29,-0.045790,-0.011930,-0.030666,-0.023334,-0.023069,-0.022329,-0.039043,-0.033795,-0.010868,0.008942
...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.015961,0.003806,0.008648,0.004065,0.006674,0.027539,-0.004353,0.013390,0.018884,0.022624
387,2022-05-31,-0.135041,-0.075394,-0.131446,-0.106018,-0.115822,-0.108083,-0.091793,-0.098948,-0.086429,-0.086310
388,2022-06-30,0.113651,0.072262,0.106285,0.074164,0.090240,0.087809,0.114172,0.082073,0.079855,0.075665
389,2022-07-29,0.041513,-0.045561,-0.021248,-0.027243,-0.030543,-0.019900,-0.034934,-0.021946,-0.021790,-0.019528


In [139]:
port_df['SMB_rets'] = port_df['1']-port_df['10']
port_df['SMB_rets'] = port_df['SMB_rets'].transform('shift', periods=1)
port_df

port,date,1,10,2,3,4,5,6,7,8,9,SMB_rets
0,1990-02-28,0.079803,0.027325,0.061317,0.031334,0.028990,0.032949,0.036013,0.015437,0.022438,0.032901,
1,1990-03-30,0.034176,-0.016299,-0.024953,-0.033827,-0.056571,-0.062599,-0.033857,-0.045643,-0.042175,-0.030118,0.052478
2,1990-04-30,0.143240,0.093527,0.095036,0.100996,0.109279,0.106995,0.096825,0.098511,0.090184,0.096043,0.050475
3,1990-05-31,0.034336,-0.001430,0.018967,-0.013480,-0.018185,-0.012922,-0.005931,-0.006232,-0.016742,-0.013181,0.049713
4,1990-06-29,-0.045790,-0.011930,-0.030666,-0.023334,-0.023069,-0.022329,-0.039043,-0.033795,-0.010868,0.008942,0.035766
...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.015961,0.003806,0.008648,0.004065,0.006674,0.027539,-0.004353,0.013390,0.018884,0.022624,0.030762
387,2022-05-31,-0.135041,-0.075394,-0.131446,-0.106018,-0.115822,-0.108083,-0.091793,-0.098948,-0.086429,-0.086310,-0.019766
388,2022-06-30,0.113651,0.072262,0.106285,0.074164,0.090240,0.087809,0.114172,0.082073,0.079855,0.075665,-0.059647
389,2022-07-29,0.041513,-0.045561,-0.021248,-0.027243,-0.030543,-0.019900,-0.034934,-0.021946,-0.021790,-0.019528,0.041388


In [140]:
# calculate cumulative returns
port_df['SMB_cum_rets'] = (1+port_df['SMB_rets']).cumprod()

In [141]:
port_df

port,date,1,10,2,3,4,5,6,7,8,9,SMB_rets,SMB_cum_rets
0,1990-02-28,0.079803,0.027325,0.061317,0.031334,0.028990,0.032949,0.036013,0.015437,0.022438,0.032901,,
1,1990-03-30,0.034176,-0.016299,-0.024953,-0.033827,-0.056571,-0.062599,-0.033857,-0.045643,-0.042175,-0.030118,0.052478,1.052478
2,1990-04-30,0.143240,0.093527,0.095036,0.100996,0.109279,0.106995,0.096825,0.098511,0.090184,0.096043,0.050475,1.105602
3,1990-05-31,0.034336,-0.001430,0.018967,-0.013480,-0.018185,-0.012922,-0.005931,-0.006232,-0.016742,-0.013181,0.049713,1.160564
4,1990-06-29,-0.045790,-0.011930,-0.030666,-0.023334,-0.023069,-0.022329,-0.039043,-0.033795,-0.010868,0.008942,0.035766,1.202073
...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,2022-04-29,-0.015961,0.003806,0.008648,0.004065,0.006674,0.027539,-0.004353,0.013390,0.018884,0.022624,0.030762,300.473882
387,2022-05-31,-0.135041,-0.075394,-0.131446,-0.106018,-0.115822,-0.108083,-0.091793,-0.098948,-0.086429,-0.086310,-0.019766,294.534597
388,2022-06-30,0.113651,0.072262,0.106285,0.074164,0.090240,0.087809,0.114172,0.082073,0.079855,0.075665,-0.059647,276.966515
389,2022-07-29,0.041513,-0.045561,-0.021248,-0.027243,-0.030543,-0.019900,-0.034934,-0.021946,-0.021790,-0.019528,0.041388,288.429692


### Evaluation

In [142]:
px.line(port_df, y='SMB_cum_rets', x='date', labels={'SMB_cum_rets': 'Cumulative returns'}, title = 'Small-minus-Big').show()

In [143]:
px.histogram(port_df[['SMB_rets']]).show()

In [144]:
port_df["date"]= pd.to_datetime(port_df.date)

In [145]:
monthly_rets = port_df[["date", "SMB_rets"]].resample('M', on='date').mean() # Calculate average daily return for each month
monthly_rets['Colour']='red' # Add column with 'red' value
monthly_rets.loc[monthly_rets['SMB_rets']>=0, 'Colour']='green' # Replace 'red' with 'green' where return is non-negative
px.bar(monthly_rets,x=monthly_rets.index,y="SMB_rets").update_traces(marker_color=monthly_rets["Colour"]).show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



### Performance Metrics

In [146]:
mean_ret = port_df['SMB_rets'].mean()
volatility = port_df['SMB_rets'].std()
print(mean_ret, volatility)

0.01679499541626178 0.06454071941385284


In [147]:
print('Annualised expected return is: '+str(round(100*mean_ret*12,2)) +'%')
print('Annualised volatility is: '+str(round(100*volatility*np.sqrt(12),2)) +'%')

Annualised expected return is: 20.15%
Annualised volatility is: 22.36%


In [148]:
(port_df['SMB_rets']*100).describe()

count    390.000000
mean       1.679500
std        6.454072
min      -27.396977
25%       -1.884797
50%        1.025948
75%        4.321357
max       50.630663
Name: SMB_rets, dtype: float64

In [149]:
print(skew(port_df['SMB_rets'].dropna()))

1.6534701851876326


In [150]:
print(kurtosis(port_df["SMB_rets"].dropna()))

11.708275295242158


In [151]:
VaR = port_df['SMB_rets'].quantile(0.005)
VaR

-0.13945903471149376

In [152]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['SMB_rets'].quantile(q)
    print('There is a '+str(round(100*q,3))+'% chance that my losses will be '+str(round(VaR*100, 2))+'% or worse over the next day.')

There is a 10.0% chance that my losses will be -4.65% or worse over the next day.
There is a 5.0% chance that my losses will be -6.71% or worse over the next day.
There is a 1.0% chance that my losses will be -12.77% or worse over the next day.
There is a 0.1% chance that my losses will be -22.2% or worse over the next day.
There is a 0.01% chance that my losses will be -26.88% or worse over the next day.


In [153]:
VaR = port_df['SMB_rets'].quantile(0.005)
rets_tail = port_df.loc[port_df['SMB_rets']<=VaR,["date", "SMB_rets", "SMB_cum_rets"]]
rets_tail

port,date,SMB_rets,SMB_cum_rets
225,2008-11-28,-0.140336,50.693566
361,2020-03-31,-0.27397,90.069278


In [154]:
ES = rets_tail['SMB_rets'].mean()
ES

-0.20715298106156205

In [155]:
for q in [0.1, 0.05, 0.01, 0.001, 0.0001]:
    VaR = port_df['SMB_rets'].quantile(q)
    rets_tail = port_df.loc[port_df['SMB_rets']<=VaR,:]
    ES = rets_tail['SMB_rets'].mean()
    print('I expect to lose on average '+ str(round(ES*100, 2))+'% over 1-day period given that I have exceeded my VaR for '+str(round(100*q,3))+'%.')

I expect to lose on average -8.05% over 1-day period given that I have exceeded my VaR for 10.0%.
I expect to lose on average -10.43% over 1-day period given that I have exceeded my VaR for 5.0%.
I expect to lose on average -17.31% over 1-day period given that I have exceeded my VaR for 1.0%.
I expect to lose on average -27.4% over 1-day period given that I have exceeded my VaR for 0.1%.
I expect to lose on average -27.4% over 1-day period given that I have exceeded my VaR for 0.01%.


In [156]:
# according to bloomberg the risk free rate (treasury yields) 4.76 % (last 30 years)
# calculation of sharpe ratio

rf = 0.0476
sharpe_ratio = (mean_ret-rf)/volatility
sharpe_ratio

-0.47729564937459196

In [157]:
# get SP500 data from 1990-01-31 to 2022-08-30	
sp500_ticker = yf.Ticker("^GSPC")
sp500 = sp500_ticker.history(start="1990-02-28", end="2022-09-30", interval="1mo")
sp500 = sp500[["Close"]]
sp500["Rets"] = sp500.Close.pct_change()

In [158]:
sp500

Unnamed: 0_level_0,Close,Rets
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-03-01 00:00:00-05:00,339.940002,
1990-04-01 00:00:00-05:00,330.799988,-0.026887
1990-05-01 00:00:00-04:00,361.230011,0.091989
1990-06-01 00:00:00-04:00,358.019989,-0.008886
1990-07-01 00:00:00-04:00,356.149994,-0.005223
...,...,...
2022-05-01 00:00:00-04:00,4132.149902,0.000053
2022-06-01 00:00:00-04:00,3785.379883,-0.083920
2022-07-01 00:00:00-04:00,4130.290039,0.091116
2022-08-01 00:00:00-04:00,3955.000000,-0.042440


In [159]:
# Calculate the average return of the market
average_market_return = sp500.Rets.mean()

# Calculate the covariance between portfolio and market returns
covariance = port_df['SMB_rets'].cov(sp500['Rets'].reset_index(drop=True))

# Calculate the variance of the market returns
market_variance = sp500['Rets'].var()

# Calculate the portfolio's beta
portfolio_beta = covariance / market_variance

# Calculate Jensen's Alpha
jensens_alpha = mean_ret - (rf + portfolio_beta * (average_market_return - rf))

print("Portfolio Beta:", portfolio_beta)
print("Jensen's Alpha:", jensens_alpha)

Portfolio Beta: 0.03904371995680168
Jensen's Alpha: -0.029219328407257692


In [160]:
# treynor ratio
treynor_ratio = (mean_ret - rf)/portfolio_beta
treynor_ratio

-0.7889874381288761

In [161]:
# maximum drawdown
peak = port_df.loc[1, "SMB_cum_rets"]
max_drawdown = 0
for ret in port_df["SMB_cum_rets"].dropna():
    if ret >= peak:
        peak = ret
    else:
        drawdown = (peak - ret)/peak
        max_drawdown = max(max_drawdown, drawdown)

print(max_drawdown)

0.6280146640021912


In [162]:
# calmar-ratio
annualised_exp_return = round(mean_ret*12,2)
calmar_ratio = (annualised_exp_return - rf)/max_drawdown
calmar_ratio

0.24266949282488134