In [1]:
import os
import time
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import warnings
import seaborn as sns
warnings.filterwarnings("ignore")

import allocation
import portfolio_summary
import utils as hrp_utils

In [2]:
import importlib

# Re-import the project-local modules so that edits made to their source files
# are picked up without restarting the kernel.
for local_module in (allocation, hrp_utils):
    importlib.reload(local_module)
# Kept as the final expression so the cell still displays the reloaded module.
importlib.reload(portfolio_summary)

<module 'portfolio_summary' from '/Users/vdp/projects/hierarchical_risk_parity/portfolio_summary.py'>

In [3]:
# Directory layout: every input file lives in a "data" folder located in the
# current working directory.
CUR_DIR = os.getcwd()
DATA_FOLDER = os.path.join(CUR_DIR, "data")

# Full paths of the two input files, built from the shared data folder.
ALL_DATA, SNP_CONSTITUENTS = (
    os.path.join(DATA_FOLDER, file_name)
    for file_name in ("dataset.feather", "snp_constituents.json")
)

## Loading Data

#### NOTE: SNP constituents are calculated at the start of each year and remain the same for the entire year

In [4]:
# Read the raw dataset from the feather file and preview its first rows.
df = pd.read_feather(path=ALL_DATA)
df.head(n=5)

Unnamed: 0,date,ticker,permno,ret,mkt_cap,shrout,prc,year
0,2000-01-03,ROK,84381,0.031332,9392211.25,190222.0,49.375,2000
1,2000-01-04,ROK,84381,-0.036709,9047433.875,190222.0,47.5625,2000
2,2000-01-05,ROK,84381,0.005256,9094989.375,190222.0,47.8125,2000
3,2000-01-06,ROK,84381,-0.016993,8940434.0,190222.0,47.0,2000
4,2000-01-07,ROK,84381,0.027926,9190100.375,190222.0,48.3125,2000


#### Filtering for required data

In [5]:
# Keep only the columns used by the rest of the analysis.
data_df = df.loc[:, ["date", "permno", "ret", "year", "mkt_cap"]]
data_df.head()

Unnamed: 0,date,permno,ret,year,mkt_cap
0,2000-01-03,84381,0.031332,2000,9392211.25
1,2000-01-04,84381,-0.036709,2000,9047433.875
2,2000-01-05,84381,0.005256,2000,9094989.375
3,2000-01-06,84381,-0.016993,2000,8940434.0
4,2000-01-07,84381,0.027926,2000,9190100.375


#### Cleaning the dataset for each year

In [6]:
# Apply hrp_utils.clean_dataset to each year's rows separately, then stitch the
# cleaned yearly frames back together under a fresh 0..n-1 index.
data_df = (
    data_df
    .groupby("year")
    .apply(hrp_utils.clean_dataset)
    .reset_index(drop=True)
)
data_df.head()

Unnamed: 0,date,permno,ret,year,mkt_cap
0,2000-01-03,84381,0.031332,2000,9392211.25
1,2000-01-04,84381,-0.036709,2000,9047433.875
2,2000-01-05,84381,0.005256,2000,9094989.375
3,2000-01-06,84381,-0.016993,2000,8940434.0
4,2000-01-07,84381,0.027926,2000,9190100.375


### Base Cases:
    1. Market weighted returns
    2. Equal weighted returns

#### Market weights are calculated from each stock's latest market cap of the previous year (i.e. the lagged market cap)

In [7]:
# One row per (year, permno): the last row seen for that pair, which carries the
# latest market cap provided the rows are in date order.
# NOTE(review): shift() takes the previous *row* of each permno, so the lag is
# the previous year only if rows are ordered by year and no year is missing for
# that stock — confirm against the dataset.
mkt_df = (
    data_df
    .drop_duplicates(subset=["year", "permno"], keep="last")
    .loc[:, ["year", "permno", "mkt_cap"]]
    .reset_index(drop=True)
    .assign(lag_mkt_cap=lambda frame: frame.groupby(["permno"])["mkt_cap"].shift())
    # Rows without a lagged value (a stock's first available year) are dropped.
    .dropna()
    .reset_index(drop=True)
)
mkt_df.head()

Unnamed: 0,year,permno,mkt_cap,lag_mkt_cap
0,2001,84381,3285829.0,8701516.0
1,2001,81061,10696400.0,10190930.0
2,2001,85914,15704850.0,6112195.0
3,2001,21207,3748044.0,2869930.0
4,2001,87800,2240735.0,16036480.0


#### Using compute functions from the allocation library to get market and equal weights

In [8]:
# Compute both base-case weightings independently for every year. The weighting
# logic itself lives in the allocation module.
mkt_wgt_df = (
    mkt_df
    .groupby("year")
    .apply(allocation.compute_mkt_weights)
    .reset_index()
)
eq_wgt_df = (
    mkt_df
    .groupby("year")
    .apply(allocation.compute_unif_weights)
    .reset_index()
)

#### Calculating daily returns for the market weighted and equal weighted portfolio starting 2001

In [9]:
# Attach the yearly market-cap weights to every daily observation and keep only
# the evaluation period (2001 onwards; 2000 has no lagged market cap).
base_df = data_df.merge(mkt_wgt_df, on=["year", "permno"], how="left")
base_df = base_df[base_df["year"] > 2000]

# Attach the yearly equal weights, then compute each stock's weighted return.
# The "MKT" and "EQ" weight columns are expected to come from the merged frames.
base_df = base_df.merge(eq_wgt_df, on=["year", "permno"], how="left")
base_df["MKT_ret"] = base_df["ret"] * base_df["MKT"]
base_df["EQ_ret"] = base_df["ret"] * base_df["EQ"]

# daily returns of the portfolio
# Select the columns with a list: indexing a groupby with a bare tuple of
# column names is deprecated in pandas 1.x and raises in pandas 2.x.
base_ret_df = base_df.groupby("date")[["MKT_ret", "EQ_ret"]].sum()
base_ret_df

Unnamed: 0_level_0,MKT_ret,EQ_ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-01-02,-0.028877,-0.028219
2001-01-03,0.055665,0.046295
2001-01-04,-0.012854,-0.009114
2001-01-05,-0.025212,-0.020683
2001-01-08,-0.001344,0.002724
...,...,...
2022-12-23,0.004794,0.006778
2022-12-27,-0.007404,0.000080
2022-12-28,-0.011456,-0.013615
2022-12-29,0.020966,0.017973


## HRP Performance with Different Linkages

In [10]:
# Linkage methods to compare when building the HRP portfolios.
linkage_types = [
    "single",
    "complete",
    "ward",
    "average",
]

### Splitting dataset by year, getting weights based on previous year correlations, measuring weighted returns for current year 

In [None]:
%%time
test_df_lst = list()

for year in range(2001, 2023):
    test_df = data_df[data_df["year"] == year].set_index("permno")
    stocks = test_df.index
    
    form_df = data_df[data_df["year"] == (year - 1)]
    form_df = form_df[form_df["permno"].isin(stocks)]
    
    weights_list = list()
    for linkage_type in linkage_types:
        weight_df = allocation.compute_hrp_weights(df=form_df, linkage_type=linkage_type).to_frame()
        test_df = test_df.join(weight_df)
        
        wgt_ret = "HRP_" + linkage_type.upper() + "_ret"
        test_df[wgt_ret] = test_df["ret"] * test_df[f"HRP_{linkage_type}_wgt"]
    
    test_df_lst.append(test_df)

### Portfolio daily returns using different linkages for HRP

In [None]:
# Names of the weighted-return columns, one per linkage type.
wgt_ret_cols = [f"HRP_{linkage_type.upper()}_ret" for linkage_type in linkage_types]

# Stack the yearly frames row-wise; the original index is discarded.
ret_df = pd.concat(test_df_lst, axis=0, ignore_index=True)

# Portfolio return per day = sum of the weighted stock returns on that date.
daily_ret_df = ret_df.groupby("date")[wgt_ret_cols].sum()
daily_ret_df

#### Adding base cases to the returns dataframe

In [None]:
# Put the base-case returns (MKT_ret, EQ_ret) alongside the HRP returns.
# Only the HRP columns are taken from daily_ret_df so that the cell can be
# re-run safely: once daily_ret_df already contains the base-case columns, a
# plain join would fail with a column-overlap error.
daily_ret_df = base_ret_df.join(daily_ret_df[wgt_ret_cols])
daily_ret_df

### Cumulative daily returns

In [None]:
# Compound the daily returns into a cumulative return series, in percent.
cum_ret_df = (
    (1 + daily_ret_df)
    .cumprod()
    .sub(1)
    .mul(100)
)
cum_ret_df

#### Plotting cumulative returns

In [None]:
# Line chart of every strategy's cumulative return; the title and axis labels
# are set directly on the Axes that pandas returns.
cum_ret_df.plot(figsize=(10, 6)).set(
    title="SNP500 Constituents Cumulative Performance (2001 - 2022)",
    xlabel="Year",
    ylabel="Return (%)",
)
plt.show()

## Summarising portfolio performance statistics across the 2001 - 2022 period

In [None]:
# Build the summary table from the combined daily returns (base cases plus HRP
# variants). The statistics themselves are defined in the project's
# portfolio_summary module, which is not shown in this notebook.
summary_df = portfolio_summary.compute_portfolio_summary(ret_df=daily_ret_df)
summary_df