### Project 1

#### Part A

Jerônimo de Abreu Afrange

- Select 30 stocks in the S&P 500 universe
- Collect daily returns for the past 10 years (2014-2023)
- Create both value-weighted and equally weighted portfolios
- Rebalance these portfolios on the first day of the month based on the information on the last day (1-day lag)
- Which portfolio has the highest turnover?
- Compute daily returns for both portfolios
- Compute the following statistics for these portfolios:
    - Annualized average return
    - Annualized standard deviation
    - Sharpe ratio
    - Information ratio (vs S&P500)
- Plot cumulative returns for both portfolios, S&P500 and risk-free




In [1]:
import pandas as pd

from matplotlib import pyplot as plt

In [2]:
# AUXILIARY FUNCTIONS

# calculates the month number
def month_number(date, base_year=2014):
    """Return the zero-based index of the month of *date*, counted from January of *base_year*.

    Args:
        date: any object exposing ``year`` and ``month`` attributes
            (e.g. ``datetime.date``, ``datetime.datetime`` or ``pandas.Timestamp``).
        base_year: year whose January is month number 0; defaults to 2014,
            the first year of the sample used in this notebook.

    Returns:
        The number of whole months between January of *base_year* and the month
        of *date* (negative for dates before *base_year*).
    """
    return (date.year - base_year) * 12 + date.month - 1

In [3]:
# DATA LOADING AND INITIAL MANIPULATION

# creates the DataFrames for the S&P500 index and the stock prices
stocks_data = pd.read_csv("../data/stock_prices.csv")
index_data = pd.read_csv("../data/sp500_index.csv")

# removes the stocks for which we don't have the full history: any ticker with fewer
# rows than the best-covered ticker is considered incomplete
ticker_counts = stocks_data["Ticker"].value_counts()
incomplete_stocks = ticker_counts[ticker_counts < ticker_counts.max()]

# .copy() makes the filtered frame independent of the raw one, so that columns assigned
# to it afterwards are not written to a slice (avoids pandas' SettingWithCopyWarning)
stocks_data = stocks_data[~stocks_data["Ticker"].isin(incomplete_stocks.index)].copy()

In [4]:
# CREATION OF THE MONTH NUMBER COLUMN

# parses the dates so that every row can be mapped to its month number
stocks_data["Date"] = pd.to_datetime(stocks_data["Date"])
stocks_data["MONTH_NUMBER"] = stocks_data["Date"].apply(month_number)

# constants: starting portfolio value and the earliest date present in the data
INITIAL_PORT_VALUE = 1e6
INITIAL_DATE = stocks_data["Date"].min()

In [5]:
# CREATION OF THE VALUE WEIGHTED PORTFOLIO
#
# Builds portfolio_vw: one row per (Date, Ticker) with the number of shares held
# (PORT_STOCK_AMT), the value of each position and of the whole portfolio at PX_LAST,
# and the resulting weight of each position (PORT_SHARE).

# creates the portfolio DataFrame, which is a copy of the current stocks_data DataFrame
portfolio_vw = stocks_data.copy()

# boolean mask selecting the rows of the first date in the data
initial_date_filter = portfolio_vw["Date"] == INITIAL_DATE

# total market cap of all the stocks on each date and each stock's share of that total
# (the share is used below as the target weight of the stock)
portfolio_vw["TOTAL_MKT_CAP"] = portfolio_vw.groupby("Date")["CUR_MKT_CAP"].transform("sum")
portfolio_vw["MKT_CAP_SHARE"] = portfolio_vw["CUR_MKT_CAP"] / portfolio_vw["TOTAL_MKT_CAP"]

# initial number of shares of each stock: the initial portfolio value is split according to
# the market cap shares and converted into shares at the opening price of the first date;
# the per-ticker amounts are then propagated to every row of that ticker through the merge
portfolio_vw.loc[initial_date_filter, "PORT_STOCK_AMT"] = INITIAL_PORT_VALUE * portfolio_vw["MKT_CAP_SHARE"] / portfolio_vw["PX_OPEN"]
stock_amounts = portfolio_vw.loc[initial_date_filter, ["Ticker", "PORT_STOCK_AMT"]]
portfolio_vw = portfolio_vw.drop(columns="PORT_STOCK_AMT")
portfolio_vw = portfolio_vw.merge(stock_amounts, on="Ticker")

# calculation of the values of the individual positions (at PX_LAST) as well as the total
# portfolio value on each date
portfolio_vw["PORT_STOCK_VAL_LAST"] = portfolio_vw["PX_LAST"] * portfolio_vw["PORT_STOCK_AMT"]
portfolio_vw["PORT_TOTAL_VAL_LAST"] = portfolio_vw.groupby("Date")["PORT_STOCK_VAL_LAST"].transform("sum")

# creates a copy of the current (never rebalanced) portfolio for future comparison
portfolio_vw_unbalanced = portfolio_vw.copy()

# rebalancing loop: one iteration per month after the first one (month numbers 1..max)
for i in range(1, portfolio_vw["MONTH_NUMBER"].max() + 1):

    # convenience filters: last date of the previous month, first date of month i,
    # and every row from month i onwards
    ending_date_filter = portfolio_vw["Date"] == portfolio_vw[portfolio_vw["MONTH_NUMBER"] == i - 1]["Date"].max()
    initial_date_filter = portfolio_vw["Date"] == portfolio_vw[portfolio_vw["MONTH_NUMBER"] == i]["Date"].min()
    current_month_filter = portfolio_vw["MONTH_NUMBER"] >= i
    
    # rebalancing of the number of shares and propagation to the rest of the DataFrame:
    # the portfolio value on the last date of the previous month is redistributed using the
    # market cap shares and opening prices of the first date of month i
    # NOTE(review): the weights come from MKT_CAP_SHARE on the rebalancing date itself, not
    # from the previous day -- confirm this matches the intended 1-day lag
    current_portfolio_value = portfolio_vw[ending_date_filter]["PORT_TOTAL_VAL_LAST"].iloc[0]
    portfolio_vw.loc[initial_date_filter, "PORT_STOCK_AMT"] = current_portfolio_value * portfolio_vw["MKT_CAP_SHARE"] / portfolio_vw["PX_OPEN"]
    stock_amounts = portfolio_vw.loc[initial_date_filter, ["Ticker", "PORT_STOCK_AMT"]]
    # tagging the new amounts with month i makes the left merge below fill only the rows of
    # month i; combine_first keeps the previous amount on every other row
    stock_amounts["MONTH_NUMBER"] = i
    portfolio_vw = pd.merge(portfolio_vw, stock_amounts, on=["Ticker", "MONTH_NUMBER"], how="left", suffixes=("", "_new"))
    portfolio_vw["PORT_STOCK_AMT"] = portfolio_vw["PORT_STOCK_AMT_new"].combine_first(portfolio_vw["PORT_STOCK_AMT"])
    portfolio_vw.drop(columns=["PORT_STOCK_AMT_new"], inplace=True)

    # calculating the new portfolio values for month i and all the following months, so that
    # the next iteration reads an up-to-date total on the last date of month i
    portfolio_vw.loc[current_month_filter, "PORT_STOCK_VAL_LAST"] = portfolio_vw["PX_LAST"] * portfolio_vw["PORT_STOCK_AMT"]
    portfolio_vw.loc[current_month_filter, "PORT_TOTAL_VAL_LAST"] = portfolio_vw.groupby("Date")["PORT_STOCK_VAL_LAST"].transform("sum")

# weight of each position in the total portfolio value on each date
portfolio_vw["PORT_SHARE"] = portfolio_vw["PORT_STOCK_VAL_LAST"] / portfolio_vw["PORT_TOTAL_VAL_LAST"]

In [6]:
# CREATION OF THE EQUALLY WEIGHTED PORTFOLIO
#
# Builds portfolio_ew: one row per (Date, Ticker) with the number of shares held
# (PORT_STOCK_AMT), the value of each position and of the whole portfolio at PX_LAST,
# and the resulting weight of each position (PORT_SHARE).

# creates the portfolio DataFrame, which is a copy of the current stocks_data DataFrame
portfolio_ew = stocks_data.copy()

# boolean mask selecting the rows of the first date in the data
initial_date_filter = portfolio_ew["Date"] == INITIAL_DATE

# initial number of shares of each stock: the initial portfolio value is split evenly among
# the tickers and converted into shares at the opening price of the first date; the
# per-ticker amounts are then propagated to every row of that ticker through the merge
number_of_companies = portfolio_ew["Ticker"].nunique()
portfolio_ew.loc[initial_date_filter, "PORT_STOCK_AMT"] = INITIAL_PORT_VALUE / number_of_companies / portfolio_ew["PX_OPEN"]
stock_amounts = portfolio_ew.loc[initial_date_filter, ["Ticker", "PORT_STOCK_AMT"]]
portfolio_ew = portfolio_ew.drop(columns="PORT_STOCK_AMT")
portfolio_ew = portfolio_ew.merge(stock_amounts, on="Ticker")

# calculation of the values of the individual positions (at PX_LAST) as well as the total
# portfolio value on each date
portfolio_ew["PORT_STOCK_VAL_LAST"] = portfolio_ew["PX_LAST"] * portfolio_ew["PORT_STOCK_AMT"]
portfolio_ew["PORT_TOTAL_VAL_LAST"] = portfolio_ew.groupby("Date")["PORT_STOCK_VAL_LAST"].transform("sum")

# creates a copy of the current (never rebalanced) portfolio for future comparison
portfolio_ew_unbalanced = portfolio_ew.copy()

# rebalancing loop: one iteration per month after the first one (month numbers 1..max)
for i in range(1, portfolio_ew["MONTH_NUMBER"].max() + 1):

    # convenience filters: last date of the previous month, first date of month i,
    # and every row from month i onwards
    ending_date_filter = portfolio_ew["Date"] == portfolio_ew[portfolio_ew["MONTH_NUMBER"] == i - 1]["Date"].max()
    initial_date_filter = portfolio_ew["Date"] == portfolio_ew[portfolio_ew["MONTH_NUMBER"] == i]["Date"].min()
    current_month_filter = portfolio_ew["MONTH_NUMBER"] >= i
    
    # rebalancing of the number of shares and propagation to the rest of the DataFrame:
    # the portfolio value on the last date of the previous month is split evenly among the
    # tickers and converted into shares at the opening price of the first date of month i
    current_portfolio_value = portfolio_ew[ending_date_filter]["PORT_TOTAL_VAL_LAST"].iloc[0]
    portfolio_ew.loc[initial_date_filter, "PORT_STOCK_AMT"] = current_portfolio_value / number_of_companies / portfolio_ew["PX_OPEN"]
    stock_amounts = portfolio_ew.loc[initial_date_filter, ["Ticker", "PORT_STOCK_AMT"]]
    # tagging the new amounts with month i makes the left merge below fill only the rows of
    # month i; combine_first keeps the previous amount on every other row
    stock_amounts["MONTH_NUMBER"] = i
    portfolio_ew = pd.merge(portfolio_ew, stock_amounts, on=["Ticker", "MONTH_NUMBER"], how="left", suffixes=("", "_new"))
    portfolio_ew["PORT_STOCK_AMT"] = portfolio_ew["PORT_STOCK_AMT_new"].combine_first(portfolio_ew["PORT_STOCK_AMT"])
    portfolio_ew.drop(columns=["PORT_STOCK_AMT_new"], inplace=True)

    # calculating the new portfolio values for month i and all the following months, so that
    # the next iteration reads an up-to-date total on the last date of month i
    portfolio_ew.loc[current_month_filter, "PORT_STOCK_VAL_LAST"] = portfolio_ew["PX_LAST"] * portfolio_ew["PORT_STOCK_AMT"]
    portfolio_ew.loc[current_month_filter, "PORT_TOTAL_VAL_LAST"] = portfolio_ew.groupby("Date")["PORT_STOCK_VAL_LAST"].transform("sum")

# weight of each position in the total portfolio value on each date
portfolio_ew["PORT_SHARE"] = portfolio_ew["PORT_STOCK_VAL_LAST"] / portfolio_ew["PORT_TOTAL_VAL_LAST"]

In [7]:
# CALCULATION OF THE TURNOVER RATE OF BOTH PORTFOLIOS

portfolio_vw_x = portfolio_vw.copy()

# extracts the first and last date present in the data for each month
# (the list position is the month number)
month_numbers = range(stocks_data["MONTH_NUMBER"].max() + 1)
first_dates_of_month = [
    stocks_data.loc[stocks_data["MONTH_NUMBER"] == month, "Date"].min() for month in month_numbers
]
last_dates_of_month = [
    stocks_data.loc[stocks_data["MONTH_NUMBER"] == month, "Date"].max() for month in month_numbers
]

# gets the difference in stock amount at the turn of each month for each stock:
# amount held on the last date of a month minus the amount held on the first date of the
# previous month, stored on every row of the later month (month 0 keeps the initial 0)
portfolio_vw_x["PORT_STOCK_AMT_DIFF"] = 0

# pairs each month's first date with the following month's last date; the pairing stops
# at the last month by construction, so no exception is needed to end the loop
consecutive_months = zip(first_dates_of_month, last_dates_of_month[1:])
for month, (previous_first_date, current_last_date) in enumerate(consecutive_months, start=1):
    initial_amt = portfolio_vw_x.loc[portfolio_vw_x["Date"] == previous_first_date, ["PORT_STOCK_AMT", "Ticker"]]
    final_amt = portfolio_vw_x.loc[portfolio_vw_x["Date"] == current_last_date, ["PORT_STOCK_AMT", "Ticker"]]

    # per-ticker change in the number of shares between the two months
    difference = pd.merge(initial_amt, final_amt, on="Ticker", suffixes=("_0", "_1"))
    difference["PORT_STOCK_AMT_DIFF"] = difference["PORT_STOCK_AMT_1"] - difference["PORT_STOCK_AMT_0"]
    difference = difference.drop(columns=["PORT_STOCK_AMT_0", "PORT_STOCK_AMT_1"])
    difference["MONTH_NUMBER"] = month

    # the left merge fills only the rows of this month; combine_first keeps the values
    # already stored for every other month
    portfolio_vw_x = pd.merge(portfolio_vw_x, difference, how="left", on=["Ticker", "MONTH_NUMBER"], suffixes=("_old", "_inc"))
    portfolio_vw_x["PORT_STOCK_AMT_DIFF"] = portfolio_vw_x["PORT_STOCK_AMT_DIFF_inc"].combine_first(portfolio_vw_x["PORT_STOCK_AMT_DIFF_old"])
    portfolio_vw_x = portfolio_vw_x.drop(columns=["PORT_STOCK_AMT_DIFF_old", "PORT_STOCK_AMT_DIFF_inc"])

portfolio_vw = portfolio_vw_x