In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from pandas_datareader import data as web
from pathlib import Path
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

In [2]:
#================
# Factors
#================

# 1. Fama French
fama_french_data = web.DataReader('F-F_Research_Data_5_Factors_2x3', 'famafrench',  start='2015-01-01', end='2024-12-31')[0]
fama_french_data.index = fama_french_data.index.to_timestamp() + pd.offsets.MonthEnd(0)
for col in fama_french_data.columns:
    if col == 'RF': 
        fama_french_data[col] = fama_french_data[col] / 10
    else:
        fama_french_data[col] = fama_french_data[col]/100

# 2. Market Data
market_tickers = ['^GSPC', '^VIX', 'GC=F', 'CL=F']
rename_dict = {'^GSPC': 'SP500', '^VIX': 'VIX', 'GC=F': 'Gold', 'CL=F': 'Crude_Oil'}
market_data = yf.download(market_tickers, start='2015-01-01', end='2024-12-31', 
                          auto_adjust=True, progress=False)[['Close', 'Volume']]
market_data = market_data.rename(columns=rename_dict)

# Convert to monthly frequency
market_data = market_data.resample('M').last()  # Use the last closing price for each month
# Calculate metrics
market_returns = market_data.drop(columns=[('Close','VIX'), 'Volume']).pct_change().rename(columns=lambda x: f"{x}_Return")
market_data = market_data.join(market_returns)
#market_data['SP500_Momentum'] = market_data[('Close_Return','SP500_Return')].shift(12) / market_data[('Close_Return','SP500_Return')].shift(24) - 1
#market_data['SP500_Volatility'] = market_data[('Close_Return','SP500_Return')].rolling(window=30).std()
market_data['SP500_Illiquidity'] = abs(market_data[('Close_Return','SP500_Return')]) / market_data[('Volume','SP500')]

market_data.columns = [
    col[1] if col[0] != 'Volume' and col[1] != '' else col[0]
    for col in market_data.columns
]
market_data = market_data.loc[:, ~market_data.columns.str.contains('Volume')]


# 3. Macroeconomic Data 
macro_data = []
directory = Path('C:/Users/Kathe/Documents/Python/Quantitative Finance/Portfolio Risk Analysis/')

for file_path in directory.glob('macro*.csv'):
    df = pd.read_csv(file_path)
    file_name = file_path.stem.lower()
            
    date_col = 'observation_date' if 'observation_date' in df.columns else 'date'
    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)
            
    # Convert to monthly frequency
    df = df.resample('M').last() 
    df = df.interpolate(method='time', limit_direction='both')
            
    # Calculate changes
    value_col = df.columns[-1]
    if 'cpi' in file_name:
        df['Inflation'] = df[value_col].pct_change(12)
    elif 'treasury' in file_name:
        df['Yield_Change'] = df[value_col].diff()/100
            
    macro_data.append(df)

# Combine all macro data
if macro_data:
    macro_df = pd.concat(macro_data, axis=1)
    macro_df = macro_df.sort_index().loc['2015-01-01':'2024-12-31']
    macro_df = macro_df.groupby(macro_df.index).first()  
    macro_df = macro_df.ffill().dropna()
        
    colname_dict = {
        col: 'CPI' if 'cpi' in col.lower() else
             'GDP_Growth' if 'nbea' in col.lower() else
             'Unemp_Rate' if 'unemp' in col.lower() else
             'Credit_Spread' if 'baa' in col.lower() else
             col
        for col in macro_df.columns
    }
    macro_df = macro_df.rename(columns=colname_dict)
else:
    macro_df = pd.DataFrame()
macro_df['GDP_Growth'] = macro_df['GDP_Growth']/100
macro_df['T10Y2Y'] = macro_df['T10Y2Y']/100
macro_df['Credit_Spread'] = macro_df['Credit_Spread']/100
    
# 4. Combine all factor data
factors_df = pd.concat([fama_french_data, market_data,macro_df], axis=1).sort_index().ffill().dropna()
factors_df.index.name = 'Date'
factors_df

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,Crude_Oil,Gold,SP500,VIX,Crude_Oil_Return,Gold_Return,SP500_Return,SP500_Illiquidity,CPI,Inflation,Credit_Spread,GDP_Growth,T10Y2Y,Yield_Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2015-02-28,0.0613,0.0032,-0.0186,-0.0112,-0.0182,0.000,49.759998,1212.599976,2104.500000,13.34,0.031509,-0.051545,0.054893,1.547410e-11,235.342,-0.000870,0.0246,0.037798,0.0137,0.0016
2015-03-31,-0.0112,0.0307,-0.0038,0.0009,-0.0052,0.000,47.599998,1183.099976,2067.889893,15.29,-0.043408,-0.024328,-0.017396,5.152036e-12,235.976,-0.000220,0.0255,0.035360,0.0138,0.0001
2015-04-30,0.0059,-0.0309,0.0182,0.0006,-0.0061,0.000,59.630001,1182.400024,2085.510010,14.55,0.252731,-0.000592,0.008521,1.889451e-12,236.222,-0.001040,0.0260,0.033000,0.0147,0.0009
2015-05-31,0.0136,0.0084,-0.0115,-0.0180,-0.0075,0.000,60.299999,1189.400024,2107.389893,13.84,0.011236,0.005920,0.010491,2.671337e-12,237.001,0.000350,0.0272,0.029967,0.0151,0.0004
2015-06-30,-0.0153,0.0290,-0.0079,0.0044,-0.0158,0.000,59.470001,1171.500000,2063.110107,18.23,-0.013764,-0.015050,-0.021012,5.151763e-12,237.657,0.001796,0.0283,0.027033,0.0171,0.0020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.0161,-0.0365,-0.0113,0.0085,0.0086,0.048,73.550003,2493.800049,5648.399902,15.00,-0.055962,0.027735,0.022835,5.455214e-12,314.131,0.026109,0.0169,0.026326,0.0000,0.0020
2024-09-30,0.0174,-0.0102,-0.0259,0.0004,-0.0026,0.040,68.169998,2636.100098,5762.479980,16.73,-0.073148,0.057062,0.020197,4.563515e-12,314.851,0.024325,0.0163,0.025674,0.0015,0.0015
2024-10-31,-0.0097,-0.0088,0.0089,-0.0138,0.0103,0.039,69.260002,2738.300049,5705.450195,23.16,0.015989,0.038769,-0.009897,2.236219e-12,315.564,0.025714,0.0146,0.025000,0.0012,-0.0003
2024-11-30,0.0651,0.0478,-0.0005,-0.0262,-0.0217,0.040,68.000000,2657.000000,6032.379883,13.51,-0.018192,-0.029690,0.057301,2.344167e-11,316.449,0.027142,0.0144,0.025000,0.0005,-0.0007


In [3]:
# Obtain historical stock data 
portfolio_stocks = {    
    'Technology': ['NVDA', 'AMD', 'TSLA'],
    'Financials': ['GS', 'MS'],
    'Healthcare': ['UNH', 'DHR', 'LLY'],
    'Consumer Staples': ['TGT', 'WMT'],
    'Consumer Discretionary': ['AMZN', 'SBUX'],
    'Industrials': ['CAT', 'FDX'],
    'Materials': ['FCX','NUE'],
    'Energy': ['XOM', 'OXY'],
    'Communication': ['META', 'BKNG']
}

all_tickers = [ticker for sector in portfolio_stocks.values() for ticker in sector]
stock_data = yf.download(all_tickers, start='2015-01-01', end='2024-12-31', auto_adjust=True,progress=False)['Close']    
stock_data = stock_data.resample('M').last()
stock_data.dropna(inplace=True)

#====================
# Portfolio Details
#====================
# Initialize portfolio
portfolio_details = pd.DataFrame()
initial_weights = {ticker: 1/len(all_tickers) for ticker in all_tickers} # assuming equal weights to start with

for date in stock_data.index:
    date_prices = stock_data.loc[date]
    
    # Create a row for each stock (dates * tickers)
    for ticker in all_tickers:
        sector = [k for k, v in portfolio_stocks.items() if ticker in v][0]
            
        portfolio_details = pd.concat([
            portfolio_details,
            pd.DataFrame({
                'Date': [date],
                'Ticker': [ticker],
                'Sector': [sector],
                'Price': [date_prices[ticker]],
                'Shares': [np.nan], 
                'Weight': [initial_weights[ticker]],
                'Return': [np.nan] 
            })
        ])
    
# Calculate monthly returns 
portfolio_details['Return'] = portfolio_details.groupby('Ticker')['Price'].pct_change()
    
# Calculate shares based on initial $1M portfolio
initial_portfolio_value = 1_000_000
first_date = portfolio_details['Date'].min()
first_date_prices = portfolio_details[portfolio_details['Date'] == first_date].set_index('Ticker')['Price']

# Calculate shares for each stock (rounded, and assume constant overtime in this portfolio)
initial_shares = {}
for ticker in all_tickers:
    initial_weight = initial_weights[ticker]
    initial_price = first_date_prices[ticker]
    shares = round((initial_portfolio_value * initial_weight) / initial_price)
    initial_shares[ticker] = max(shares, 1)  

for ticker in all_tickers:
    portfolio_details.loc[portfolio_details['Ticker'] == ticker, 'Shares'] = initial_shares[ticker]
    
# Recalculate portfolio value and weights
portfolio_details['Market_Value'] = portfolio_details['Price'] * portfolio_details['Shares']
date_totals = portfolio_details.groupby('Date')['Market_Value'].transform('sum')
portfolio_details['Weight'] = portfolio_details['Market_Value'] / date_totals
initial_value = portfolio_details[portfolio_details['Date'] == first_date]['Market_Value'].sum()

portfolio_details = portfolio_details.set_index('Date').sort_index()
portfolio_details.dropna(inplace=True)
portfolio_details

Unnamed: 0_level_0,Ticker,Sector,Price,Shares,Weight,Return,Market_Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-02-28,NVDA,Technology,0.531456,108512.0,0.053586,0.153382,57669.301041
2015-02-28,AMD,Technology,3.110000,19455.0,0.056221,0.210117,60505.047959
2015-02-28,TSLA,Technology,13.556000,3684.0,0.046404,-0.001277,49940.303101
2015-02-28,GS,Financials,155.491211,355.0,0.051291,0.104262,55199.379883
2015-02-28,MS,Financials,27.190979,1947.0,0.049192,0.058563,52940.836121
...,...,...,...,...,...,...,...
2024-12-31,NUE,Materials,114.789948,1472.0,0.007322,-0.251018,168970.802734
2024-12-31,XOM,Energy,104.822395,892.0,0.004052,-0.103425,93501.576630
2024-12-31,OXY,Energy,48.213799,856.0,0.001788,-0.037568,41271.011536
2024-12-31,META,Communication,590.714417,662.0,0.016946,0.030291,391052.943726


In [4]:
#====================
# Portfolio Holdings
#====================

portfolio_summary = pd.DataFrame()
    
for date in stock_data.index:
    date_data = portfolio_details[portfolio_details.index == date]
        
    # Calculate sector allocations
    sector_allocation = date_data.groupby('Sector')['Market_Value'].sum() / date_data['Market_Value'].sum()
        
    # Calculate portfolio metrics
    portfolio_return = (date_data['Return'] * date_data['Weight'].shift(1)).sum()
    portfolio_value = date_data['Market_Value'].sum()
        
    summary_row = {
        'Date': date,
        'Portfolio_Value': portfolio_value,
        'Portfolio_Return': portfolio_return,
        'Number_of_Holdings': len(date_data),
        'Volatility_Annualized': np.nan  
    }

    for sector, alloc in sector_allocation.items():
        summary_row[f'Sector_{sector}'] = alloc
        
    portfolio_summary = pd.concat([
        portfolio_summary,
        pd.DataFrame(summary_row, index=[0])
    ], ignore_index=True)
    
# Calculate turnover (simplified - would normally compare with previous holdings)
portfolio_summary['Volatility_Annualized'] = portfolio_summary['Portfolio_Return'].rolling(12).std() * np.sqrt(12)
    
portfolio_summary = portfolio_summary.set_index('Date').sort_index()
portfolio_summary.dropna(inplace=True)
portfolio_summary

Unnamed: 0_level_0,Portfolio_Value,Portfolio_Return,Number_of_Holdings,Volatility_Annualized,Sector_Communication,Sector_Consumer Discretionary,Sector_Consumer Staples,Sector_Energy,Sector_Financials,Sector_Healthcare,Sector_Industrials,Sector_Materials,Sector_Technology
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2015-12-31,1.106723e+06,-0.002415,20,0.130721,0.119287,0.148903,0.079331,0.081420,0.090892,0.156369,0.079553,0.061688,0.182557
2016-01-31,1.019757e+06,-0.077503,20,0.155485,0.124154,0.150146,0.088967,0.089084,0.084780,0.161202,0.078522,0.059086,0.164060
2016-02-29,1.037763e+06,0.036239,20,0.142925,0.128185,0.140212,0.091909,0.089331,0.078374,0.157227,0.081734,0.067195,0.165833
2016-03-31,1.134975e+06,0.099231,20,0.173281,0.122409,0.134837,0.087830,0.083688,0.074009,0.151212,0.086654,0.077376,0.181984
2016-04-30,1.193080e+06,0.059554,20,0.180003,0.120637,0.132691,0.081084,0.086559,0.074944,0.148055,0.084089,0.085366,0.186576
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,2.091123e+07,0.017802,20,0.272883,0.025751,0.030316,0.014512,0.007246,0.018085,0.072847,0.018543,0.017486,0.795214
2024-09-30,2.167657e+07,0.096826,20,0.278319,0.027133,0.030451,0.014441,0.006758,0.017257,0.067931,0.018824,0.017651,0.799554
2024-10-31,2.237623e+07,-0.084542,20,0.283640,0.027174,0.029520,0.013873,0.006475,0.018182,0.061074,0.017749,0.015849,0.810104
2024-11-30,2.339762e+07,0.027858,20,0.231941,0.027297,0.031141,0.013596,0.006290,0.020022,0.057911,0.018425,0.015843,0.809474


In [5]:
factors_df.to_csv('factors.csv')
portfolio_details.to_csv('portfolio_details.csv')
portfolio_summary.to_csv("portfolio_summary.csv")