In [1]:
from pathlib import Path
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import yfinance as yf
import numpy as np

import warnings
warnings.filterwarnings('ignore')

##### Portfolio

In [2]:
# Load portfolio data
df_holding = pd.read_excel('portfolio.xls')
tickers = df_holding['Ticker'].tolist()
shares = df_holding.set_index('Ticker')['Shares']

# Fetch sector information using yfinance
def get_sector_info(ticker):
    try:
        return yf.Ticker(ticker).info.get('sector', 'Unknown')
    except Exception as e:
        logging.warning(f"Error fetching sector info for {ticker}: {e}")
        return 'Unknown'

# Map tickers to their sectors
sector_mapping = {ticker: get_sector_info(ticker) for ticker in tickers}

# Download price data
price_data = yf.download(tickers, start='2016-10-01', end='2023-12-31',
                         auto_adjust=True, progress=False)['Close']
price_data = price_data.bfill()

# Calculate daily portfolio value
df_portfolio_value_daily = price_data.multiply(shares, axis=1)
df_portfolio_value_daily['Portfolio'] = df_portfolio_value_daily.sum(axis=1)

# Calculate stock momentum (12-month rolling return) and volatility (annualized 12-month std deviation)
daily_returns = df_portfolio_value_daily.pct_change().dropna()
momentum = daily_returns.rolling(window=252).mean().dropna()  # Approx. 252 trading days in a year
volatility = (daily_returns.rolling(window=252).std() * (252 ** 0.5)).dropna()

# Calculate sector-level values and weights
df_sector = pd.DataFrame.from_dict(sector_mapping, orient='index', columns=['Sector'])
sector_value = df_portfolio_value_daily.groupby(df_sector['Sector'], axis=1).sum()
sector_weight = sector_value.div(df_portfolio_value_daily['Portfolio'], axis=0)


# Change daily data into monthly data
df_portfolio_value_monthly = pd.DataFrame(df_portfolio_value_daily['Portfolio'].resample('M').last())
df_portfolio_value_monthly = df_portfolio_value_monthly.join(momentum.resample('M').mean().add_suffix('_Mom'))
df_portfolio_value_monthly = df_portfolio_value_monthly.join(volatility.resample('M').mean().add_suffix('_Vol'))
df_portfolio_value_monthly['Portfolio_Rtn'] = df_portfolio_value_monthly['Portfolio'].pct_change()
df_portfolio_value_monthly = df_portfolio_value_monthly.join(sector_weight.resample('M').last().add_suffix('_Wgt'))
df_portfolio_value_monthly = df_portfolio_value_monthly.join(sector_value.resample('M').last().pct_change().add_suffix('_Rtn'))

df_portfolio_value_monthly = df_portfolio_value_monthly.rename(columns={
    col: f'Port_{col}' for col in df_portfolio_value_monthly.columns 
    if ('_Mom' in col or '_Vol' in col or '_Wgt' or 'Rtn'in col) and 'Portfolio' not in col
})
portfolio_df = df_portfolio_value_monthly.dropna()

portfolio_df

Unnamed: 0_level_0,Portfolio,Port_AAPL_Mom,Port_CRM_Mom,Port_CVX_Mom,Port_DIS_Mom,Port_DUK_Mom,Port_EA_Mom,Port_GE_Mom,Port_JNJ_Mom,Port_JPM_Mom,...,Port_Utilities_Wgt,Port_Communication Services_Rtn,Port_Consumer Defensive_Rtn,Port_Energy_Rtn,Port_Financial Services_Rtn,Port_Healthcare_Rtn,Port_Industrials_Rtn,Port_Real Estate_Rtn,Port_Technology_Rtn,Port_Utilities_Rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-10-31,248047.082844,0.001364,0.001223,0.000780,0.000363,0.000629,0.001459,-0.000770,0.000801,0.001609,...,0.049577,0.000286,0.019720,-0.000051,0.049287,0.022910,-0.166253,-0.057862,0.101941,0.052312
2017-11-30,253853.665085,0.001899,0.001424,0.000479,0.000334,0.000834,0.001438,-0.001648,0.000871,0.001247,...,0.049405,-0.015338,0.052045,0.023659,0.029481,0.044068,-0.092758,0.034324,0.018890,0.019873
2017-12-31,254701.118908,0.001746,0.001585,0.000365,0.000281,0.000695,0.001264,-0.002108,0.000961,0.001023,...,0.046442,0.015129,0.019079,0.030567,0.015818,-0.010018,-0.039339,0.034963,-0.008206,-0.056851
2018-01-31,266100.428410,0.001621,0.001567,0.000648,0.000214,0.000288,0.001518,-0.002065,0.001030,0.001216,...,0.041487,0.095634,0.027548,0.019890,0.088862,0.039504,-0.073352,-0.063259,0.081834,-0.066699
2018-02-28,254712.703781,0.001027,0.001328,0.000278,-0.000050,0.000042,0.001635,-0.002539,0.000603,0.001146,...,0.042094,-0.047653,-0.101768,-0.109593,-0.006505,-0.051539,-0.120175,-0.071270,0.022574,-0.028803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-31,493557.885284,0.000505,0.000864,0.000320,-0.000883,-0.000520,-0.000138,0.002701,0.000183,0.001270,...,0.032170,-0.080733,-0.029446,0.016665,0.007339,-0.034393,0.001926,-0.076680,-0.027525,-0.040763
2023-09-30,476725.517101,0.000722,0.001576,0.000576,-0.000866,-0.000331,0.000046,0.003095,0.000068,0.001231,...,0.033103,-0.019043,-0.044412,0.051600,-0.050810,0.011058,-0.033465,-0.104229,-0.068525,-0.006081
2023-10-31,478358.655624,0.000926,0.001387,0.000204,-0.000510,0.000114,0.000219,0.002978,-0.000137,0.001156,...,0.033226,0.029230,-0.011968,-0.119247,0.008259,0.019871,-0.017368,-0.045977,0.023011,0.007138
2023-11-30,519033.149567,0.001087,0.001664,-0.000672,-0.000102,-0.000015,0.000245,0.002407,-0.000409,0.000670,...,0.032158,0.120432,0.022396,-0.011889,0.100808,0.028541,0.121237,0.144335,0.155912,0.050149


##### Macro Factors

In [3]:
directory = Path('C:/Users/Kathe/Documents/Python/Quantitative Finance/Multi-Factor Risk Model for Equity Portfolio/')
dataframes = []

# Loop through each CSV file in the directory that starts with 'macro'
for file_path in directory.glob('macro*.csv'):
    df = pd.read_csv(file_path)
    file_name = file_path.stem.lower()
    df['observation_date'] = pd.to_datetime(df['observation_date'])
    
    df.set_index('observation_date', inplace=True)
    df = df.resample('M').mean(numeric_only=True).interpolate(method='linear', limit_direction='both').sort_index()

    # Calculate change metrics
    value_column_name = df.columns[-1]
    if 'cpi' in file_name:
        df['Inflation'] = df[value_column_name].pct_change()
    elif 'treasury' in file_name:
        df['Yield_Change'] = df[value_column_name].diff()

    df.drop(columns=['observation_date'], errors='ignore', inplace=True)
    dataframes.append(df)


macro_df = pd.concat(dataframes)
macro_df = macro_df.sort_index().loc['2017-12-01':'2023-12-31']
macro_df = macro_df.groupby(macro_df.index).ffill()
macro_df.dropna(inplace=True)

colname_dict = {col: 'CPI' if 'cpi' in col.lower() else 'GDP_Growth' if 'nbea' in col.lower() else col for col in macro_df.columns}
macro_df.rename(columns=colname_dict, inplace=True)

macro_df

Unnamed: 0_level_0,CPI,Inflation,GDP_Growth,T10Y2Y,Yield_Change
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-12-31,247.805,0.002107,3.2,0.558500,-0.098167
2018-01-31,248.859,0.004253,3.3,0.551905,-0.006595
2018-02-28,249.529,0.002692,3.3,0.683684,0.131779
2018-03-31,249.577,0.000192,3.3,0.566667,-0.117018
2018-04-30,250.227,0.002604,3.3,0.484762,-0.081905
...,...,...,...,...,...
2023-08-31,306.138,0.005000,3.2,-0.733913,0.195087
2023-09-30,307.374,0.004037,3.2,-0.643000,0.090913
2023-10-31,307.653,0.000908,3.2,-0.269524,0.373476
2023-11-30,308.087,0.001411,3.1,-0.379524,-0.110000


##### Style Factors

In [4]:
start = dt.datetime(2017, 12, 1)
end = dt.datetime(2023, 12, 31)
fama_french_df = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start, end)[0]
fama_french_df.index = fama_french_df.index.to_timestamp() + pd.offsets.MonthEnd(0)
fama_french_df

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-31,1.06,-1.32,0.06,0.09
2018-01-31,5.57,-3.12,-1.28,0.12
2018-02-28,-3.65,0.26,-1.04,0.11
2018-03-31,-2.35,4.06,-0.20,0.11
2018-04-30,0.29,1.13,0.54,0.14
...,...,...,...,...
2023-08-31,-2.39,-3.20,-1.08,0.45
2023-09-30,-5.24,-2.49,1.45,0.43
2023-10-31,-3.18,-3.88,0.19,0.47
2023-11-30,8.83,-0.03,1.66,0.44


##### Sector Factors

In [5]:
gics_etfs = {
    'Communication Services': 'XLC',
    'Consumer Discretionary': 'XLY',  # Corrected ticker here
    'Consumer Staples': 'XLP',
    'Energy': 'XLE',
    'Financials': 'XLF',
    'Healthcare': 'XLV',
    'Industrials': 'XLI',
    'Information Technology': 'XLK',
    'Materials': 'XLB',
    'Real Estate': 'XLRE',
    'Utilities': 'XLU'
}

# Download sector ETF data
start_date = '2017-10-01'
end_date = '2023-12-31'
sector_prices = yf.download(list(gics_etfs.values()), start=start_date, end=end_date, auto_adjust=True, progress=False)['Close']
sector_prices.bfill(inplace=True)

# Calculate monthly returns
sector_df = sector_prices.resample('M').last().pct_change()
sector_df.columns = [f"{sector}_Rtn" for sector in gics_etfs.keys()]
sector_df.dropna(inplace=True)

sector_df

Unnamed: 0_level_0,Communication Services_Rtn,Consumer Discretionary_Rtn,Consumer Staples_Rtn,Energy_Rtn,Financials_Rtn,Healthcare_Rtn,Industrials_Rtn,Information Technology_Rtn,Materials_Rtn,Real Estate_Rtn,Utilities_Rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-11-30,0.009492,0.000000,0.017523,0.034586,0.041661,0.014138,0.055755,0.029231,0.026851,0.029096,0.050658
2017-12-31,0.021269,0.000000,0.052694,0.018882,0.021085,0.005458,0.022530,-0.004382,-0.060582,-0.005505,0.024583
2018-01-31,0.039980,0.000000,0.035843,0.065568,0.053654,0.070367,0.016347,-0.019126,-0.031131,0.065554,0.092411
2018-02-28,-0.052740,0.000000,-0.108350,-0.029254,-0.038630,-0.004090,-0.076271,-0.068090,-0.038793,-0.044949,-0.034691
2018-03-31,-0.041463,0.000000,0.017230,-0.041553,-0.026883,-0.037338,-0.009069,0.038290,0.037965,-0.029166,-0.023847
...,...,...,...,...,...,...,...,...,...,...,...
2023-08-31,-0.033015,-0.015409,0.016461,-0.026889,-0.019833,-0.015083,-0.039472,-0.030636,-0.061279,-0.007007,-0.017440
2023-09-30,-0.047790,-0.029476,0.024033,-0.030882,-0.059475,-0.064783,-0.047879,-0.072349,-0.056424,-0.029592,-0.055320
2023-10-31,-0.031700,-0.012963,-0.057528,-0.024420,-0.029789,0.000488,-0.013806,-0.028471,0.012897,-0.032624,-0.055162
2023-11-30,0.083487,0.078028,-0.007161,0.109394,0.088349,0.128956,0.041261,0.124773,0.051432,0.054360,0.109665


##### Final Dataset

In [6]:
final_df = pd.concat([portfolio_df, macro_df, fama_french_df, sector_df], axis=1)
final_df.dropna(inplace=True)
final_df.to_csv("final_data.csv")