In [1]:
from pathlib import Path
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import yfinance as yf
import numpy as np

##### Portfolio

In [2]:
# Load portfolio data
df_holding = pd.read_excel('portfolio.xls')
tickers = df_holding['Ticker'].tolist()
shares = df_holding.set_index('Ticker')['Shares']

# Fetch sector information using yfinance
sector_mapping = {}
for ticker in tickers:
    try:
        info = yf.Ticker(ticker).info
        sector_mapping[ticker] = info.get('sector', 'Unknown')  # Handle missing sector info
    except Exception as e:
        print(f"Error fetching sector info for {ticker}: {e}")
        sector_mapping[ticker] = 'Unknown'
sector_mapping = pd.Series(sector_mapping)

# Download and process price data for all tickers
dates = pd.date_range(start='2017-10-31', end='2023-12-31', freq='M')
price_data = {}
for ticker in tickers:
    try:
        data = yf.download(ticker, start='2017-10-01', end='2023-12-31', auto_adjust=True, progress=False)
        data.columns = ['Close', 'High', 'Low', 'Open', 'Volume']
        if data.empty:
            print(f"No data found for {ticker}, filling with zeros")
            price_data[ticker] = pd.Series(0, index=dates)
            continue
        monthly_prices = data['Close'].resample('M').last().ffill()
        price_data[ticker] = monthly_prices.reindex(dates, method='ffill').fillna(0)
    except Exception as e:
        print(f"Error processing {ticker}: {e}")
        price_data[ticker] = pd.Series(0, index=dates)

# Create price DataFrame and calculate returns
prices = pd.DataFrame(price_data)
monthly_returns = prices.pct_change()

# Adjust shares based on monthly return conditions
adjusted_shares = shares.copy()
total_value = pd.Series(0, index=monthly_returns.index)  

for date in monthly_returns.index[1:]:
    for ticker in tickers:
        if monthly_returns.at[date, ticker] > 0.20:
            adjusted_shares[ticker] *= 0.8
        elif monthly_returns.at[date, ticker] < -0.20:
            adjusted_shares[ticker] *= 1.1

    # Calculate market values with adjusted shares
    market_values = prices.loc[date] * adjusted_shares
    total_value.at[date] = market_values.sum()
portfolio_return = total_value.pct_change() * 100

# Create portfolio performance DataFrame
portfolio_df = pd.DataFrame({
    'Date': total_value.index,
    'Portfolio_Value': total_value.values,
    'Portfolio_Return': portfolio_return.values
}).set_index('Date')

sector_weights = pd.DataFrame(index=dates)

for date in monthly_returns.index[1:]:
    market_values = prices.loc[date] * adjusted_shares
    market_values_sector = market_values.groupby(sector_mapping).sum()

    # Calculate sector weights
    sector_weights_at_date = market_values_sector / total_value.at[date]

    # Store weights in pivot format
    for sector in sector_weights_at_date.index:
        sector_weights.at[date, f'Port_{sector}_Wgt'] = sector_weights_at_date[sector]

portfolio_df = pd.concat([portfolio_df, sector_weights], axis=1)
portfolio_df.dropna(inplace=True)
portfolio_df = portfolio_df[~portfolio_df.isin([np.inf, -np.inf]).any(axis=1)]

portfolio_df

Unnamed: 0,Portfolio_Value,Portfolio_Return,Port_Communication Services_Wgt,Port_Consumer Cyclical_Wgt,Port_Consumer Defensive_Wgt,Port_Energy_Wgt,Port_Financial Services_Wgt,Port_Healthcare_Wgt,Port_Industrials_Wgt,Port_Real Estate_Wgt,Port_Technology_Wgt,Port_Utilities_Wgt
2017-12-31,205739.844894,1.295974,0.046059,0.000000,0.168544,0.040773,0.205932,0.260547,0.020955,0.000000,0.054014,0.011641
2018-01-31,218870.712452,6.382268,0.044170,0.000000,0.162784,0.039001,0.210809,0.254661,0.018253,0.021461,0.052588,0.011647
2018-02-28,210676.614895,-3.743807,0.042267,0.000000,0.151910,0.036133,0.217477,0.250749,0.016684,0.019813,0.060241,0.011759
2018-03-31,206156.371803,-2.145584,0.042548,0.000000,0.155936,0.037141,0.215163,0.249389,0.016289,0.019139,0.058952,0.013209
2018-04-30,211168.167725,2.431065,0.042285,0.000000,0.146093,0.039023,0.219604,0.257890,0.016598,0.018542,0.056492,0.014146
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-31,361904.998371,-2.481593,0.021751,0.001870,0.163085,0.039302,0.260103,0.290786,0.013627,0.024158,0.163597,0.012911
2023-09-30,349790.242830,-3.347496,0.021380,0.001921,0.161239,0.042737,0.254547,0.304429,0.013627,0.023905,0.153399,0.014194
2023-10-31,346771.517218,-0.863010,0.022641,0.001818,0.160681,0.037884,0.259967,0.313491,0.013507,0.023119,0.143884,0.014119
2023-11-30,371201.293828,7.044920,0.023597,0.002352,0.153473,0.035001,0.274202,0.301267,0.014148,0.023138,0.152884,0.014274


##### Macro Factors

In [3]:
directory = Path('C:/Users/Kathe/Documents/Python/Quantitative Finance/Multi-Factor Risk Model for Equity Portfolio/')
dataframes = []

# Loop through each CSV file in the directory that starts with 'macro'
for file_path in directory.glob('macro*.csv'):
    df = pd.read_csv(file_path)
    file_name = file_path.stem.lower()
    df['observation_date'] = pd.to_datetime(df['observation_date'])
    
    df.set_index('observation_date', inplace=True)
    df = df.resample('M').mean(numeric_only=True).interpolate(method='linear', limit_direction='both').sort_index()

    # Calculate change metrics
    value_column_name = df.columns[-1]
    if 'cpi' in file_name:
        df['Inflation'] = df[value_column_name].pct_change()
    elif 'treasury' in file_name:
        df['Yield_Change'] = df[value_column_name].diff()

    df.drop(columns=['observation_date'], errors='ignore', inplace=True)
    dataframes.append(df)


macro_df = pd.concat(dataframes)
macro_df = macro_df.sort_index().loc['2017-12-01':'2023-12-31']
macro_df = macro_df.groupby(macro_df.index).ffill()
macro_df.dropna(inplace=True)

colname_dict = {col: 'CPI' if 'cpi' in col.lower() else 'GDP_Growth' if 'nbea' in col.lower() else col for col in macro_df.columns}
macro_df.rename(columns=colname_dict, inplace=True)

macro_df

Unnamed: 0_level_0,CPI,Inflation,GDP_Growth,T10Y2Y,Yield_Change
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-12-31,247.805,0.002107,3.2,0.558500,-0.098167
2018-01-31,248.859,0.004253,3.3,0.551905,-0.006595
2018-02-28,249.529,0.002692,3.3,0.683684,0.131779
2018-03-31,249.577,0.000192,3.3,0.566667,-0.117018
2018-04-30,250.227,0.002604,3.3,0.484762,-0.081905
...,...,...,...,...,...
2023-08-31,306.138,0.005000,3.2,-0.733913,0.195087
2023-09-30,307.374,0.004037,3.2,-0.643000,0.090913
2023-10-31,307.653,0.000908,3.2,-0.269524,0.373476
2023-11-30,308.087,0.001411,3.1,-0.379524,-0.110000


##### Style Factors

In [4]:
start = dt.datetime(2017, 12, 1)
end = dt.datetime(2023, 12, 31)
fama_french_df = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start, end)[0]
fama_french_df.index = fama_french_df.index.to_timestamp() + pd.offsets.MonthEnd(0)
fama_french_df

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-12-31,1.06,-1.32,0.06,0.09
2018-01-31,5.57,-3.12,-1.28,0.12
2018-02-28,-3.65,0.26,-1.04,0.11
2018-03-31,-2.35,4.06,-0.20,0.11
2018-04-30,0.29,1.13,0.54,0.14
...,...,...,...,...
2023-08-31,-2.39,-3.20,-1.08,0.45
2023-09-30,-5.24,-2.49,1.45,0.43
2023-10-31,-3.18,-3.88,0.19,0.47
2023-11-30,8.83,-0.03,1.66,0.44


##### Sector Factors

In [5]:
# List of GICS Sector ETFs
gics_etfs = {
    'Communication Services': 'XLC',
    'Consumer Discretionary': 'XLY',  # Corrected ticker here
    'Consumer Staples': 'XLP',
    'Energy': 'XLE',
    'Financials': 'XLF',
    'Healthcare': 'XLV',
    'Industrials': 'XLI',
    'Information Technology': 'XLK',
    'Materials': 'XLB',
    'Real Estate': 'XLRE',
    'Utilities': 'XLU'
}

# Download sector ETF data
start_date = '2017-10-01'
end_date = '2023-12-31'
sector_prices = yf.download(list(gics_etfs.values()), start=start_date, end=end_date, auto_adjust=True, progress=False)['Close']
sector_prices.bfill(inplace=True)

# Calculate monthly returns
sector_df = sector_prices.resample('M').last().pct_change()
sector_df.columns = [f"{sector}_Rtn" for sector in gics_etfs.keys()]
sector_df.dropna(inplace=True)

sector_df

Unnamed: 0_level_0,Communication Services_Rtn,Consumer Discretionary_Rtn,Consumer Staples_Rtn,Energy_Rtn,Financials_Rtn,Healthcare_Rtn,Industrials_Rtn,Information Technology_Rtn,Materials_Rtn,Real Estate_Rtn,Utilities_Rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-11-30,0.009492,0.000000,0.017523,0.034586,0.041661,0.014138,0.055754,0.029231,0.026851,0.029096,0.050658
2017-12-31,0.021270,0.000000,0.052694,0.018881,0.021085,0.005458,0.022530,-0.004383,-0.060582,-0.005505,0.024582
2018-01-31,0.039980,0.000000,0.035843,0.065568,0.053654,0.070368,0.016347,-0.019126,-0.031131,0.065554,0.092410
2018-02-28,-0.052740,0.000000,-0.108350,-0.029253,-0.038630,-0.004091,-0.076271,-0.068090,-0.038793,-0.044949,-0.034691
2018-03-31,-0.041463,0.000000,0.017230,-0.041553,-0.026883,-0.037338,-0.009069,0.038290,0.037965,-0.029166,-0.023847
...,...,...,...,...,...,...,...,...,...,...,...
2023-08-31,-0.033014,-0.015409,0.016461,-0.026889,-0.019833,-0.015083,-0.039472,-0.030636,-0.061279,-0.007007,-0.017440
2023-09-30,-0.047791,-0.029476,0.024033,-0.030882,-0.059475,-0.064784,-0.047879,-0.072349,-0.056424,-0.029592,-0.055320
2023-10-31,-0.031700,-0.012963,-0.057528,-0.024420,-0.029789,0.000488,-0.013806,-0.028471,0.012897,-0.032624,-0.055162
2023-11-30,0.083487,0.078028,-0.007160,0.109394,0.088349,0.128956,0.041261,0.124773,0.051432,0.054360,0.109665


##### Final Dataset

In [6]:
final_df = pd.concat([portfolio_df, macro_df, fama_french_df, sector_df], axis=1)
final_df.dropna(inplace=True)
final_df.to_csv("final_data.csv")