# **Section 3: Form a portfolio, data selection, and necessary transformation**

In [13]:
import pandas as pd
import yfinance as yf
import numpy as np
from statsmodels.tsa.stattools import adfuller

## Part 1 - Creating portfolio

In [None]:
# Portfolio constituents, grouped by sector.
tickers = [
    # Information Technology
    'AAPL', 'MSFT',
    # Health Care
    'BIIB', 'JNJ', 'LLY', 'MRK', 'PFE',
    # Consumer Discretionary
    'AMZN', 'NKE',
    # Financials
    'JPM', 'BAC', 'C', 'MS',
    # Communication Services
    'GOOGL',
    # Industrials
    'HON', 'UNP',
    # Consumer Staples
    'PG', 'KO', 'WMT', 'CL', 'TSN',
    # Energy
    'XOM', 'CVX',
    # Utilities
    'NEE',
    # Real Estate
    'PLD', 'AMT',
    # Materials
    'LIN',
]

# Equal-weighted portfolio: every ticker receives the same share.
equal_weight = 1/len(tickers)
weights = dict.fromkeys(tickers, equal_weight)
# Monthly adjusted closing prices for every ticker in the portfolio.
# auto_adjust=False is pinned explicitly because the 'Adj Close' column selected below is
# only returned when yfinance is not auto-adjusting prices (newer releases adjust by default).
prices = yf.download(
    tickers,
    start='1976-04-01',
    end='2023-10-01',
    interval='1mo',
    auto_adjust=False,
    progress=False,
)['Adj Close']

## Part 2 - Clean data

In [19]:
# Relabel each row by its 'YYYY-MM' month string, then keep only the months
# in which every ticker has a price.
prices.index = pd.to_datetime(prices.index).strftime('%Y-%m')
prices = prices.dropna()
prices.head()

Ticker,AAPL,AMT,AMZN,BAC,BIIB,C,CL,CVX,GOOGL,HON,...,MSFT,NEE,NKE,PFE,PG,PLD,TSN,UNP,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-09,0.583704,11.720683,2.043,27.29764,61.169998,278.598969,14.123346,25.307053,3.235232,21.670811,...,17.250336,4.690568,7.723268,12.824572,30.943274,18.852251,11.653689,9.766552,11.790462,24.749004
2004-10,0.789319,13.12564,1.7065,28.502588,58.16,280.177765,13.948285,25.033417,4.758987,20.353401,...,17.449978,4.73039,7.979589,12.133051,29.262321,19.096685,10.54797,10.549778,11.950032,25.204752
2004-11,1.009997,13.843381,1.984,29.444395,58.68,285.172729,14.453037,25.759977,4.542805,21.350525,...,16.726269,4.828564,8.308352,11.638512,30.722244,20.58061,11.922842,10.628514,11.53781,26.244287
2004-12,0.970079,14.049546,2.2145,29.902573,66.610001,307.030701,16.077785,24.962433,4.812658,21.512022,...,18.579523,5.180111,8.900121,11.337541,31.641409,20.807276,13.417189,11.266834,11.706243,26.391441
2005-01,1.158371,13.835753,2.161,29.797823,64.959999,312.574799,16.511471,25.86091,4.883303,21.858309,...,18.273577,5.311087,8.513954,10.186501,30.578627,19.383291,12.520276,10.032343,11.641443,26.566484


## Part 4 - Stationarity tests on MEVs

In [20]:
# Load the historical macroeconomic variables (MEVs) table from CSV.
# The statements that follow read its 'Date' and 'Scenario Name' columns.
# NOTE(review): presumably the Fed's 2024 historic domestic table — confirm the source.
MEVs = pd.read_csv('2024-Table_2A_Historic_Domestic.csv')
# Each quarter is labelled by its final month.
quarter_to_month = dict(Q1='03', Q2='06', Q3='09', Q4='12')

def convert_to_yyyy_mm(quarter_str):
    """
    Convert a whitespace-separated '<year> <quarter>' label into a 'YYYY-MM' string.

    Parameters:
    - quarter_str: str - A label such as '1976 Q1'; it must split into exactly two parts.

    Returns:
    - str - The year joined by '-' to the quarter's month from `quarter_to_month`.
    """
    year, quarter = quarter_str.split()
    return "-".join((year, quarter_to_month[quarter]))
    
# Re-key the table by 'YYYY-MM' date strings and discard the scenario label column.
MEVs = (
    MEVs
    .assign(Date=MEVs['Date'].apply(convert_to_yyyy_mm))
    .set_index('Date')
    .drop(columns=['Scenario Name'])
)

MEVs.head()

Unnamed: 0_level_0,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield,BBB corporate yield,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1976-03,9.3,14.0,5.0,9.6,7.7,4.7,4.9,7.4,7.6,,8.9,6.8,,22.9,50.9,
1976-06,3.0,7.2,2.3,5.8,7.6,3.6,5.2,7.4,7.6,,8.8,6.9,,23.6,51.8,
1976-09,2.2,7.6,3.2,9.6,7.7,6.5,5.2,7.3,7.6,,9.0,7.1,,24.2,52.6,
1976-12,2.9,10.5,2.6,9.2,7.8,5.9,4.7,6.5,7.1,,8.8,6.5,,25.2,53.4,
1977-03,4.8,11.7,0.9,8.4,7.5,7.5,4.6,6.8,7.2,,8.7,6.3,,26.2,55.0,


In [21]:
def make_stationary(series, significance_level=0.05, max_diffs=5):
    """
    Differences a pandas Series until it becomes stationary based on the Augmented Dickey-Fuller test.
    
    Parameters:
    - series: pd.Series - The time series data to test for stationarity.
    - significance_level: float - The p-value threshold to consider the series stationary (default is 0.05).
    - max_diffs: int - The largest number of differences to try before giving up (default is 5).
    
    Returns:
    - (num_diffs, diff_series): tuple of (int, pd.Series) - The number of differences applied and
      the resulting series. When no differencing is needed, a copy of the input is returned
      (missing values included).
    
    Raises:
    - ValueError: If the series is still not stationary after `max_diffs` differences.
    """
    diff_series = series.copy()
    
    # Bounded loop: a series that never passes the test (e.g. a NaN p-value) would otherwise
    # be differenced until it is too short for adfuller to run.
    for num_diffs in range(max_diffs + 1):
        if num_diffs > 0:
            diff_series = diff_series.diff().dropna()
        
        # adfuller cannot handle missing values, so test on the non-missing observations only.
        p_value = adfuller(diff_series.dropna())[1]
        
        if p_value < significance_level:
            return num_diffs, diff_series
    
    raise ValueError(
        f"Series {series.name!r} is not stationary at the {significance_level} level "
        f"after {max_diffs} differences."
    )

In [22]:
# Record how many differences each MEV needs and swap in the differenced
# series wherever at least one difference was required.
diffs_needed = pd.DataFrame(index=MEVs.columns, columns=['Differences'])
for MEV in MEVs.columns:
    n_diffs, differenced = make_stationary(MEVs[MEV])
    diffs_needed.loc[MEV] = n_diffs
    if n_diffs == 0:
        continue
    MEVs[MEV] = differenced
# Keep only the dates on which every MEV has a value.
MEVs = MEVs.dropna()
MEVs.head()

Unnamed: 0_level_0,Real GDP growth,Nominal GDP growth,Real disposable income growth,Nominal disposable income growth,Unemployment rate,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield,BBB corporate yield,Mortgage rate,Prime rate,Dow Jones Total Stock Market Index (Level),House Price Index (Level),Commercial Real Estate Price Index (Level),Market Volatility Index (Level)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1990-03,4.4,9.0,3.3,2.6,5.3,3.0,0.1,0.5,0.5,0.6,0.3,-0.5,-146.4,1.1,-1.0,27.3
1990-06,1.5,6.1,3.0,-2.6,5.3,-3.1,-0.1,0.2,0.3,0.3,0.2,0.0,150.9,0.0,-0.9,24.2
1990-09,0.3,3.7,0.1,-1.6,5.7,3.1,-0.2,-0.2,0.0,-0.1,-0.2,0.0,-545.1,-0.1,-0.5,36.5
1990-12,-3.6,-0.7,-3.2,-3.1,6.1,-0.1,-0.5,-0.4,-0.3,0.3,-0.1,0.0,222.1,-0.4,-0.4,34.0
1991-03,-1.9,2.0,1.2,1.3,6.6,-4.0,-1.0,-0.4,-0.3,-0.5,-0.5,-0.8,482.3,-0.6,-1.0,36.2


In [24]:
# Show only the MEVs that had to be differenced at least once.
diffs_needed.loc[diffs_needed['Differences'].ne(0)]

Unnamed: 0,Differences
Nominal disposable income growth,1
CPI inflation rate,1
3-month Treasury rate,1
5-year Treasury yield,1
10-year Treasury yield,1
BBB corporate yield,1
Mortgage rate,1
Prime rate,1
Dow Jones Total Stock Market Index (Level),1
House Price Index (Level),1


## Part 5 - Summary Statistics

### For the stock prices:

In [25]:
# Summary statistics with one row per ticker.
prices.describe().transpose()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAPL,229.0,41.089456,51.693559,0.583704,5.58398,19.840925,45.121754,194.971786
AMT,229.0,93.712153,73.646326,11.720683,32.260513,65.349594,142.240662,267.634766
AMZN,229.0,45.62984,53.076733,1.3445,4.528,16.122,85.936501,175.3535
BAC,229.0,21.329719,10.299661,3.136745,12.113328,21.554998,29.444395,44.192059
BIIB,229.0,188.913712,115.398784,34.450001,57.389999,219.289993,289.730011,422.23999
C,229.0,94.3899,107.034432,11.438828,35.938484,43.804794,61.727062,384.07196
CL,229.0,45.405716,19.576114,13.948285,26.109667,50.561008,61.057652,79.470924
CVX,229.0,70.391146,32.970924,24.89118,43.791653,69.687386,85.466705,169.508759
GOOGL,229.0,40.853445,37.465174,3.235232,12.862294,26.935305,56.320042,147.680313
HON,229.0,85.128545,60.215034,17.792784,32.817371,70.846855,132.052353,217.606293


### The stock prices are monthly and run from September 2004 to September 2023 (229 observations per ticker).

### For the MEVs:

In [26]:
# Summary statistics with one row per MEV.
MEVs.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Real GDP growth,136.0,2.541912,4.532672,-28.0,1.475,2.55,4.025,34.8
Nominal GDP growth,136.0,4.873529,5.098456,-29.2,3.6,5.0,6.5,39.7
Real disposable income growth,136.0,2.811029,7.538795,-27.6,1.2,2.75,4.225,56.0
Nominal disposable income growth,136.0,-0.017647,12.432013,-86.6,-1.925,0.2,2.0,69.3
Unemployment rate,136.0,5.758088,1.742946,3.5,4.475,5.4,6.7,13.0
CPI inflation rate,136.0,-0.009559,2.423462,-15.2,-1.2,0.0,1.025,8.4
3-month Treasury rate,136.0,-0.017647,0.426334,-1.3,-0.1,0.0,0.2,1.6
5-year Treasury yield,136.0,-0.025735,0.415977,-1.1,-0.3,0.0,0.2,1.2
10-year Treasury yield,136.0,-0.025735,0.368178,-0.9,-0.3,0.0,0.2,1.0
BBB corporate yield,136.0,-0.026471,0.484613,-1.6,-0.3,0.0,0.225,2.6


### After differencing and dropping incomplete rows, the MEVs are quarterly and run from Q1 1990 to Q4 2023 (136 observations). When the MEVs are combined with the stock returns in the later analysis, the quarters that precede the start of the stock return data will likely be dropped.

## **Section 4: Stress Testing using Fama-French three-factor model**

## Part 1 - Picking a subset of MEVs

### We want to capture the broad economic picture at each time step while avoiding redundancy. To do this, we chose the CPI inflation rate, real GDP growth, real disposable income growth, and the 3-month, 5-year, and 10-year Treasury rates.

In [37]:
# Keep only the MEVs used as regressors from here on.
# NOTE(review): the narrative above names six variables, but this list also keeps
# 'Market Volatility Index (Level)' — confirm which set is intended.
MEVs = MEVs.loc[:, [
    'Real GDP growth',
    'Real disposable income growth',
    'CPI inflation rate',
    '3-month Treasury rate',
    '5-year Treasury yield',
    '10-year Treasury yield',
    'Market Volatility Index (Level)',
]]

## Part 2 - Report the results from the Fama-French three factor model

In [29]:
# Portfolio returns

# Simple month-over-month returns; the first row has no prior month and is dropped.
# .copy() makes `returns` an independent frame so adding a column below does not
# write into a slice of the pct_change() result.
returns = prices.pct_change().iloc[1:].copy()

# Weighted sum of the constituent returns, using the weight unpacked from `weights` directly.
returns['portfolio'] = sum(returns[stock] * weight for stock, weight in weights.items())

In [35]:
# Source column name -> short factor name used throughout the analysis.
ff_column_map = {
    'FAMA-FRENCH MARKET FACTOR': 'MKT',
    'FAMA-FRENCH SIZE FACTOR (SMB)': 'SMB',
    'FAMA-FRENCH VALUE FACTOR (HML)': 'HML',
    'MOMENTUM FACTOR': 'UMD',
}

# Read the factor sheet, keep the four factor columns, and shorten their names.
FF_factors = (
    pd.read_excel('wrds_data.xlsx', sheet_name='returns', index_col=0)
    [list(ff_column_map)]
    .rename(columns=ff_column_map)
)
FF_factors.head()


  warn("""Cannot parse header or footer so it will be ignored""")


Unnamed: 0_level_0,MKT,SMB,HML,UMD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1963-01,0.0493,0.0308,0.0221,-0.0211
1963-02,-0.0238,0.0048,0.0218,0.0253
1963-03,0.0308,-0.0259,0.0206,0.0162
1963-04,0.0451,-0.0134,0.01,-0.0009
1963-05,0.0176,0.0113,0.0254,0.0033


In [36]:
# Attach the portfolio return to the factor table (aligned on the date index)
# and keep only the months where every column is present.
FF_model = FF_factors.assign(portfolio=returns['portfolio']).dropna()
FF_model

Unnamed: 0_level_0,MKT,SMB,HML,UMD,portfolio
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-10,0.0143,0.0015,-0.0022,-0.0138,0.022147
2004-11,0.0454,0.0374,0.0141,0.0316,0.029243
2004-12,0.0343,-0.0003,-0.0022,-0.0287,0.051720
2005-01,-0.0276,-0.0172,0.0206,0.0305,-0.013803
2005-02,0.0189,-0.0057,0.0154,0.0337,0.014125
...,...,...,...,...,...
2023-05,0.0035,0.0160,-0.0774,-0.0065,-0.034575
2023-06,0.0647,0.0155,-0.0020,-0.0235,0.043780
2023-07,0.0321,0.0205,0.0411,-0.0405,0.022336
2023-08,-0.0239,-0.0320,-0.0108,0.0377,-0.021259


In [74]:
# Regress the portfolio return on the four factor columns (MKT, SMB, HML, UMD).
# NOTE(review): get_ols_metrics is not defined in this part of the notebook — presumably it
# comes from an earlier section; confirm it is defined before this cell on a fresh kernel run.
FF_portfolio = get_ols_metrics(FF_model[['MKT', 'SMB', 'HML', 'UMD']], FF_model['portfolio'])
FF_portfolio

Unnamed: 0,alpha,MKT,SMB,HML,UMD,r-squared,Info Ratio
portfolio,0.005963,0.912181,-0.271542,0.096881,-0.017913,0.901248,0.448041


## The fitted Fama-French model (with the momentum factor) for our portfolio is given by: 
$$
\Huge
E[r_p] = 0.006 + 0.91\,MKT - 0.27\,SMB + 0.10\,HML - 0.02\,UMD
$$

## Part 3 - Identify the impact of our chosen MEVs on Fama-French factors.

### Note: Here we merged the dataframes of the FF factors with the MEVs dataframe, inherently dropping all months that don't fall at the end of a quarter. This was to maintain uniformity and ensure an accurate result. We felt this method was better than interpolating MEV data because those numbers come straight from the Fed and it wouldn't be reliable to try and subjectively interpret gaps in data.

In [75]:
# Keep only the dates present in both the factor table and the MEV table.
FF_model_MEVs = pd.merge(
    FF_factors,
    MEVs,
    how='inner',
    left_index=True,
    right_index=True,
)

In [76]:
# Empty coefficient table: one row per factor, one column per MEV.
alltime_MEVs_effects_on_factors = pd.DataFrame(
    columns=MEVs.columns,
    index=FF_model.columns[:-1],  # every FF_model column except the trailing 'portfolio'
)
regressors = FF_model_MEVs.loc[:, MEVs.columns]

In [77]:
# Regress each factor on the MEVs and copy the fitted MEV coefficients into the table.
for factor in FF_model.columns[:-1]:
    reg = get_ols_metrics(regressors, FF_model_MEVs[factor])
    # Skip the first column and the last two columns of the metrics frame
    # (alpha, r-squared and Info Ratio in the portfolio regression output shown earlier).
    mev_columns = reg.columns[1:-2]
    for MEV in mev_columns:
        coefficient = reg[MEV].values[0]
        alltime_MEVs_effects_on_factors.loc[factor, MEV] = coefficient

In [78]:
# Display the MEV-on-factor coefficient table filled in by the regression loop above.
alltime_MEVs_effects_on_factors

Unnamed: 0,Real GDP growth,Real disposable income growth,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield
MKT,0.001038,0.000347,-0.000258,0.000558,-0.011286,-0.000289
SMB,0.000407,-0.000307,-0.000689,0.000178,-0.031251,0.031509
HML,-0.000108,0.000581,0.000561,-0.00137,0.011316,-0.011033
UMD,0.001034,-0.000257,-0.000657,-0.006656,-0.000665,-0.0067


### This dataframe shows the coefficients of the linear regressions, one regression per Fama-French factor $i$:
$$
\Huge
FF_i = \alpha_i + \beta_{i,1}MEV_1 + \beta_{i,2}MEV_2 + \ldots + \beta_{i,n}MEV_n
$$

## Part 4 - Investigating the impact of the MEVs on Fama-French factors during *stressed times*

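### We are picking all date ranges of stressed times after 1976, where our raw data for FF factors and MEVs starts. This is intended to give a full picture of the effect of the MEVs on the FF factors over the past ~5 decades, since a smaller sample would not accurately describe the effects. Note that the cleaned MEV table shown in Section 3 begins in Q1 1990, so the 1980 and 1981-1982 windows contribute observations only if those earlier quarters are retained.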

In [79]:
# Factors and MEVs on their common dates.
df = pd.merge(FF_factors, MEVs, how='inner', left_index=True, right_index=True)

# (start, end) 'YYYY-MM' labels of each stressed window; both ends are inclusive in .loc slicing.
# NOTE(review): the cleaned MEV table displayed earlier starts at 1990-03, so the 1980 and
# 1981-82 windows likely select no rows — confirm.
stressed_date_ranges = [
    ('1980-01', '1980-06'),
    ('1981-06', '1982-12'),
    ('1990-06', '1991-03'),
    ('2001-03', '2001-12'),
    ('2007-12', '2009-06'),
    ('2020-03', '2020-06'),
]

# Stack the rows that fall inside any stressed window.
stressed_windows = [df.loc[first:last] for first, last in stressed_date_ranges]
stressed_data = pd.concat(stressed_windows)

In [80]:
# Same regressions as the all-time case, restricted to the stressed windows.
stressed_MEVs_effects_on_factors = pd.DataFrame(
    columns=MEVs.columns,
    index=FF_model.columns[:-1],  # every FF_model column except the trailing 'portfolio'
)
regressors = stressed_data.loc[:, MEVs.columns]
for factor in FF_model.columns[:-1]:
    reg = get_ols_metrics(regressors, stressed_data[factor])
    # Skip the first column and the last two columns of the metrics frame.
    mev_columns = reg.columns[1:-2]
    for MEV in mev_columns:
        coefficient = reg[MEV].values[0]
        stressed_MEVs_effects_on_factors.loc[factor, MEV] = coefficient
stressed_MEVs_effects_on_factors

Unnamed: 0,Real GDP growth,Real disposable income growth,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield
MKT,-0.003533,-0.001477,0.000422,-0.005826,0.021461,-0.043141
SMB,-0.000399,-0.000226,-0.001747,-0.016149,0.017437,-0.008091
HML,6.3e-05,-0.000588,0.000285,0.00031,0.007701,-0.002738
UMD,0.006836,0.003736,0.001189,-0.007574,-0.022251,0.003064


### This table similarly shows the coefficients of the MEVs on FF factors.

## Part 5 - Projecting the performance of our portfolio

In [88]:
# Load the severe-scenario MEVs, key them by 'YYYY-MM', and keep only the MEVs we're using
MEVs_severe = pd.read_csv('MEV_severe.csv')
MEVs_severe = (
    MEVs_severe
    .assign(Date=MEVs_severe['Date'].apply(convert_to_yyyy_mm))
    .set_index('Date')
    .loc[:, MEVs.columns]
)

In [89]:
# Transform the severe-scenario MEVs exactly as the historical MEVs were transformed:
# reuse the differencing orders already recorded in `diffs_needed` instead of re-running
# the ADF test on the scenario path. The MEV-on-factor coefficients were estimated on the
# historical transformation, so a different order per column would make them inapplicable;
# the scenario path is also presumably too short for a reliable unit-root test.
# NOTE(review): assumes `diffs_needed` holds the historical results at this point — confirm.
diffs_needed = diffs_needed.loc[MEVs_severe.columns]
for MEV in MEVs_severe.columns:
    for _ in range(int(diffs_needed.loc[MEV, 'Differences'])):
        MEVs_severe[MEV] = MEVs_severe[MEV].diff()
# Each differencing pass leaves a leading missing value; drop those quarters.
MEVs_severe = MEVs_severe.dropna()

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


Unnamed: 0_level_0,Real GDP growth,Real disposable income growth,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1976-06,3.0,2.3,-1.1,0.3,0.0,0.0
1976-09,2.2,3.2,2.9,0.0,-0.1,0.0
1976-12,2.9,2.6,-0.6,-0.5,-0.8,-0.5
1977-03,4.8,0.9,1.6,-0.1,0.3,0.1
1977-06,8.0,3.8,-0.3,0.2,0.0,0.1


In [91]:
# Preview the first rows of the transformed severe-scenario MEVs.
MEVs_severe.head()

Unnamed: 0_level_0,Real GDP growth,Real disposable income growth,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-03,2.0,2.8,0.1,0.1,0.0,0.4
2025-06,-1.7,1.3,0.0,0.1,0.1,-0.2
2025-09,-2.1,0.5,0.0,0.1,0.1,0.1
2025-12,5.3,3.6,0.1,0.1,0.0,-4.440892e-16
2026-03,-6.0,0.3,0.0,0.1,0.1,7.771561e-16


In [90]:
# Re-display the all-time MEV-on-factor coefficients for reference in the projection step.
alltime_MEVs_effects_on_factors

Unnamed: 0,Real GDP growth,Real disposable income growth,CPI inflation rate,3-month Treasury rate,5-year Treasury yield,10-year Treasury yield
MKT,0.001038,0.000347,-0.000258,0.000558,-0.011286,-0.000289
SMB,0.000407,-0.000307,-0.000689,0.000178,-0.031251,0.031509
HML,-0.000108,0.000581,0.000561,-0.00137,0.011316,-0.011033
UMD,0.001034,-0.000257,-0.000657,-0.006656,-0.000665,-0.0067


In [None]:
# TODO: run the regression using all time MEV coefficients against severe MEV values to get MKT, SMB, HML, UMD values 
# and then plug those values into the regression equation for earlier at each time step to get portfolio return