In [176]:
import pandas as pd
import numpy as np

Supply:
```
DATE	Date
PERMIT	New Privately-Owned Housing Units Authorized in Permit-Issuing Places: Total Units (Thousands of Units, Seasonally Adjusted Annual Rate)
MSACSR	Monthly Supply of New Houses in the United States (Seasonally Adjusted)
TLRESCONS	Total Construction Spending: Residential in the United States (Millions of Dollars, Seasonally Adjusted Annual Rate)
EVACANTUSQ176N	Housing Inventory Estimate: Vacant Housing Units in the United States (Thousands of Units, Not Seasonally Adjusted)
CSUSHPISA	S&P/Case-Shiller U.S. National Home Price Index (Index Jan 2000=100, Seasonally Adjusted)
```
Demand:
```
DATE	Date
INTDSRUSM193N	Interest Rates, Discount Rate for United States (Percent per Annum, Not Seasonally Adjusted)
UMCSENT	University of Michigan: Consumer Sentiment
GDP	Gross Domestic Product (Billions of Dollars, Seasonally Adjusted Annual Rate)
MORTGAGE30US	30-Year Fixed Rate Mortgage Average in the United States (Percent, Not Seasonally Adjusted)
CSUSHPISA	S&P/Case-Shiller U.S. National Home Price Index (Index Jan 2000=100, Seasonally Adjusted)
MSPUS	Median Sales Price of Houses Sold for the United States (Not Seasonally Adjusted)
```

In [177]:
# Read each CSV and normalise the two columns the tables have in common:
# DATE is parsed to datetime and CSUSHPISA is coerced to a numeric dtype
# (entries that cannot be parsed become NaN instead of raising).
supply = pd.read_csv("supply.csv").assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE']),
    CSUSHPISA=lambda frame: pd.to_numeric(frame['CSUSHPISA'], errors='coerce'),
)
demand = pd.read_csv("demand.csv").assign(
    DATE=lambda frame: pd.to_datetime(frame['DATE']),
    CSUSHPISA=lambda frame: pd.to_numeric(frame['CSUSHPISA'], errors='coerce'),
)

### The Case-Shiller Home Price Index (CSUSHPISA) is taken as the dependent variable (y), serving as the indicator of change in home prices.

In [178]:
# Inner-join the demand and supply tables: a row is kept only when both
# tables contain the same DATE together with the same CSUSHPISA value.
data = demand.merge(supply, how='inner', on=['DATE', 'CSUSHPISA'])

In [179]:
# Promote DATE from an ordinary column to the row index.
# Note: re-running this cell alone fails, because DATE is no longer a column.
data = data.set_index(keys='DATE')

In [180]:
# Fill missing INTDSRUSM193N entries with that column's mean over the merged sample.
data = data.fillna(value={'INTDSRUSM193N': data['INTDSRUSM193N'].mean()})

In [181]:
# `data` is the merged, DATE-indexed DataFrame built in the cells above.
# These columns are replaced below by their row-over-row ratio.
columns_to_change = ['CSUSHPISA', 'UMCSENT', 'MSPUS', 'GDP', 'PERMIT', 'TLRESCONS', 'EVACANTUSQ176N']

# Coerce every column (not only the ones listed above) to a numeric dtype;
# values that cannot be parsed become NaN.
data = data.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Ratio of each row to the previous row, i.e. 1 + fractional change
# (this is NOT pct_change, which would subtract 1). The first row has no
# predecessor and therefore becomes NaN.
data[columns_to_change] = data[columns_to_change].div(data[columns_to_change].shift(periods=1))

# Scale the remaining three columns by 1/100.
data[['MORTGAGE30US', 'INTDSRUSM193N', 'MSACSR']] = data[['MORTGAGE30US', 'INTDSRUSM193N', 'MSACSR']].div(100)

In [182]:
# Discard every row that still contains at least one NaN in any column.
data = data.dropna(axis=0, how='any')

## Getting factor exposure

In [183]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
import pandas as pd
from sklearn.metrics import r2_score

### Linear Regression

In [184]:
# Target is the CSUSHPISA column; the features are every other column of `data`.
y = data['CSUSHPISA']
X = data.drop(columns='CSUSHPISA')

# Fit ordinary least squares on all available rows (no train/test split).
model = LinearRegression().fit(X, y)

# Fitted slopes, one per feature, in the same order as X.columns.
beta_coefficients = model.coef_

In [185]:
# In-sample R²: the predictions are made on the same rows the model was fitted on.
y_pred = model.predict(X)
r_squared = r2_score(y_true=y, y_pred=y_pred)
r_squared

0.8026584026452003

In [186]:
# Lay the fitted coefficients out as a single-row table, one column per feature.
factor_exposure = pd.DataFrame(beta_coefficients.reshape(1, -1), columns=X.columns)

In [187]:
# Display the single-row table of regression coefficients (one column per feature).
factor_exposure

Unnamed: 0,MORTGAGE30US,UMCSENT,INTDSRUSM193N,MSPUS,GDP,MSACSR,PERMIT,TLRESCONS,EVACANTUSQ176N
0,-0.281999,-0.038185,0.204399,0.144063,0.235189,-0.43347,-0.078955,0.207572,-0.010954


## Factor covariance

In [188]:
# Pairwise covariance matrix of the feature columns in X (rows/columns labelled by feature name).
factor_cov = X.cov()

In [189]:
# Scenario input: a shock of -0.1 (a 10% drop) applied to the GDP factor,
# stored as a one-row, one-column table.
target_shock = pd.DataFrame({'GDP': [-0.1]})

### Gaussian propagation

In [190]:
# Quadratic form beta · Sigma · beta^T: the variance of the linear combination
# of factors defined by the fitted coefficients, given the factor covariance.
# NOTE(review): target_shock is not used in this expression; confirm whether
# the GDP shock was meant to be propagated here.
factor_exposure @ factor_cov @ factor_exposure.T

Unnamed: 0,0
0,0.000323
