## Imports

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
from fredapi import Fred
from dotenv import load_dotenv
import os
import json

load_dotenv()

FRED_API_KEY = os.getenv("FRED_API_KEY")

fred = Fred(api_key=FRED_API_KEY)

## Helpers

In [2]:
def series_to_json(series, df):
    """
    Convert a FRED series to a JSON string.
    """
    path = f"datasets/fred_{series}.json"
    # If file exists, load and merge
    if os.path.exists(path):
        # Load existing data
        with open(path, "r") as f:
            existing_data = json.load(f)
        df_existing = pd.DataFrame(existing_data)
        df_existing["Date"] = pd.to_datetime(df_existing["Date"])

        # Append only new rows
        df_combined = pd.concat([df_existing, df])
        df_combined = df_combined.drop_duplicates(subset=["Date"]).sort_values("Date")
    else:
        # No file exists, use fresh data
        df_combined = df

    # Save to JSON
    df_combined.to_json(path, orient="records", date_format="iso")

    print(f"Updated data saved to {path}")


In [3]:
def scale_for_inflation(cpi_df: pd.DataFrame, from_year: int, to_year: int, amount: float):
    from_year_cpi = cpi_df.loc[cpi_df['Year'] == from_year, 'CPI'].values[0]
    to_year_cpi = cpi_df.loc[cpi_df['Year'] == to_year, 'CPI'].values[0]
    adjusted_value = (amount * (to_year_cpi / from_year_cpi))
    
    return round(adjusted_value, 2)


## Affordability

### US Population

In [None]:
pop = fred.get_series('POPTHM')
pop_df = pop.to_frame().reset_index()
pop_df.columns = ['Date', 'US Population']
pop_df['Date'] = pd.to_datetime(pop_df['Date'])
pop_df.set_index('Date', inplace=True)

pop_df = pop_df.resample('YE').max().round(0)
pop_df.index = pop_df.index.year
pop_df.reset_index(inplace=True)
pop_df.columns = ['Year', 'US Population']
pop_df['US Population'] = pop_df['US Population'] * 1000

pop_df.tail()

### Median Home Prices

In [None]:
# Median Home Price
# Fred series MSPUS
median_home_prices = fred.get_series('MSPUS')
df_home_median_prices = median_home_prices.to_frame().reset_index()
df_home_median_prices.columns = ['Date', 'Median Sales Price']
df_home_median_prices['Date'] = pd.to_datetime(df_home_median_prices['Date'])
df_home_median_prices.set_index('Date', inplace=True)

df_home_median_prices_annual = df_home_median_prices.resample('YE').mean()
df_home_median_prices_annual.index = df_home_median_prices_annual.index.year
df_home_median_prices_annual.reset_index(inplace=True)
df_home_median_prices_annual.columns = ['Year', 'Median Sales Price']
df_home_median_prices_annual.tail()


In [None]:
from datasets import get_median_home_prices

df = get_median_home_prices()
df.tail()

### Median Personal Income

In [None]:
# Median Personal Income In the United States (MEPAINUSA646N)
median_pers_income = fred.get_series('MEPAINUSA646N')
df_median_pers_income = median_pers_income.to_frame().reset_index()
df_median_pers_income.columns = ['Date', 'Median Personal Income']
df_median_pers_income['Date'] = pd.to_datetime(df_median_pers_income['Date'])
df_median_pers_income.set_index('Date', inplace=True)
df_median_pers_income.index = df_median_pers_income.index.year
df_median_pers_income.reset_index(inplace=True)
df_median_pers_income.columns = ['Year', 'Median Personal Income']

df_median_pers_income.tail()

### Median Family Income

In [None]:
# Median Family Income in the United States (MEFAINUSA646N)
median_family_income = fred.get_series('MEFAINUSA646N')
df_median_family_income =  median_family_income.to_frame().reset_index()
df_median_family_income.columns = ['Date', 'Median Family Income']
df_median_family_income['Date'] = pd.to_datetime(df_median_family_income['Date'])
df_median_family_income.set_index('Date', inplace=True)
df_median_family_income.index = df_median_family_income.index.year
df_median_family_income.reset_index(inplace=True)
df_median_family_income.columns = ['Year', 'Median Family Income']

df_median_family_income.tail()

### Mortgage Rate - 30 year

In [None]:
# 30 Year Mortgage Rate

mtg30 = fred.get_series('MORTGAGE30US')
df_mtg30 = mtg30.to_frame().reset_index()
df_mtg30.columns = ['Date', '30yr Mtg Rate']
df_mtg30['Date'] = pd.to_datetime(df_mtg30['Date'])
df_mtg30.set_index('Date', inplace=True)
df_mtg30 = df_mtg30.resample('YE').mean()
df_mtg30.index = df_mtg30.index.year
df_mtg30.reset_index(inplace=True)
df_mtg30.columns = ['Year', '30yr Mtg Rate']

df_mtg30.tail()

In [None]:
# Mortgage Payment Burden

# Calc monthly P&I using MORTGAGE30US and 80% of median home price
# Calc monthly T&I using average homeowner's insurance and property taxes
# Annualize the monthly payment and divide by median income

### CPI (CPIAUCSL)

In [None]:
cpi = fred.get_series('CPIAUCSL')
cpi_df = cpi.to_frame().reset_index()
cpi_df.columns = ['Date', 'CPI']
cpi_df['Date'] = pd.to_datetime(cpi_df['Date'])
#cpi_df.set_index('Date', inplace=True)
# cpi_df = cpi_df.resample('YE').max()
#cpi_df.index = cpi_df.index.year
# cpi_df.reset_index(inplace=True)
# cpi_df.columns = ['Year', 'CPI']

cpi_df.tail()

Unnamed: 0,Date,CPI
938,2025-03-01,319.615
939,2025-04-01,320.321
940,2025-05-01,320.58
941,2025-06-01,321.5
942,2025-07-01,322.132


### Personal Consumption Expenditures (PCE)

In [None]:
pce = fred.get_series('PCE')
pce_df = pce.to_frame().reset_index()
pce_df.columns = ['Date', 'PCE']
pce_df['Date'] = pd.to_datetime(pce_df['Date'])
pce_df.set_index('Date', inplace=True)
pce_df = round(pce_df.resample('YE').mean(), 2)
pce_df.index = pce_df.index.year
pce_df.reset_index(inplace=True)
pce_df.columns = ['Year', 'PCE']
pce_df['PCE'] = pce_df['PCE'] * 1000000000


pce_df.tail()

### Expenditures: Household Operations: All Consumer Units (CXUHHOPERLB0101M)

In [None]:
hh_spend = fred.get_series('CXUHHOPERLB0101M')
hh_spend_df = hh_spend.to_frame().reset_index()
hh_spend_df.columns = ['Date', 'HH Spend Annual']
hh_spend_df['Date'] = pd.to_datetime(hh_spend_df['Date'])
hh_spend_df.set_index('Date', inplace=True)
hh_spend_df.index = hh_spend_df.index.year
hh_spend_df.reset_index(inplace=True)
hh_spend_df.columns = ['Year', 'HH Spend Annual']

hh_spend_df.tail()

### Expenditures: Vehicle Insurance: All Consumer Units (CXU500110LB0101M)

In [None]:
vehicle_ins = fred.get_series('CXU500110LB0101M')
vehicle_ins_df = vehicle_ins.to_frame().reset_index()
vehicle_ins_df.columns = ['Date', 'Vehilcle Ins Annual']
vehicle_ins_df['Date'] = pd.to_datetime(vehicle_ins_df['Date'])
vehicle_ins_df.set_index('Date', inplace=True)
vehicle_ins_df.index = vehicle_ins_df.index.year
vehicle_ins_df.reset_index(inplace=True)
vehicle_ins_df.columns = ['Year', 'Vehicle Ins Annual']

vehicle_ins_df.tail()

### Total Households (TTLH)

In [None]:
hh = fred.get_series("TTLHH")
hh_df = hh.to_frame().reset_index()
hh_df.columns = ['Date', 'US Households']
hh_df['Date'] = pd.to_datetime(hh_df['Date'])
hh_df.set_index('Date', inplace=True)
hh_df.index = hh_df.index.year
hh_df.reset_index(inplace=True)
hh_df.columns = ['Year', 'US Households']
hh_df['US Households'] = hh_df['US Households'] * 1000

hh_df.tail()

### PCE Services: Healthcare (DHLCRC1Q027SBEA)

In [None]:
pce_healthcare = fred.get_series("DHLCRC1Q027SBEA")
pce_healthcare_df = pce_healthcare.to_frame().reset_index()
pce_healthcare_df.columns = ['Date', 'PCE Healthcare']
pce_healthcare_df['Date'] = pd.to_datetime(pce_healthcare_df['Date'])
pce_healthcare_df.set_index('Date', inplace=True)
pce_healthcare_df = round(pce_healthcare_df.resample('YE').mean(), 2)
pce_healthcare_df.index = pce_healthcare_df.index.year
pce_healthcare_df.reset_index(inplace=True)
pce_healthcare_df.columns = ['Year', 'PCE Healthcare']
pce_healthcare_df['PCE Healthcare'] = pce_healthcare_df['PCE Healthcare'] * 1000000000

pce_healthcare_df.tail()

### CPI Used Cars and Trucks (CUSR0000SETA02)

In [None]:
auto_cpi = fred.get_series('CUSR0000SETA02')
auto_cpi_df = auto_cpi.to_frame().reset_index()
auto_cpi_df.columns = ['Date', 'Used Auto CPI']
auto_cpi_df['Date'] = pd.to_datetime(auto_cpi_df['Date'])
auto_cpi_df.set_index('Date', inplace=True)
auto_cpi_df = auto_cpi_df.resample('YE').mean()
auto_cpi_df.index = auto_cpi_df.index.year
auto_cpi_df.reset_index(inplace=True)
auto_cpi_df.columns = ['Year', 'Used Auto CPI']


In [None]:
used_car_ref_year = 2024
used_car_ref_price = 28472

ref_cpi = auto_cpi_df.loc[auto_cpi_df['Year'] == used_car_ref_year, 'Used Auto CPI'].values[0]
auto_cpi_df['Est Avg Used Car Price'] = round((auto_cpi_df['Used Auto CPI'] * (used_car_ref_price / ref_cpi)),2)
auto_cpi_df.tail()


### CPI New Cars and Trucks (CUUR0000SETA01)

In [None]:
new_auto_cpi = fred.get_series('CUSR0000SETA01')
new_auto_cpi_df = new_auto_cpi.to_frame().reset_index()
new_auto_cpi_df.columns = ['Date', 'New Auto CPI']
new_auto_cpi_df['Date'] = pd.to_datetime(new_auto_cpi_df['Date'])
new_auto_cpi_df.set_index('Date', inplace=True)
new_auto_cpi_df = new_auto_cpi_df.resample('YE').mean()
new_auto_cpi_df.index = new_auto_cpi_df.index.year
new_auto_cpi_df.reset_index(inplace=True)
new_auto_cpi_df.columns = ['Year', 'New Auto CPI']

In [None]:
new_car_ref_year = 2024
new_car_ref_price = 48397

new_car_ref_cpi = new_auto_cpi_df.loc[new_auto_cpi_df['Year'] == new_car_ref_year, 'New Auto CPI'].values[0]
new_auto_cpi_df['Est Avg New Car Price'] = round((new_auto_cpi_df['New Auto CPI'] * (new_car_ref_price / ref_cpi)),2)
new_auto_cpi_df.tail()

In [None]:
from datasets import get_cpi_prices_new_cars

df = get_cpi_prices_new_cars()
df.tail()

### Real Disposable Personal Income (DSPI)

In [None]:
rdpi = fred.get_series('DSPI')
rdpi_df = rdpi.to_frame().reset_index()
rdpi_df.columns = ['Date', 'RDPI']
rdpi_df['Date'] = pd.to_datetime(rdpi_df['Date'])
rdpi_df.set_index('Date', inplace=True)
rdpi_df = rdpi_df.resample('YE').max()
rdpi_df.index = rdpi_df.index.year
rdpi_df.reset_index(inplace=True)
rdpi_df.columns = ['Year', 'RDPI']
rdpi_df['RDPI'] = rdpi_df['RDPI'] * 1000000000

rdpi_df.tail()

### MERGED Affordability DF - ANNUAL 

In [None]:
import pandas as pd
from functools import reduce

def merge_on_year(dfs, how='inner'):
    """
    Merge a list of dataframes on the 'Year' column.

    Parameters:
    dfs (list): List of pandas DataFrames, each with a 'Year' column.
    how (str): Type of merge - 'inner', 'outer', 'left', 'right'. Default 'inner'.

    Returns:
    DataFrame: Merged dataframe.
    """
    # Safety check: make sure they all have 'Year' column
    for i, df in enumerate(dfs):
        if 'Year' not in df.columns:
            raise ValueError(f"DataFrame at index {i} is missing 'Year' column.")

    merged_df = reduce(lambda left, right: pd.merge(left, right, on='Year', how=how), dfs)
    return merged_df

In [None]:
def calc_mtg_pi_payment(principal, annual_rate, years=30):
    """
    Calculate monthly principal & interest payment for a mortgage.

    principal: Loan amount
    annual_rate: Annual interest rate as a percentage (e.g., 6.5 for 6.5%)
    years: Term in years (default 30)
    """
    monthly_rate = (annual_rate / 100) / 12
    n_payments = years * 12
    
    if monthly_rate == 0:
        return principal / n_payments  # handle zero interest edge case
    
    payment = principal * (monthly_rate * (1 + monthly_rate) ** n_payments) / \
              ((1 + monthly_rate) ** n_payments - 1)
    return paymentS


In [None]:
dfs = [df_home_median_prices_annual, df_median_family_income, rdpi_df, cpi_df, pce_df, hh_spend_df, vehicle_ins_df, df_mtg30, pop_df, hh_df, pce_healthcare_df]
df_merged = merge_on_year(dfs)
df_merged['Household Persons'] = round((df_merged['US Population'] / df_merged['US Households']), 2)
df_merged['RDPI Per Capita'] = round((df_merged['RDPI'] / df_merged['US Population']), 2)
#df_merged['PCE Indv Monthly'] = round(((df_merged['PCE'] / df_merged['US Population']) / 12),2)
#df_merged['PCE HH Monthly'] = round(((df_merged['PCE'] / df_merged['US Households']) / 12),2)
df_merged['PCE HH Healthcare Monthly'] = round((df_merged['PCE Healthcare'] / df_merged['US Households']) / 12,2)
#df_merged['PCE Indv Healthcare Monthly'] = round((df_merged['PCE Healthcare'] / df_merged['US Population']) / 12,2)
#df_merged['Home Price Family Income Multiplier'] = df_merged['Median Sales Price'] / df_merged['Median Family Income']
df_merged['Income Home Price Ratio'] = df_merged['Median Family Income'] / df_merged['Median Sales Price']
df_merged['Loan Amount'] = df_merged['Median Sales Price'] * .8
df_merged['Mtg P&I Annual'] = df_merged.apply(lambda row: calc_mtg_pi_payment(row['Loan Amount'] * 12, row['30yr Mtg Rate']), axis=1).round(2)
df_merged['Mtg to Fam Income Ratio'] = df_merged['Mtg P&I Annual'] / df_merged['Median Family Income']
#df_merged['Mtg to Pers Income Ratio'] = df_merged['Mtg P&I Annual'] / df_merged['Median Personal Income']
df_merged = df_merged.dropna(axis=0)
df_merged.tail(50)

In [None]:
scale_for_inflation(cpi_df=cpi_df, from_year=2025, to_year=1960, amount=100)

In [None]:
# Expense to Income Ratios - goods variety

#vehicles, food, healthcare, etc..

# Disposable Income Share
# 100% - (sum of essential expenses)