# Prepare Data & Summary statistics

In [106]:
import numpy as np
import pandas as pd
import io
import re
# ----------------------------------------------------
# Real Disposable Income per Capita & Income Growth
# ----------------------------------------------------
path = "bea_data_quartelry_scaled(million).csv"
# Value found in the first ("Line") column of the row holding the series used below.
REAL_DPI_LINE = "39"

# --- 1) Read the file as raw text and keep only the tabular block that starts
#        at the first 'Line' row (whatever precedes it is skipped).
with open(path, "r", encoding="utf-8-sig") as f:
    raw_text = f.read()

# Rows beginning with 'Line' are the two header rows (years, then quarters).
all_lines = raw_text.splitlines()
line_idx = [i for i, s in enumerate(all_lines) if s.strip().startswith("Line")]
if len(line_idx) < 2:
    raise ValueError("Could not find the two 'Line' header rows (Years and Quarters).")

start = line_idx[0]      # 1st 'Line' row (years)
# Stop at the legend if present, else read to the end. Only a legend located
# *after* the header counts: an earlier match would make the slice below empty.
stop_candidates = [
    i for i, s in enumerate(all_lines)
    if i > start and s.strip().startswith("Legend/Footnotes")
]
stop = stop_candidates[0] if stop_candidates else len(all_lines)

table_text = "\n".join(all_lines[start:stop])

# --- 2) Parse the block as comma-separated text. Everything is read as str
#        (dtype=str, header=None) so the two header rows stay ordinary rows.
df = pd.read_csv(io.StringIO(table_text), sep=",", engine="python", header=None, dtype=str)

# Now df looks like:
# row 0: ['Line', '1947', '1947', '1947', '1947', '1948', ...]  (YEARS)
# row 1: ['Line', 'Q1',   'Q2',   'Q3',   'Q4',   'Q1',   ...]  (QUARTERS)
# subsequent rows: ['1', 'Personal income', 189.7, 189.7, ...], etc.

# --- 3) Build YYYYQ# labels from the first two rows and pull the target series.
# Data start in column 2 (columns 0 and 1 hold the line number and the label).
years = df.iloc[0, 2:].tolist()
quarters = df.iloc[1, 2:].tolist()
periods = [f"{int(y)}Q{str(q).strip()[-1]}" for y, q in zip(years, quarters)]

_series_rows = df[df[0] == REAL_DPI_LINE]
if _series_rows.shape[0] == 0:
    # Fail with a readable message instead of an out-of-bounds positional error.
    raise ValueError(f"No row with Line == {REAL_DPI_LINE!r} found in {path!r}.")
vals = _series_rows.iloc[0, 2:].astype(float).tolist()

# --- 4) Tidy result + growth + quarter start date; put columns in requested order
result = pd.DataFrame({
    "YYYYQ": periods,
    "RealDPI_perCapita": vals
}).dropna(subset=["RealDPI_perCapita"]).reset_index(drop=True)

# Quarter-over-quarter growth; the first row has no predecessor and stays NaN.
result["IncGrow"] = result["RealDPI_perCapita"].pct_change() * 100.0  # percent
result["Date"] = pd.PeriodIndex(result["YYYYQ"], freq="Q").to_timestamp(how="start")
result = result[["Date", "YYYYQ", "RealDPI_perCapita", "IncGrow"]]

print("\n\nReal Disposal Income per Capita  & Incom Growth")
print(result.head())
print(result.tail())


# -------------------
# Unemployment Rate
# -------------------
# Wide monthly table -> long format -> quarterly mean -> quarter-on-quarter change.
dfm = pd.read_csv("unemployment_rates_US_monthly.csv")
dfm = dfm.rename(columns={"Unnamed: 0": "Year"})

# One row per (Year, Month) observation; months without a value are dropped.
long = dfm.melt(id_vars="Year", var_name="Month", value_name="Unemp")
long = long.dropna(subset=["Unemp"])
# The month number is the two digits following "M" in the column label.
long["M"] = long["Month"].str.extract(r"M(\d{2})", expand=False).astype(int)
# (M + 2) // 3 maps months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
long["Q"] = ((long["M"] + 2) // 3).astype(int)

# Quarterly averages over whichever months are present in each quarter.
q = long.groupby(["Year", "Q"], as_index=False)["Unemp"].mean()
q["Unemp"] = q["Unemp"].round(2)
_year_txt = q["Year"].astype(int).astype(str)
_quarter_txt = q["Q"].astype(int).astype(str)
q["YYYYQ"] = _year_txt + "Q" + _quarter_txt
q["Date"] = pd.PeriodIndex(q["YYYYQ"], freq="Q").to_timestamp(how="start")
q = q[["Date", "YYYYQ", "Unemp"]].sort_values("Date").reset_index(drop=True)
# Change is taken on the rounded quarterly averages.
q["dUnemp"] = q["Unemp"].diff()

# Target path for the quarterly table; the write itself is currently disabled.
out_path = "unemployment_quarterly_YYYYQ.csv"
print("\n\nUnemployment Rate")
print(q.head())
print(q.tail())
# q.to_csv(out_path, index=False)

# ----------------------------------
# Financial Obligation Ratio
# Household Debt Service Ratio
# Assets Value
# ----------------------------------
# The Financial Obligations Ratio is a broader measure than the Debt Service Ratio.

# Load the four quarterly series; each file carries an 'observation_date' column.
df_for_dsr = pd.read_csv("Household_Debt_Service_Ratio_quarterly.csv")
df2 = pd.read_csv("Financial_Obligations_Ratio_quarterly.csv")
df3 = pd.read_csv("Consumer_Debt_Service_Ratio_quarterly.csv")
df4 = pd.read_csv("housing_price_index_quarterly.csv")

# Unparseable dates become NaT rather than raising.
for _frame in (df_for_dsr, df2, df3, df4):
    _frame["observation_date"] = pd.to_datetime(_frame["observation_date"], errors="coerce")

# Provisional quarter label on the first frame; it is recomputed after the
# merges so that rows contributed only by the other series get a label too.
df_for_dsr["YYYYQ"] = df_for_dsr["observation_date"].dt.to_period("Q").astype(str)

# Outer joins keep every date that appears in any of the series.
for _frame in (df2, df3, df4):
    df_for_dsr = pd.merge(df_for_dsr, _frame, on=["observation_date"], how="outer")

# Rename the date column and the series codes to the names used downstream.
df_for_dsr = df_for_dsr.rename(columns={
    "observation_date": "Date",
    "TDSP": "TotalDSR",
    "CDSP": "ConsumerDSR",
    "FODSP": "FOR",
    "USSTHPI": "HPI",
})
df_for_dsr["YYYYQ"] = df_for_dsr["Date"].dt.to_period("Q").astype(str)
df_for_dsr = df_for_dsr.sort_values("Date").reset_index(drop=True)

# Row-to-row changes in the three ratios.
for _name in ("FOR", "TotalDSR", "ConsumerDSR"):
    df_for_dsr["d" + _name] = df_for_dsr[_name].diff()

print("\n\nFOR, DSR, Housing Price Index (HPI)")
print(df_for_dsr.head(25))
print(df_for_dsr.tail(10))


# ----------------------------------
# NCO
# ----------------------------------
df = pd.read_csv("credit_card_nco_panel_cleaned.csv", usecols=["YQ","NCO_RATE_Q", "AVG_CC_LOANS"])

# Bank CC loan-value-weighted quarterly average:
#     sum(NCO_RATE_Q * AVG_CC_LOANS) / sum(AVG_CC_LOANS)   per quarter (YQ).
# A row only contributes weight when its rate is present. Otherwise its loans
# would inflate the denominator without adding to the numerator and pull the
# average toward zero.
_weights = df["AVG_CC_LOANS"].where(df["NCO_RATE_Q"].notna())
_weighted_rate = df["NCO_RATE_Q"] * _weights
# min_count=1: a quarter with no usable row yields NaN instead of a spurious 0.
_numerator = _weighted_rate.groupby(df["YQ"]).sum(min_count=1)
_denominator = _weights.groupby(df["YQ"]).sum(min_count=1)
# Plain grouped sums replace groupby.apply(..., include_groups=False), whose
# keyword is only accepted by pandas >= 2.2.
agg = ((_numerator / _denominator)
       .rename("NCO_RATE_Q").to_frame()
       .rename_axis("YYYYQ")              # rename the index axis
       .reset_index())

agg = agg.sort_values("YYYYQ").reset_index(drop=True)
# Paper uses **change** in charge-off: ΔChargeOff_t
# NOTE(review): the x100 presumes the panel stores the rate as a fraction — confirm.
agg["NCO_RATE_Q"] = agg["NCO_RATE_Q"] * 100 # Percent
agg["dNCO"] = agg["NCO_RATE_Q"].diff()
print("\n\n Bank CC Loan Value-weighted quarterly average NCO Rate")
print(agg.head())
print(agg.tail())

# 3) Merge all on Date/ YYYYQ
# The first two tables are joined on both keys, the NCO table on the quarter
# label alone. Outer joins keep every quarter seen in any table.
base = result
for _right, _keys in (
    (q, ["Date", "YYYYQ"]),
    (df_for_dsr, ["Date", "YYYYQ"]),
    (agg, ["YYYYQ"]),
):
    base = pd.merge(base, _right, on=_keys, how="outer")
base = base.sort_values("Date").reset_index(drop=True)

# Asset-value proxy: house price index relative to real disposable income per capita.
_hpi = base["HPI"].astype(float)
_income = base["RealDPI_perCapita"].astype(float)
base["AssetVal"] = _hpi / _income * 100.0
base["dAssetVal"] = base["AssetVal"].diff()
base["dHPI"] = base["HPI"].diff()

# Ensure column order: Date, YYYYQ, model variables, then everything else
model_cols = ["Date", "YYYYQ", "dNCO", "dUnemp", "IncGrow",
              "dFOR", "dTotalDSR", "dConsumerDSR", "dAssetVal", "dHPI"]
cols = base.columns.tolist()
new_order = model_cols + [c for c in cols if c not in model_cols]
base = base[new_order]
print("\n\nFinal Processed Dataset for Baseline Model (Raw)")
print(base.head(25))
print(base.tail(10))

# Save combined file (all columns)
out_path = "baseline_model_quarterly_raw.csv"
base.to_csv(out_path, index=False)

# Keep only the model variables and save the trimmed table as well.
base = base[model_cols]
print("\n\nFinal Processed Dataset for Baseline Model")
print(base.head(25))
print(base.tail(10))
out_path = "baseline_model_quarterly.csv"
base.to_csv(out_path, index=False)

# -----------------------------
# 1) LOAD & PREP YOUR DATA
# -----------------------------
# Expect a quarterly DataFrame 'df' with columns (names as written to
# baseline_model_quarterly.csv above):
#   dNCO      : Δ net charge-off rate
#   dUnemp    : Δ unemployment rate
#   IncGrow   : real disposable income per capita growth (level, not differenced)
#   dFOR      : Δ financial obligations ratio (debt payments / disposable income)
#   dAssetVal : Δ house-price-to-income ratio, HPI / RealDPI_perCapita * 100 (asset value proxy)

# Example schema (replace with your actual load):
# df = pd.read_csv("your_quarterly_data.csv", parse_dates=['date']).set_index('date')
# df = df.asfreq('Q')  # ensure quarterly frequency




Real Disposal Income per Capita  & Incom Growth
        Date   YYYYQ  RealDPI_perCapita   IncGrow
0 1947-01-01  1947Q1            10311.0       NaN
1 1947-04-01  1947Q2            10165.0 -1.415964
2 1947-07-01  1947Q3            10388.0  2.193802
3 1947-10-01  1947Q4            10195.0 -1.857913
4 1948-01-01  1948Q1            10343.0  1.451692
          Date   YYYYQ  RealDPI_perCapita   IncGrow
309 2024-04-01  2024Q2            51473.0  0.021375
310 2024-07-01  2024Q3            51393.0 -0.155421
311 2024-10-01  2024Q4            51630.0  0.461152
312 2025-01-01  2025Q1            51881.0  0.486151
313 2025-04-01  2025Q2            52192.0  0.599449


Unemployment Rate
        Date   YYYYQ  Unemp  dUnemp
0 2010-01-01  2010Q1   9.83     NaN
1 2010-04-01  2010Q2   9.63   -0.20
2 2010-07-01  2010Q3   9.47   -0.16
3 2010-10-01  2010Q4   9.50    0.03
4 2011-01-01  2011Q1   9.03   -0.47
         Date   YYYYQ  Unemp  dUnemp
57 2024-04-01  2024Q2   4.00    0.17
58 2024-07-01  2024Q3   4.17

In [33]:
import pandas as pd

def full_summary_with_dates(df, exclude_cols=("YYYYQ", "Date")):
    """Summarise each non-excluded column and report its span of valid data.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns to summarise plus a ``"YYYYQ"`` column, which
        is read to label the first and last non-missing observation.
    exclude_cols : str or iterable of str, default ("YYYYQ", "Date")
        Column(s) dropped before any statistic is computed.

    Returns
    -------
    pandas.DataFrame
        One row per remaining column: the ``describe()`` statistics plus
        ``median``, ``skew``, ``kurtosis``, ``first_valid_quarter`` and
        ``last_valid_quarter`` (``None`` when a column has no valid value).

    Raises
    ------
    KeyError
        If a name in ``exclude_cols`` is not a column of ``df``, or ``df``
        has no ``"YYYYQ"`` column.
    """
    # Immutable default instead of a shared list. Normalise to a list because
    # DataFrame.drop reads a tuple as one single label.
    if isinstance(exclude_cols, str):
        exclude_cols = [exclude_cols]
    num_cols = df.drop(columns=list(exclude_cols))

    # basic stats
    desc = num_cols.describe().T
    desc["median"] = num_cols.median()
    desc["skew"] = num_cols.skew()
    desc["kurtosis"] = num_cols.kurtosis()

    # quarter labels of the first / last non-NaN observation of each column
    first_valid = {}
    last_valid = {}
    for col in num_cols.columns:
        first_idx = df[col].first_valid_index()
        last_idx = df[col].last_valid_index()
        first_valid[col] = df.loc[first_idx, "YYYYQ"] if first_idx is not None else None
        last_valid[col] = df.loc[last_idx, "YYYYQ"] if last_idx is not None else None

    desc["first_valid_quarter"] = pd.Series(first_valid)
    desc["last_valid_quarter"] = pd.Series(last_valid)

    return desc
# Work on the last 46 rows of the saved baseline table.
base = pd.read_csv("baseline_model_quarterly.csv").tail(46)

# Rows kept for modelling: the first row has no lagged predictors after the
# shift below; the last row is dropped as well.
_kept_rows = slice(1, -1)

# Target side: the first three columns, with dNCO relabelled as the time-t value.
y = base.iloc[_kept_rows, :3].rename(columns={"dNCO": "dNCO_t"})

# X = predictors lagged one quarter, relabelled with a "_t-1" suffix
X = base.iloc[:, 3:].shift(1).iloc[_kept_rows].add_suffix("_t-1")

base = pd.concat([y, X], axis=1)

summary = full_summary_with_dates(base)
summary.T

Unnamed: 0,dNCO_t,dUnemp_t-1,IncGrow_t-1,dFOR_t-1,dTotalDSR_t-1,dConsumerDSR_t-1,dAssetVal_t-1,dHPI_t-1
count,44.0,44.0,44.0,39.0,44.0,44.0,44.0,44.0
mean,-0.098503,-0.063636,0.570739,-0.030189,-0.015875,-0.001551,0.012098,8.211591
std,2.359869,1.590277,2.82833,0.402536,0.395308,0.212882,0.034652,7.989141
min,-4.6131,-4.2,-7.846992,-1.53371,-1.848007,-0.961907,-0.082047,-5.91
25%,-0.675726,-0.245,0.07802,-0.081032,-0.072467,-0.048849,-0.000028,3.565
50%,-0.009974,-0.115,0.526228,-0.023457,0.022735,0.037173,0.005674,5.655
75%,0.322831,0.0075,0.929419,0.056765,0.137861,0.086562,0.017357,9.0375
max,3.917985,9.17,12.070499,1.163312,0.804187,0.358685,0.126633,37.62
median,-0.009974,-0.115,0.526228,-0.023457,0.022735,0.037173,0.005674,5.655
skew,-0.030377,4.286753,1.699138,-1.639999,-3.0072,-2.821014,0.510664,1.894167


In [68]:
# Write the current `base` frame to disk without the index; the ARCH cell
# below reloads this file by name.
base.to_csv("baseline_model_quarterly_time_shifted.csv",index=False)

In [34]:
# Pairwise correlations of all remaining columns once the two label columns
# (YYYYQ, Date) are dropped.
base.drop(columns=["YYYYQ", "Date"]).corr()

Unnamed: 0,dNCO_t,dUnemp_t-1,IncGrow_t-1,dFOR_t-1,dTotalDSR_t-1,dConsumerDSR_t-1,dAssetVal_t-1,dHPI_t-1
dNCO_t,1.0,0.042183,0.224271,-0.230288,-0.296138,-0.417125,-0.101751,0.12392
dUnemp_t-1,0.042183,1.0,0.558551,-0.57082,-0.696887,-0.65625,-0.460302,-0.127752
IncGrow_t-1,0.224271,0.558551,1.0,-0.987937,-0.921364,-0.855791,-0.884089,-0.322879
dFOR_t-1,-0.230288,-0.57082,-0.987937,1.0,0.941897,0.904976,0.862861,0.301006
dTotalDSR_t-1,-0.296138,-0.696887,-0.921364,0.941897,1.0,0.973519,0.768874,0.222484
dConsumerDSR_t-1,-0.417125,-0.65625,-0.855791,0.904976,0.973519,1.0,0.689638,0.159068
dAssetVal_t-1,-0.101751,-0.460302,-0.884089,0.862861,0.768874,0.689638,1.0,0.722057
dHPI_t-1,0.12392,-0.127752,-0.322879,0.301006,0.222484,0.159068,0.722057,1.0


# ARCH Baseline Model

In [79]:
# === Requirements ===
# pip install arch pandas numpy statsmodels

import numpy as np
import pandas as pd
from arch import arch_model
from scipy.stats import norm

# -----------------------------
# 1) LOAD & PREP YOUR DATA
# -----------------------------
# Input: the time-shifted quarterly table, indexed by Date once loaded.
#   dNCO_t        : target, change in the net charge-off rate (ΔNCO)
#   <name>_t-1    : predictors, already lagged one quarter in the saved file
df = (
    pd.read_csv("baseline_model_quarterly_time_shifted.csv", parse_dates=['Date'])
    .set_index('Date')
)
print(df.isna().sum())

# The target is already a change, so it is used as is. The predictors are
# everything from the third column onwards (after YYYYQ and dNCO_t).
y = df['dNCO_t']
X = df.iloc[:,2:]
print(y.shape)
print(X.shape)
# No constant column is added to X: the ARX mean model supplies its own.

# -----------------------------
# 2) TRAIN/TEST SPLIT
# -----------------------------
# Hold out the final `test_h` quarters; everything before is the training window.
test_h = 12 # 3 Years
train_end = len(df) - test_h # 44 rows - 12 = 32 quarters (8 Years) in the run shown

y_train = y.iloc[:train_end]
y_test = y.iloc[train_end:]
X_train = X.iloc[:train_end]
X_test = X.iloc[train_end:]

# Regressors used in both mean equations. The other candidates in X
# (dFOR_t-1, dTotalDSR_t-1, dAssetVal_t-1, dHPI_t-1) are left out.
exog_cols = ["dUnemp_t-1", "IncGrow_t-1", "dConsumerDSR_t-1"]
# Settings shared by both fits: regression mean with exog, no AR lags of y.
mean_spec = dict(mean='ARX', lags=0, dist='normal')

# -----------------------------
# 3.1) FIT ARCH(1) WITH EXOGENOUS MEAN
# -----------------------------
# Mean eq: ΔNCO_t = α + β'X_t-1 + ε_t
# Var  eq: σ_t^2 = ω + γ ε_{t-1}^2  (ARCH(1))
# exog is passed WITHOUT a constant (arch adds its own const if mean='ARX')
am = arch_model(y=y_train, x=X_train[exog_cols], vol='ARCH', p=1, **mean_spec)

res = am.fit(disp='off')

print(res.summary())


# -----------------------------
# 3.2) FIT GARCH(1,1) WITH EXOGENOUS MEAN (ARX)
# -----------------------------
# Mean:  dNCO_t = α + β'X_{t-1} + ε_t
# Var:   σ_t^2  = ω + γ1*ε_{t-1}^2 + β1*σ_{t-1}^2   (GARCH(1,1))
# Note: arch_model with mean='ARX' includes a constant in the mean by default.
# `am` and `res` are overwritten here, so only the GARCH fit remains afterwards.
am = arch_model(y=y_train, x=X_train[exog_cols], vol='GARCH', p=1, q=1, **mean_spec)

res = am.fit(disp='off')
print(res.summary())

# -----------------------------
# 4) ONE-STEP-AHEAD FORECASTS (ROLLING)
# -----------------------------
# We forecast 1-step-ahead recursively over the test set, using observed X at t+1.
# arch_model.forecast can do dynamic forecasting; here we refit=False for speed.

# mu_forecasts = []
# sigma2_forecasts = []

# # Keep the last residual to initialize variance recursion for the first forecast
# # (arch handles this internally when using res.forecast).
# fcast = res.forecast(
#     horizon=1,
#     x=X_test.iloc[:, 1:].values,        # future exog WITHOUT constant
#     start=None,                          # start right after sample end
#     reindex=True
# )

# # fcast.mean and fcast.variance return DataFrames indexed by original index
# mu_series = fcast.mean['h.1']
# var_series = fcast.variance['h.1']

# # Because arch’s forecast aligns to the index of the entire series,
# # we subset to our test index:
# mu_hat = mu_series.loc[y_test.index]
# sigma_hat = np.sqrt(var_series.loc[y_test.index])

# # -----------------------------
# # 5) PREDICTION INTERVALS
# # -----------------------------
# alpha = 0.05
# z = norm.ppf(1 - alpha/2)

# lower = mu_hat - z * sigma_hat
# upper = mu_hat + z * sigma_hat

# # -----------------------------
# # 6) EVALUATION
# # -----------------------------
# def mse(a, b): return float(np.mean((a - b) ** 2))

# print("Test MSE (ΔNCO):", mse(y_test, mu_hat))

# # Collect outputs for inspection
# out = pd.DataFrame({
#     'y_true_dNCO': y_test,
#     'y_hat_dNCO': mu_hat,
#     'pi_lower': lower,
#     'pi_upper': upper,
#     'sigma': sigma_hat
# })

# print(out.tail())

# # -----------------------------
# # 7) OPTIONAL: BACK-OUT NCO LEVEL FORECAST (if needed)
# # -----------------------------
# # If you predicted ΔNCO and want NCO level forecasts, accumulate from last observed NCO level:
# # NCO_{t+1|t} = NCO_t + ΔNCO_{t+1|t}
# # Repeat recursively over the test window.

# if 'NCO' in df.columns:
#     nco_level_forecasts = []
#     last_nco = df['NCO'].iloc[train_end-1]  # last observed level before test starts
#     for t in mu_hat.index:
#         f_level = last_nco + mu_hat.loc[t]
#         nco_level_forecasts.append(f_level)
#         last_nco = df.loc[t, 'NCO']  # update with actual if you want pseudo-real-time level
#                                      # or set last_nco = f_level for purely recursive forecasts

#     out['NCO_hat_level_pseudo'] = nco_level_forecasts  # pseudo if you used actuals each step


YYYYQ               0
dNCO_t              0
dUnemp_t-1          0
IncGrow_t-1         0
dFOR_t-1            5
dTotalDSR_t-1       0
dConsumerDSR_t-1    0
dAssetVal_t-1       0
dHPI_t-1            0
dtype: int64
(44,)
(44, 7)
                          AR-X - ARCH Model Results                           
Dep. Variable:                 dNCO_t   R-squared:                       0.337
Mean Model:                      AR-X   Adj. R-squared:                  0.266
Vol Model:                       ARCH   Log-Likelihood:               -66.0433
Distribution:                  Normal   AIC:                           144.087
Method:            Maximum Likelihood   BIC:                           152.881
                                        No. Observations:                   32
Date:                Fri, Aug 22 2025   Df Residuals:                       28
Time:                        15:43:41   Df Model:                            4
                                  Mean Model                    

In [75]:
# ARCH(1) with an exogenous (ARX) mean, fitted on the training window.
# Regressors are passed WITHOUT a constant (arch adds its own const if mean='ARX').
exog_cols = ["dUnemp_t-1", "IncGrow_t-1", "dConsumerDSR_t-1"]

am = arch_model(
    y=y_train,
    x=X_train[exog_cols],
    mean='ARX',   # linear regression with exog
    lags=0,       # no autoregressive lags of y
    vol='ARCH',
    p=1,
    dist='normal',
)
res = am.fit(disp='off')
print(res.summary())

                          AR-X - ARCH Model Results                           
Dep. Variable:                 dNCO_t   R-squared:                       0.337
Mean Model:                      AR-X   Adj. R-squared:                  0.266
Vol Model:                       ARCH   Log-Likelihood:               -66.0433
Distribution:                  Normal   AIC:                           144.087
Method:            Maximum Likelihood   BIC:                           152.881
                                        No. Observations:                   32
Date:                Fri, Aug 22 2025   Df Residuals:                       28
Time:                        13:21:13   Df Model:                            4
                                  Mean Model                                  
                       coef    std err          t      P>|t|  95.0% Conf. Int.
------------------------------------------------------------------------------
Const                0.0508      0.312      0.163   

In [65]:
# Pairwise correlations of all remaining columns once the two label columns
# (YYYYQ, Date) are dropped.
base.drop(columns=["YYYYQ", "Date"]).corr()

Unnamed: 0,dNCO_t,dUnemp_t-1,IncGrow_t-1,dFOR_t-1,dTotalDSR_t-1,dConsumerDSR_t-1,dAssetVal_t-1,dHPI_t-1
dNCO_t,1.0,0.042183,0.224271,-0.230288,-0.296138,-0.417125,-0.101751,0.12392
dUnemp_t-1,0.042183,1.0,0.558551,-0.57082,-0.696887,-0.65625,-0.460302,-0.127752
IncGrow_t-1,0.224271,0.558551,1.0,-0.987937,-0.921364,-0.855791,-0.884089,-0.322879
dFOR_t-1,-0.230288,-0.57082,-0.987937,1.0,0.941897,0.904976,0.862861,0.301006
dTotalDSR_t-1,-0.296138,-0.696887,-0.921364,0.941897,1.0,0.973519,0.768874,0.222484
dConsumerDSR_t-1,-0.417125,-0.65625,-0.855791,0.904976,0.973519,1.0,0.689638,0.159068
dAssetVal_t-1,-0.101751,-0.460302,-0.884089,0.862861,0.768874,0.689638,1.0,0.722057
dHPI_t-1,0.12392,-0.127752,-0.322879,0.301006,0.222484,0.159068,0.722057,1.0


In [39]:
# Display the held-out (test-window) predictor rows for inspection.
X_test

Unnamed: 0_level_0,dUnemp,IncGrow,dFOR,dTotalDSR,dConsumerDSR,dAssetVal,dHPI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-10-01,-0.1,1.411471,0.003009,-0.120587,-0.004224,0.002393,9.84
2023-01-01,0.04,0.706177,0.045825,0.181593,0.130339,-0.021035,-5.91
2023-04-01,-0.04,2.433903,-0.273874,-0.144844,-0.114625,-0.022713,3.64
2023-07-01,0.0,0.628843,-0.014347,0.039902,0.064315,0.029086,18.62
2023-10-01,0.14,0.096901,0.088718,0.190917,0.104447,0.021256,11.38
2024-01-01,0.13,0.525526,,0.364626,0.338265,-0.003198,1.8
2024-04-01,0.03,1.139892,,-0.070515,-0.080552,0.002557,8.77
2024-07-01,0.17,0.021375,,-0.001116,-0.059956,0.032091,16.66
2024-10-01,0.17,-0.155421,,0.153964,0.093062,0.015772,7.05
2025-01-01,-0.04,0.461152,,-0.026323,0.03429,-6.9e-05,3.13


In [37]:
# Show the training predictors as a raw array (every row, every column).
X_train.values

array([[-2.60000000e-01,  9.83750979e-01, -9.55990000e-02,
        -1.01533000e-01, -8.02240000e-02, -1.33031027e-03,
         2.68000000e+00],
       [-4.70000000e-01,  1.07109969e+00, -8.90170000e-02,
        -2.62625000e-01, -1.02311000e-01,  6.91863432e-03,
         6.43000000e+00],
       [-1.30000000e-01,  7.24081711e-01, -3.95040000e-02,
         2.58050000e-02,  1.78050000e-02,  5.73049930e-03,
         4.85000000e+00],
       [-3.70000000e-01,  1.20685551e+00, -7.86500000e-02,
        -1.49550000e-02,  6.91240000e-02, -3.17458633e-03,
         2.78000000e+00],
       [-1.70000000e-01,  1.19952019e+00,  1.54490000e-02,
        -7.53720000e-02, -4.51470000e-02, -7.62276724e-04,
         3.81000000e+00],
       [-1.00000000e-01,  1.23178469e-01,  3.17850000e-02,
        -7.14990000e-02,  2.15720000e-02,  1.21178694e-02,
         5.65000000e+00],
       [-3.30000000e-01,  3.43546890e-01,  6.77050000e-02,
         1.21348000e-01,  9.06520000e-02,  8.79607375e-03,
         5.0200000

In [18]:
# Scratch arithmetic (evaluates to 14).
# NOTE(review): presumably a sample-size split of the 44-row table — confirm or delete.
44-24-6

14

In [15]:
# Keep the last 46 rows of the table currently held in `df`.
df = df.tail(46)
print(df.head())
print(df.tail())
# Printed explicitly: a bare expression in the middle of a cell is evaluated
# and then discarded, so the NaN counts were never shown.
print(df.isna().sum())

# The target is the change in NCO (ΔNCO), which is already a column of df.
# The first row is dropped because its lagged predictors are NaN after
# shift(1); the last row is dropped as well.
y = df['dNCO'].iloc[1:-1]
# Predictors lagged one quarter: every column from the third onwards
# (after YYYYQ and dNCO), shifted down one row.
X = df.shift(1).iloc[1:-1,2:]
print(y.shape)
print(X.shape)
print(y.index)
print(X.index)

             YYYYQ      dNCO  dUnemp   IncGrow      dFOR  dTotalDSR  \
Date                                                                  
2014-01-01  2014Q1       NaN   -0.26  0.983751 -0.095599  -0.101533   
2014-04-01  2014Q2  0.032508   -0.47  1.071100 -0.089017  -0.262625   
2014-07-01  2014Q3 -0.149256   -0.13  0.724082 -0.039504   0.025805   
2014-10-01  2014Q4  0.369850   -0.37  1.206856 -0.078650  -0.014955   
2015-01-01  2015Q1 -3.658121   -0.17  1.199520  0.015449  -0.075372   

            dConsumerDSR  dAssetVal  dHPI  
Date                                       
2014-01-01     -0.080224  -0.001330  2.68  
2014-04-01     -0.102311   0.006919  6.43  
2014-07-01      0.017805   0.005730  4.85  
2014-10-01      0.069124  -0.003175  2.78  
2015-01-01     -0.045147  -0.000762  3.81  
             YYYYQ      dNCO  dUnemp   IncGrow  dFOR  dTotalDSR  dConsumerDSR  \
Date                                                                            
2024-04-01  2024Q2  3.485420    

In [1]:
# import sys, site, subprocess, importlib, pathlib, os

# print("Interpreter:", sys.executable)
# print("Python:", sys.version)
# print("User site:", site.getusersitepackages())
# print("Sys path[0]:", sys.path[0])

# # 1) Make sure pip we call belongs to THIS interpreter
# subprocess.check_call([sys.executable, "-m", "pip", "--version"])

# # 2) Install/repair arch into this interpreter (force, no cache)
# subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
# subprocess.check_call([sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-cache-dir", "arch"])

# # 3) Ensure the user site (where --user installs go) is on sys.path
# user_site = site.getusersitepackages()
# if user_site not in sys.path:
#     sys.path.append(user_site)
#     print("Added to sys.path:", user_site)

# # 4) Try importing now
# import arch
# print("arch version:", arch.__version__)


Interpreter: C:\Users\yss06\llm_env\Scripts\python.exe
Python: 3.13.7 (tags/v3.13.7:bcee1c3, Aug 14 2025, 14:15:11) [MSC v.1944 64 bit (AMD64)]
User site: C:\Users\yss06\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages
Sys path[0]: C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.13_3.13.2032.0_x64__qbz5n2kfra8p0\python313.zip
Added to sys.path: C:\Users\yss06\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\LocalCache\local-packages\Python313\site-packages
arch version: 7.2.0
