In [4]:
from pathlib import Path
import numpy as np, pandas as pd, matplotlib.pyplot as plt, json
from sklearn.metrics import r2_score
# --- Project locations, relative to the notebook's working directory ---
PROC = Path("../data/processed")
ARR = Path("../reports/tables")  # same directory as TAB; the saved .npy arrays are read from here
FIG = Path("../reports/figures")
TAB = Path("../reports/tables")
for _out_dir in (FIG, TAB):
    # Output folders must exist before any figure/table is written.
    _out_dir.mkdir(parents=True, exist_ok=True)

# --- Column order of the feature axis and row labels of the horizon axis ---
FEATURES = ["bh_mass","bh_acc","stellar_mass","sfr","halo_mass","vel_disp"]
HORIZON_LABELS = ["H1","H3","H5"]

# --- Preprocessing artefacts needed to map z-scores back to physical units ---
# MU / SD are applied by inv_z_mat as Z*SD + MU.
MU = np.load(PROC / "scaler_mean.npy")
SD = np.load(PROC / "scaler_scale.npy")
# Per-feature transform spec keyed by feature name; inv_forward reads its
# "type" entry and, for "log10_floor", its "floor" entry.
TRANSFORM = json.loads((PROC / "transform_config.json").read_text())
def inv_z_mat(Z):
    """Undo z-scoring: scale ``Z`` by the module-level ``SD`` and shift by ``MU``.

    Parameters
    ----------
    Z : array-like
        Standardised values; must broadcast against ``SD`` and ``MU``.

    Returns
    -------
    The de-standardised values ``Z*SD + MU``.
    """
    rescaled = Z * SD
    return rescaled + MU
def inv_forward(a,name):
    """Invert the forward transform registered for feature ``name``.

    The transform spec is looked up in the module-level ``TRANSFORM`` mapping.

    Parameters
    ----------
    a : array-like
        Values in transformed space.
    name : str
        Key into ``TRANSFORM``.

    Returns
    -------
    For a ``"log10_floor"`` spec: ``10**a`` clamped from below at the spec's
    ``"floor"``. For any other spec type ``a`` is returned unchanged.
    """
    spec = TRANSFORM[name]
    if spec["type"] != "log10_floor":
        # No inverse is defined for other transform types: pass through.
        return a
    return np.maximum(10.0 ** a, spec["floor"])
def rmse(a,b,axis=None):
    """Root-mean-square error between ``a`` and ``b``.

    Both operands are converted to float arrays before subtracting, so plain
    Python sequences are accepted and unsigned-integer inputs cannot wrap
    around when ``a < b``.

    Parameters
    ----------
    a, b : array-like
        Values to compare; must be broadcastable against each other.
    axis : int, tuple of int or None, optional
        Axis (or axes) over which the squared error is averaged. ``None``
        (default) averages over all elements and yields a scalar.

    Returns
    -------
    float or numpy.ndarray
        ``sqrt(mean((a - b)**2))`` reduced over ``axis``.
    """
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.mean(diff ** 2, axis=axis))
# Saved evaluation arrays in z-space. Y_true_z / Y_pred_z are indexed below as
# [:, horizon, feature]; Xt_z is later repeated along a new horizon axis.
# NOTE(review): axis 0 is presumably the test-sample axis — confirm.
Y_true_z, Y_pred_z, Xt_z = (
    np.load(ARR / filename)
    for filename in ("Y_true_z.npy", "Y_pred_z.npy", "Xt_z.npy")
)


In [5]:
# Per-horizon, per-feature RMSE of the LSTM predictions, both in z-space and
# after mapping back to physical units.
H, F = Y_true_z.shape[1], Y_true_z.shape[2]
rmse_z = np.zeros((H, F))
rmse_phys = np.zeros((H, F))
for h in range(H):
    true_h = Y_true_z[:, h, :]
    pred_h = Y_pred_z[:, h, :]
    rmse_z[h, :] = rmse(true_h, pred_h, axis=0)

    # Step 1: undo the z-scoring.
    Zt, Zp = inv_z_mat(true_h), inv_z_mat(pred_h)
    # Step 2: undo the per-feature forward transform, column by column.
    Yt, Yp = np.zeros_like(Zt), np.zeros_like(Zp)
    for j, name in enumerate(FEATURES):
        Yt[:, j] = inv_forward(Zt[:, j], name)
        Yp[:, j] = inv_forward(Zp[:, j], name)
    rmse_phys[h, :] = rmse(Yt, Yp, axis=0)

# Rows are horizons, columns are features.
rmse_z_df = pd.DataFrame(rmse_z, index=HORIZON_LABELS, columns=FEATURES)
rmse_phys_df = pd.DataFrame(rmse_phys, index=HORIZON_LABELS, columns=FEATURES)
for _table, _csv_name in (
    (rmse_z_df, "rmse_test_lstm_z_FIXED.csv"),
    (rmse_phys_df, "rmse_test_lstm_physical_FIXED.csv"),
):
    _table.to_csv(TAB / _csv_name)
rmse_z_df, rmse_phys_df


(     bh_mass    bh_acc  stellar_mass       sfr  halo_mass  vel_disp
 H1  0.456114  0.437896      0.132786  0.563527   0.235452  0.226329
 H3  0.576406  0.537377      0.261931  0.461727   0.451612  0.325954
 H5  0.525281  0.497619      0.289212  0.426366   0.600853  0.354014,
      bh_mass    bh_acc  stellar_mass       sfr  halo_mass   vel_disp
 H1  0.033011  0.020150      0.102649  6.465689   6.188580   6.529059
 H3  0.004635  0.011077      0.202484  5.297684  11.870134   9.402980
 H5  0.005330  0.009409      0.223573  4.891962  15.792757  10.212455)

In [6]:
# Feature-averaged z-space RMSE of the LSTM at each forecast horizon.
horizon_ticks = [1, 3, 5]
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(horizon_ticks, rmse_z_df.mean(axis=1).values, marker="o")
ax.set_xlabel("Horizon Δ")
ax.set_ylabel("RMSE (z)")
ax.set_title("RMSE vs Horizon — LSTM")
ax.set_xticks(horizon_ticks)
ax.set_xticklabels(HORIZON_LABELS)
fig.tight_layout()
fig.savefig(FIG / "rmse_vs_horizon_lstm_FIXED.png", dpi=300, bbox_inches="tight")
plt.close(fig)


In [7]:
# Bar charts of per-feature RMSE for the first row of the RMSE tables (label "H1").
h = 0
vals_z = rmse_z_df.iloc[h].values
vals_p = rmse_phys_df.iloc[h].values

# (bar heights, y-axis label, title, output file name) for each chart.
bar_specs = (
    (vals_z, "RMSE (z)", "RMSE by Feature (H=1, z)", "rmse_h1_feature_lstm_z_FIXED.png"),
    (vals_p, "RMSE (physical)", "RMSE by Feature (H=1, physical)", "rmse_h1_feature_lstm_phys_FIXED.png"),
)
for heights, y_label, title, png_name in bar_specs:
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(FEATURES, heights)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    # pyplot call: acts on the current axes, i.e. the one just created.
    plt.xticks(rotation=30, ha="right")
    plt.tight_layout()
    plt.savefig(FIG / png_name, dpi=300, bbox_inches="tight")
    plt.close(fig)
"ok"


'ok'

In [8]:
# Parity (true vs predicted) hexbin plots in physical units for horizon index 0,
# one figure per feature.
h = 0
Zt, Zp = inv_z_mat(Y_true_z[:, h, :]), inv_z_mat(Y_pred_z[:, h, :])
Yt, Yp = np.zeros_like(Zt), np.zeros_like(Zp)
for j, name in enumerate(FEATURES):
    Yt[:, j] = inv_forward(Zt[:, j], name)
    Yp[:, j] = inv_forward(Zp[:, j], name)

for j, name in enumerate(FEATURES):
    x, y = Yt[:, j], Yp[:, j]
    # Shared axis limits: 1st-99th percentile of true and predicted values pooled,
    # so extreme points do not dictate the view.
    lo, hi = np.quantile(np.concatenate([x, y]), [0.01, 0.99])
    # Metrics are computed on all points, not only those inside the view.
    r2 = r2_score(x, y)
    r = rmse(x, y)

    fig, ax = plt.subplots(figsize=(5.8, 5.2))
    ax.hexbin(x, y, gridsize=60, mincnt=1)
    ax.plot([lo, hi], [lo, hi], "--", lw=2)  # y = x reference line
    ax.set_title(f"Parity H1 — {name}  R²={r2:.3f}  RMSE={r:.3g}")
    ax.set_xlabel(f"True {name}")
    ax.set_ylabel(f"Pred {name}")
    ax.set_xlim(lo, hi)
    ax.set_ylim(lo, hi)
    fig.tight_layout()
    fig.savefig(FIG / f"parity_H1_{name}_FIXED.png", dpi=300, bbox_inches="tight")
    plt.close(fig)
"ok"


'ok'

In [9]:
# Baseline prediction: copy Xt_z to every horizon so it has the same
# (·, horizon, feature) layout as Y_true_z.
# NOTE(review): presumably Xt_z is the last observed state per sample — confirm.
n_horizons = Y_true_z.shape[1]
Yp_pers_z = np.repeat(Xt_z[:, np.newaxis, :], n_horizons, axis=1)
def dump_rmse_tables(Y_true_z,Y_pred_z,tag):
    """Compute per-horizon, per-feature RMSE tables, write them to CSV and return them.

    Two tables are produced: one in z-space and one after mapping both arrays
    back through ``inv_z_mat`` and the per-feature ``inv_forward``. They are
    written to ``TAB/rmse_{tag}_z_FIXED.csv`` and ``TAB/rmse_{tag}_phys_FIXED.csv``
    with ``HORIZON_LABELS`` as the row index and ``FEATURES`` as the columns.

    Parameters
    ----------
    Y_true_z, Y_pred_z : numpy.ndarray
        Arrays indexed as ``[:, horizon, feature]``; both must have the same shape.
    tag : str
        Name fragment inserted into the output file names.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The z-space table and the physical-units table, so callers do not
        have to re-read the CSV files.

    Raises
    ------
    ValueError
        If the two arrays differ in shape (which could otherwise broadcast
        silently and yield misleading errors).
    """
    if Y_true_z.shape != Y_pred_z.shape:
        raise ValueError(
            f"shape mismatch: Y_true_z {Y_true_z.shape} vs Y_pred_z {Y_pred_z.shape}"
        )

    n_horizons, n_features = Y_true_z.shape[1], Y_true_z.shape[2]
    rz = np.zeros((n_horizons, n_features))
    rp = np.zeros((n_horizons, n_features))
    for h in range(n_horizons):
        true_h, pred_h = Y_true_z[:, h, :], Y_pred_z[:, h, :]
        rz[h, :] = rmse(true_h, pred_h, axis=0)

        # Undo z-scoring, then the per-feature forward transform.
        Zt, Zp = inv_z_mat(true_h), inv_z_mat(pred_h)
        Yt, Yp = np.zeros_like(Zt), np.zeros_like(Zp)
        for j, name in enumerate(FEATURES):
            Yt[:, j] = inv_forward(Zt[:, j], name)
            Yp[:, j] = inv_forward(Zp[:, j], name)
        rp[h, :] = rmse(Yt, Yp, axis=0)

    z_df = pd.DataFrame(rz, columns=FEATURES, index=HORIZON_LABELS)
    phys_df = pd.DataFrame(rp, columns=FEATURES, index=HORIZON_LABELS)
    z_df.to_csv(TAB/f"rmse_{tag}_z_FIXED.csv")
    phys_df.to_csv(TAB/f"rmse_{tag}_phys_FIXED.csv")
    return z_df, phys_df
# Write the RMSE tables for the repeated-Xt_z baseline under the "persistence" tag.
dump_rmse_tables(Y_true_z, Yp_pers_z, tag="persistence")
"ok"


'ok'

In [10]:
# Training history; the columns read in this notebook are "epoch", "train", "val" and "lr".
hist = pd.read_csv(TAB / "training_history.csv")

fig, ax = plt.subplots(figsize=(6.4, 4))
for column in ("train", "val"):
    # The column name doubles as the legend label.
    ax.plot(hist["epoch"], hist[column], label=column)
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.set_title("Convergence")
ax.legend()
fig.tight_layout()
fig.savefig(FIG / "lstm_convergence_FIXED.png", dpi=300, bbox_inches="tight")
plt.close(fig)


In [11]:
# Learning rate recorded per epoch in the training history.
fig, ax = plt.subplots(figsize=(6.4, 3.6))
ax.plot(hist["epoch"], hist["lr"])
ax.set_xlabel("Epoch")
ax.set_ylabel("LR")
ax.set_title("Learning Rate Schedule")
fig.tight_layout()
fig.savefig(FIG / "learning_rate_schedule_FIXED.png", dpi=300, bbox_inches="tight")
plt.close(fig)


In [12]:
# Feature-averaged z-space RMSE per horizon, read back from the CSV tables
# written earlier in the notebook.
lstm_overall = (
    pd.read_csv(TAB / "rmse_test_lstm_z_FIXED.csv", index_col=0)
    .mean(axis=1)
    .values
)
pers_overall = (
    pd.read_csv(TAB / "rmse_persistence_z_FIXED.csv", index_col=0)
    .mean(axis=1)
    .values
)

horizon_ticks = [1, 3, 5]
fig, ax = plt.subplots(figsize=(6, 4))
for curve, label in ((lstm_overall, "LSTM"), (pers_overall, "Persistence")):
    ax.plot(horizon_ticks, curve, marker="o", label=label)
ax.set_xlabel("Horizon Δ")
ax.set_ylabel("RMSE (z)")
ax.set_title("Model Comparison (Lower Better)")
ax.set_xticks(horizon_ticks)
ax.set_xticklabels(HORIZON_LABELS)
ax.legend()
fig.tight_layout()
fig.savefig(FIG / "model_comparison_rmse_vs_horizon_FIXED.png", dpi=300, bbox_inches="tight")
plt.close(fig)


In [13]:
# Headline numbers from the training history, exported as JSON.
summ = {
    "best_val_mse": float(hist["val"].min()),
    # Largest value in the "epoch" column.
    # NOTE(review): equals the number of epochs only if epochs are numbered from 1 — confirm.
    "epochs_run": int(hist["epoch"].max()),
}
summary_path = Path("../reports/exports/results_summary_FIXED.json")
summary_path.parent.mkdir(parents=True, exist_ok=True)
summary_path.write_text(json.dumps(summ, indent=2))
summ


{'best_val_mse': 0.1567917232556193, 'epochs_run': 44}