In [1]:
from pathlib import Path
import json, numpy as np, pandas as pd, torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [2]:
# Single place for everything a reader might tune: data location, model
# inputs, windowing, optimiser and network size.
CFG = {
    "DATA_CSV": "../data/black_hole_evolution_tng100.csv",
    "FEATURES": [
        "bh_mass",
        "bh_acc",
        "stellar_mass",
        "sfr",
        "halo_mass",
        "vel_disp",
    ],
    "HORIZONS": [1, 3, 5],  # forecast offsets, counted from the last input step
    "T_IN": 8,              # number of input steps per window
    "BATCH": 128,
    "EPOCHS": 50,
    "PATIENCE": 12,         # early-stopping patience (epochs without improvement)
    "LR": 1e-3,
    "FACTOR": 0.5,          # ReduceLROnPlateau multiplicative factor
    "MIN_LR": 1.25e-4,      # ReduceLROnPlateau lower bound
    "HIDDEN": 128,
    "LAYERS": 1,
    "DROPOUT": 0.1,
}
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Seed NumPy's global RNG and torch's default generator.
np.random.seed(42)
torch.manual_seed(42)


<torch._C.Generator at 0x7f9060b59cb0>

In [3]:
# Load the raw table and order it by subhalo, then by snapshot, so that each
# subhalo's rows form one contiguous, time-ordered run.
df = (
    pd.read_csv(CFG["DATA_CSV"])
    .sort_values(["subhalo_id", "snapshot"])
    .reset_index(drop=True)
)
# Column names used as model inputs and targets (taken from the config).
FEATURES = CFG["FEATURES"]


In [5]:
# Per-feature forward transform applied before standardisation.
# "log10_floor" clips values from below at `floor` and then takes log10;
# a type of None leaves the feature unchanged.
TRANSFORM = {
    "bh_mass": {"type": "log10_floor", "floor": 1e-10},
    "bh_acc": {"type": "log10_floor", "floor": 1e-12},
    "stellar_mass": {"type": None},
    "sfr": {"type": None},
    "halo_mass": {"type": None},
    "vel_disp": {"type": None},
}


def fwd_transform(x, name):
    """Apply the forward transform configured for feature ``name`` to ``x``.

    Args:
        x: array of raw feature values.
        name: feature name; must be a key of ``TRANSFORM``.

    Returns:
        ``log10(max(x, floor))`` element-wise for ``"log10_floor"`` features,
        otherwise ``x`` itself (the same object, not a copy).

    Raises:
        KeyError: if ``name`` has no entry in ``TRANSFORM``.
    """
    spec = TRANSFORM[name]
    if spec["type"] != "log10_floor":
        return x
    # Clip first so zeros and negatives never reach log10.
    clipped = np.maximum(x, spec["floor"])
    return np.log10(clipped)
def apply_forward(dfin):
    """Return a copy of ``dfin`` with every ``FEATURES`` column forward-transformed.

    The input frame is not modified. Columns that are not listed in the
    notebook-level ``FEATURES`` are carried over unchanged.
    """
    out = dfin.copy()
    for col in FEATURES:
        raw_values = out[col].values
        out[col] = fwd_transform(raw_values, col)
    return out


In [6]:
# Split by subhalo id rather than by row, so all rows of one subhalo end up in
# the same partition. The id order is shuffled with a fixed seed.
rng = np.random.default_rng(42)
ids = df["subhalo_id"].unique()
rng.shuffle(ids)

# 70 % of the ids go to train, 15 % to validation, the remainder to test.
n = len(ids)
n_train = int(0.7 * n)
n_val = int(0.15 * n)
train_ids = ids[:n_train]
val_ids = ids[n_train:n_train + n_val]
test_ids = ids[n_train + n_val:]

train_df = df.loc[df["subhalo_id"].isin(train_ids)].reset_index(drop=True)
val_df = df.loc[df["subhalo_id"].isin(val_ids)].reset_index(drop=True)
test_df = df.loc[df["subhalo_id"].isin(test_ids)].reset_index(drop=True)

train_tf = apply_forward(train_df)
val_tf = apply_forward(val_df)

# Standardisation statistics come from the transformed training rows only.
# A zero standard deviation is replaced by 1 to avoid division by zero later.
mu = train_tf[FEATURES].mean().to_numpy().astype(np.float32)
sd = train_tf[FEATURES].std(ddof=0).replace(0, 1.0).to_numpy().astype(np.float32)

# Persist the scaler and the transform spec next to the processed data.
PROC_DIR = Path("../data/processed")
PROC_DIR.mkdir(parents=True, exist_ok=True)
np.save(PROC_DIR / "scaler_mean.npy", mu)
np.save(PROC_DIR / "scaler_scale.npy", sd)
(PROC_DIR / "transform_config.json").write_text(json.dumps(TRANSFORM, indent=2))


285

In [7]:
class BHTimeSeriesDataset(Dataset):
    """Sliding-window forecasting samples built from per-subhalo time series.

    Rows are forward-transformed with ``apply_forward``, sorted by
    ``subhalo_id`` and ``snapshot`` and grouped into one track per subhalo.
    A track contributes one sample for every start row at which a full span of
    ``t_in + max(horizons)`` rows fits; shorter tracks contribute nothing.
    Rows are taken in snapshot order; gaps in snapshot numbers are not checked.

    NOTE: ``apply_forward`` transforms the columns named in the notebook-level
    ``FEATURES`` list, so ``features`` is expected to match that list.

    Args:
        df_: frame with ``subhalo_id``, ``snapshot`` and the feature columns.
        features: names of the columns used as inputs and targets.
        horizons: forecast offsets counted from the last input row
            (offset 1 is the row right after the input window).
        mu: per-feature mean used for standardisation.
        sd: per-feature scale used for standardisation; zeros are replaced by 1.
        t_in: number of input rows per sample.
    """

    def __init__(self, df_, features, horizons, mu, sd, t_in=8):
        self.features = features
        self.horizons = horizons
        self.mu = mu.astype(np.float32)
        self.sd = np.where(sd == 0, 1.0, sd).astype(np.float32)
        self.t_in = t_in

        d = apply_forward(df_).sort_values(["subhalo_id", "snapshot"]).reset_index(drop=True)
        self.tracks = []
        for tid, g in d.groupby("subhalo_id"):
            snaps = g["snapshot"].values
            X = g[self.features].values.astype(np.float32)
            self.tracks.append((int(tid), snaps, X))

        # Count the windows of every track once and keep the running totals so
        # that __len__ is O(1) and __getitem__ is a binary search instead of a
        # scan over all tracks on every access.
        self._span = self.t_in + max(self.horizons)
        counts = [max(0, len(snaps) - self._span + 1) for _, snaps, _ in self.tracks]
        self._cum_windows = np.cumsum(counts, dtype=np.int64)

    def __len__(self):
        """Total number of windows over all tracks."""
        if len(self._cum_windows) == 0:
            return 0
        return int(self._cum_windows[-1])

    def __getitem__(self, idx):
        """Return ``(X, Y)`` for the ``idx``-th window.

        ``X`` has shape ``(t_in, n_features)`` and ``Y`` has shape
        ``(len(horizons), n_features)``; both are standardised float32 tensors.
        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        idx = int(idx)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f"index {idx} out of range for dataset of length {total}")

        # First track whose running total exceeds idx; side="right" skips
        # tracks that contribute zero windows.
        k = int(np.searchsorted(self._cum_windows, idx, side="right"))
        start = idx - (int(self._cum_windows[k - 1]) if k else 0)

        X = self.tracks[k][2]
        seg = X[start:start + self._span]
        Xin = seg[:self.t_in]
        # Horizon h is h rows after the last input row (index t_in - 1).
        Yout = np.stack([seg[self.t_in - 1 + h] for h in self.horizons], axis=0)
        Xz = (Xin - self.mu) / self.sd
        Yz = (Yout - self.mu) / self.sd
        return torch.tensor(Xz, dtype=torch.float32), torch.tensor(Yz, dtype=torch.float32)


In [8]:
class LSTMForecaster(nn.Module):
    """LSTM encoder followed by one linear layer that predicts all horizons at once.

    Args:
        in_dim: number of input features per time step.
        hidden: LSTM hidden size.
        layers: number of stacked LSTM layers.
        dropout: dropout between LSTM layers; only used when ``layers > 1``.
        horizons: number of forecast horizons.
        out_dim: number of predicted features per horizon.
    """

    def __init__(self, in_dim, hidden, layers, dropout, horizons, out_dim):
        super().__init__()
        # Dropout is only passed through for stacked LSTMs; a single layer gets 0.
        inter_layer_dropout = dropout if layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.head = nn.Linear(hidden, horizons * out_dim)
        self.horizons = horizons
        self.out_dim = out_dim

    def forward(self, x):
        """Map a batch-first input sequence to ``(batch, horizons, out_dim)`` predictions."""
        sequence_out, _ = self.lstm(x)
        # Only the output at the final time step feeds the prediction head.
        last_step = sequence_out[:, -1]
        flat = self.head(last_step)
        return flat.view(-1, self.horizons, self.out_dim)


In [10]:
# Pull the windowing and batching settings out of the config.
HORIZONS = CFG["HORIZONS"]
T_IN = CFG["T_IN"]
BATCH = CFG["BATCH"]

# Both datasets are standardised with the same mu / sd.
train_ds = BHTimeSeriesDataset(train_df, FEATURES, HORIZONS, mu, sd, t_in=T_IN)
val_ds = BHTimeSeriesDataset(val_df, FEATURES, HORIZONS, mu, sd, t_in=T_IN)

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(
    train_ds,
    batch_size=BATCH,
    shuffle=True,
    drop_last=False,
)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH,
    shuffle=False,
    drop_last=False,
)
len(train_ds), len(val_ds)


(5182, 1115)

In [11]:
def _run_epoch(net, loader, loss_fn, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    With an ``optimizer`` the network is put in train mode and updated after
    every batch. Without one it is put in eval mode and gradients are disabled.

    Raises:
        ValueError: if the loader yields no samples, so no mean can be formed.
    """
    training = optimizer is not None
    net.train(training)
    total, count = 0.0, 0
    with torch.set_grad_enabled(training):
        for Xb, Yb in loader:
            Xb = Xb.to(device)
            Yb = Yb.to(device)
            if training:
                optimizer.zero_grad()
            loss = loss_fn(net(Xb), Yb)
            if training:
                loss.backward()
                optimizer.step()
            # Weight by batch size so a short final batch does not skew the mean.
            total += loss.item() * Xb.size(0)
            count += Xb.size(0)
    if count == 0:
        raise ValueError("loader produced no samples; cannot compute a mean loss")
    return total / count


model = LSTMForecaster(
    in_dim=len(FEATURES),
    hidden=CFG["HIDDEN"],
    layers=CFG["LAYERS"],
    dropout=CFG["DROPOUT"],
    horizons=len(HORIZONS),
    out_dim=len(FEATURES),
).to(DEVICE)
opt = torch.optim.AdamW(model.parameters(), lr=CFG["LR"])
crit = nn.MSELoss()
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
    opt, mode="min", factor=CFG["FACTOR"], patience=4, min_lr=CFG["MIN_LR"]
)

# Create the checkpoint directory once instead of on every improving epoch.
Path("../models").mkdir(parents=True, exist_ok=True)

best = float("inf")
bad = 0
history = []
for epoch in range(1, CFG["EPOCHS"] + 1):
    tr = _run_epoch(model, train_loader, crit, DEVICE, optimizer=opt)
    va = _run_epoch(model, val_loader, crit, DEVICE)
    sched.step(va)
    # The recorded learning rate is read after the scheduler step.
    history.append((epoch, tr, va, opt.param_groups[0]["lr"]))
    if va < best:
        # New best validation loss: reset the patience counter and checkpoint.
        best = va
        bad = 0
        torch.save(model.state_dict(), "../models/lstm_blackhole_best.pt")
    else:
        bad += 1
    # Early stopping after PATIENCE consecutive epochs without improvement.
    if bad >= CFG["PATIENCE"]:
        break

hist_df = pd.DataFrame(history, columns=["epoch", "train", "val", "lr"])
Path("../reports/tables").mkdir(parents=True, exist_ok=True)
hist_df.to_csv("../reports/tables/training_history.csv", index=False)
best


0.15679172325561935

In [12]:
# Windows over the held-out subhalos, standardised with the same mu / sd.
test_ds = BHTimeSeriesDataset(test_df, FEATURES, HORIZONS, mu, sd, t_in=T_IN)
test_loader = DataLoader(test_ds, batch_size=BATCH, shuffle=False)

# Rebuild the architecture, then restore the best checkpoint saved in training.
m = LSTMForecaster(
    in_dim=len(FEATURES),
    hidden=CFG["HIDDEN"],
    layers=CFG["LAYERS"],
    dropout=CFG["DROPOUT"],
    horizons=len(HORIZONS),
    out_dim=len(FEATURES),
).to(DEVICE)
m.load_state_dict(
    torch.load("../models/lstm_blackhole_best.pt", map_location=DEVICE)
)
m.eval()


LSTMForecaster(
  (lstm): LSTM(6, 128, batch_first=True)
  (head): Linear(in_features=128, out_features=18, bias=True)
)

In [13]:
# Run the restored model over the test windows and keep, per batch, the
# targets, the predictions and the last step of each input window.
Y_true_list = []
Y_pred_list = []
X_last_list = []
with torch.no_grad():
    for Xb, Yb in test_loader:
        Xb = Xb.to(DEVICE)
        Yb = Yb.to(DEVICE)
        Pb = m(Xb)
        Y_true_list.append(Yb.cpu().numpy())
        Y_pred_list.append(Pb.cpu().numpy())
        X_last_list.append(Xb[:, -1, :].cpu().numpy())

# Stitch the per-batch pieces together along the sample axis.
Y_true_z = np.concatenate(Y_true_list, axis=0)
Y_pred_z = np.concatenate(Y_pred_list, axis=0)
Xt_z = np.concatenate(X_last_list, axis=0)

# Persist the arrays (still in standardised units) for later reporting.
ARR = Path("../reports/tables")
ARR.mkdir(parents=True, exist_ok=True)
np.save(ARR / "Y_true_z.npy", Y_true_z)
np.save(ARR / "Y_pred_z.npy", Y_pred_z)
np.save(ARR / "Xt_z.npy", Xt_z)
Y_true_z.shape, Y_pred_z.shape, Xt_z.shape


((1115, 3, 6), (1115, 3, 6), (1115, 6))

In [14]:
# Collect the full input window of every test sample (only the inputs are
# kept; the targets from the loader are ignored) and save them as one array.
with torch.no_grad():
    X_windows = [Xb.cpu().numpy() for Xb, _ in test_loader]
X_windows_z = np.concatenate(X_windows, axis=0)
np.save(Path("../reports/tables") / "X_windows_z.npy", X_windows_z)
"Saved X_windows_z for explainability."


'Saved X_windows_z for explainability.'