In [None]:
import sys
from pathlib import Path
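# Set the notebook's CWD to the repo root. This absolute path is machine-specific;
# adjust it for your checkout. Note that ROOT below resolves to the *parent* of this directory.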
%cd D:/deepdemand
ROOT = Path.cwd().parents[0]   # go up one level
sys.path.insert(0, str(ROOT))


In [26]:
# Projection year; used further down to name the output file (prediction_<year>.json).
# NOTE(review): build_feature_bank() loads the "..._2040.json" feature file regardless
# of this variable, so changing it alone does not change which features are used.
year = 2040

In [None]:
import os
import random
import datetime
from pathlib import Path
import numpy as np
import torch

from config import DATA, TRAINING
from model.deepdemand import DeepDemand
from model.dataloader import load_gt, load_json, get_lsoa_vector
import model.utils as utils


def set_seeds(seed: int):
    """Seed the NumPy, Python ``random`` and PyTorch global RNGs with ``seed``."""
    # Seeding order: NumPy, stdlib random, torch.
    for seed_rng in (np.random.seed, random.seed, torch.manual_seed):
        seed_rng(seed)


def build_feature_bank(
    features_path: str = "data/node_features/lsoa21_features_normalized_2040.json",
    node_to_lsoa_path: str = "data/node_features/node_to_lsoa.json",
):
    """
    Build the LSOA feature bank and load the node-to-LSOA file for the model.

    Args:
        features_path: JSON file keyed by LSOA code; each value is passed to
            ``get_lsoa_vector``. Defaults to the 2040 file so that existing
            zero-argument calls keep their behaviour.
        node_to_lsoa_path: JSON file whose content is returned as loaded.

    Returns:
        ``(feature_bank, node_to_lsoa)`` where ``feature_bank`` maps each LSOA code
        to a torch tensor holding that code's row of the stacked feature matrix
        (the PCA-projected row when ``TRAINING["pca"]`` is truthy).

    Raises:
        ValueError: if ``features_path`` contains no LSOA entries.

    NOTE(review): when PCA is enabled the basis is re-fitted on the rows loaded
    here. If this file is not the data the checkpoint was trained on, the projected
    features may not match what the model saw during training — confirm, or load
    the saved training basis instead.
    """
    lsoa_json = load_json(features_path)
    node_to_lsoa = load_json(node_to_lsoa_path)

    # Sorted codes give a deterministic row order for the matrix (and for the PCA fit).
    lsoa_codes = sorted(lsoa_json.keys())
    if not lsoa_codes:
        raise ValueError(f"No LSOA features found in {features_path}")

    X = np.vstack(
        [get_lsoa_vector(lsoa_json[code]).cpu().numpy() for code in lsoa_codes]
    ).astype(np.float32)

    features = X
    if TRAINING.get("pca", False):
        n_components = int(TRAINING.get("pca_components", 32))
        # Only the projected rows are needed; the other two return values are unused.
        features, _, _ = utils.pca_project(X, n_components)

    feature_bank = {
        code: torch.from_numpy(features[i]) for i, code in enumerate(lsoa_codes)
    }
    return feature_bank, node_to_lsoa


def load_model(checkpoint_path: str, device: torch.device) -> DeepDemand:
    """
    Build a DeepDemand model, load checkpoint weights into it and set eval mode.

    Args:
        checkpoint_path: path to a state-dict checkpoint readable by ``torch.load``.
        device: device the model is moved to and the checkpoint is mapped onto.

    Returns:
        The model in eval mode with the checkpoint's state dict loaded.

    Raises:
        FileNotFoundError: if ``checkpoint_path`` is not an existing file. This is
            checked first, before the feature bank and the model are built.
    """
    # Fail fast: a wrong path should not cost a full feature-bank/model build.
    if not os.path.isfile(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")

    feature_bank, node_to_lsoa = build_feature_bank()
    model = DeepDemand(feature_bank=feature_bank, node_to_lsoa=node_to_lsoa).to(device)

    # weights_only=True restricts unpickling to tensors/primitive containers.
    state = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(state)
    model.eval()
    return model

@torch.no_grad()
def infer_on_edges(model: DeepDemand, edge_ids, edge_to_gt, scaler, device: torch.device):
    """
    Run ``model`` on each edge id and collect (ground truth, prediction) pairs.

    Edges for which the model returns ``None`` or an empty tensor are skipped, so
    the returned arrays can be shorter than ``edge_ids`` and are not guaranteed to
    line up with it position by position.

    When ``scaler`` is truthy, both values are mapped back through
    ``utils.inverse_transform_tensor`` if ``utils`` provides it, otherwise through
    ``scaler.inverse_transform``.

    ``device`` is accepted but not used inside this function.

    Returns:
        ``(gts, preds)`` as two float64 NumPy arrays of equal length.
    """

    def _to_original_scale(value):
        # The inverse transform is fed a (1, 1) float32 tensor and yields a scalar.
        boxed = torch.tensor([value], dtype=torch.float32).view(1, 1)
        if hasattr(utils, "inverse_transform_tensor"):
            return utils.inverse_transform_tensor(boxed, scaler).item()
        return scaler.inverse_transform(boxed).item()

    targets, outputs = [], []
    for edge_id in edge_ids:
        target = edge_to_gt[edge_id]
        output = model(edge_id)
        has_prediction = output is not None and output.numel() != 0
        if not has_prediction:
            continue

        # Reduce the prediction to a plain Python scalar on the CPU.
        output = output.detach().cpu().numpy().item()
        target = float(target)

        if scaler:
            target = _to_original_scale(target)
            output = _to_original_scale(output)

        targets.append(target)
        outputs.append(output)

    gts = np.array(targets, dtype=np.float64)
    preds = np.array(outputs, dtype=np.float64)
    return gts, preds


In [28]:
# Checkpoint to load (relative path, resolved against the current working directory).
checkpoint_path = "param/projection/best_stage_1_lr1e-03.pt"

# Seed the RNGs and pick the device from the training config.
set_seeds(TRAINING['seed'])
device = torch.device(TRAINING['device'])

# Load ground truth and its scaler; every edge with a GT entry is used (no split happens here).
edge_to_gt, scaler = load_gt()
all_edge_ids = list(edge_to_gt.keys())

# Build the model and load the checkpoint weights.
model = load_model(checkpoint_path, device)

# Run inference over all edges.
# NOTE: infer_on_edges skips edges with no prediction, so gt/pred can be shorter than all_edge_ids.
gt, pred = infer_on_edges(model, all_edge_ids, edge_to_gt, scaler, device)

# Prepare the output directory. `ts` is a timestamp string; it is not used in this cell.
out_dir = "projection"
Path(out_dir).mkdir(parents=True, exist_ok=True)
ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")


Number of valid edges: 5088

=== GT Descriptive Statistics (raw) ===
Min     : 191.405
Max     : 113436.372
Mean    : 25243.410
Median  : 20618.627
Std     : 18893.461

[PCA] Reduced from 20 → 20 dims; explained variance = 100.00%
LSOA feature dim (preloaded): 20


In [29]:
import json
from pathlib import Path

def save_results_json(edge_ids, gt_arr, pred_arr, out_path):
    """
    Write one ``{"edge_id", "gt", "pred"}`` record per edge to a JSON file.

    Args:
        edge_ids: iterable of edge identifiers (written as-is).
        gt_arr: iterable of ground-truth values, converted with ``float``.
        pred_arr: iterable of predictions, converted with ``float``.
        out_path: destination file; missing parent directories are created.

    Raises:
        ValueError: if the three inputs do not have the same length. Records are
            paired by position, so a length mismatch would otherwise be silently
            truncated and could attach values to the wrong edge id.
    """
    # Materialise so that generators are supported and lengths can be compared.
    edge_ids, gt_arr, pred_arr = list(edge_ids), list(gt_arr), list(pred_arr)
    if not (len(edge_ids) == len(gt_arr) == len(pred_arr)):
        raise ValueError(
            "edge_ids, gt_arr and pred_arr must have the same length, got "
            f"{len(edge_ids)}, {len(gt_arr)} and {len(pred_arr)}; "
            "were some edges skipped during inference?"
        )

    out = [
        {"edge_id": eid, "gt": float(gt), "pred": float(pred)}
        for eid, gt, pred in zip(edge_ids, gt_arr, pred_arr)
    ]

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"Saved {len(out)} entries to {out_path}")

# Persist one {edge_id, gt, pred} record per edge for the projection year.
# NOTE(review): records are paired by position, so `gt`/`pred` must line up with
# `all_edge_ids`; infer_on_edges drops edges with no prediction, which would shift the pairing.
save_results_json(
    all_edge_ids,
    gt,
    pred,
    f"{out_dir}/prediction_{year}.json"
)

Saved 5088 entries to projection/prediction_2040.json
