In [11]:
#!/usr/bin/env python3
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score
from pathlib import Path

# =========================
# INPUTS
# =========================
# Prediction dumps for each split; the loader below reads the "gt" and "pred"
# fields of every record.
TRAIN_JSON = "error_analysis/pred_results_train.json"
TEST_JSON = "error_analysis/pred_results_test.json"

# Output folder, created up front (including parents) so saving cannot fail on
# a missing directory.
OUT_DIR = Path("figures")
OUT_DIR.mkdir(exist_ok=True, parents=True)

# NOTE: despite the *_PDF names, both targets are SVG files.
OUT_TRAIN_PDF = OUT_DIR.joinpath("A_pred_scatter_train.svg")
OUT_TEST_PDF = OUT_DIR.joinpath("B_pred_scatter_test.svg")

# Marker appearance. Colours are 8-bit RGB channels rescaled to the 0-1 range.
TRAIN_COLOR = tuple(channel / 255 for channel in (31, 119, 180))  # blue
TEST_COLOR = tuple(channel / 255 for channel in (214, 39, 40))    # red
POINT_ALPHA = 0.5
POINT_SIZE = 5

# Value handed to plt.subplots(figsize=...) and the common range applied to
# both the x and the y axis of the scatter plots.
FIGSIZE = (4.5, 4.0)
AX_MIN = 0.0
AX_MAX = 1e5

# =========================
# METRICS
# =========================
def geh_per_point(y_true, y_pred):
    """Element-wise GEH between ground truth and prediction.

    Computes ``sqrt(2 * (y_true - y_pred)**2 / (y_true + |y_pred|))``. Note
    that the absolute value is applied to the prediction only.

    Parameters
    ----------
    y_true, y_pred : array-like
        Values convertible to float arrays.

    Returns
    -------
    numpy.ndarray
        GEH values; every NaN or +/-inf result (e.g. a zero denominator) is
        replaced by 0.0.
    """
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Division by zero / 0-over-0 is expected here and cleaned up afterwards,
    # so the corresponding floating-point warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        gap = observed - predicted
        volume = observed + np.abs(predicted)
        raw = np.sqrt(2.0 * (gap * gap) / volume)

    return np.nan_to_num(raw, nan=0.0, posinf=0.0, neginf=0.0)

def calculate_metrics(y_true, y_pred):
    """Summarise prediction quality as the tuple ``(MGEH, MAE, R2)``.

    Pairs in which either value is non-finite are discarded before anything is
    computed. If no pair survives, all three entries are NaN.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted values, convertible to float arrays.

    Returns
    -------
    tuple
        Mean of ``geh_per_point``, sklearn's mean absolute error and sklearn's
        R^2 score, in that order.
    """
    truth = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    # Keep only positions where both sides are finite numbers.
    usable = np.isfinite(truth) & np.isfinite(preds)
    truth, preds = truth[usable], preds[usable]
    if not truth.size:
        return np.nan, np.nan, np.nan

    r2 = r2_score(truth, preds)
    mae = mean_absolute_error(truth, preds)
    mgeh = np.mean(geh_per_point(truth, preds))
    return float(mgeh), float(mae), float(r2)  # MGEH, MAE, R2

# =========================
# LOAD JSON
# =========================
def load_json_xy(path):
    """Read a JSON list of records and return its "gt" and "pred" columns.

    Parameters
    ----------
    path : str or path-like
        UTF-8 encoded JSON file whose top level is iterated record by record;
        each record must provide "gt" and "pred" values accepted by float().

    Returns
    -------
    tuple of numpy.ndarray
        ``(gt, pred)`` as float arrays, in file order.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    records = json.loads(raw_text)

    def column(field):
        # One full pass per field, so "gt" is converted entirely before "pred".
        return np.array([float(rec[field]) for rec in records], dtype=float)

    return column("gt"), column("pred")

# =========================
# PLOTTER
# =========================
def plot_one_split(gt, pr, color, label, out_pdf):
    """Save a ground-truth vs. prediction scatter plot for one data split.

    The figure uses the module-level FIGSIZE, fixes both axes to
    [AX_MIN, AX_MAX], overlays a dashed y = x line, is written to ``out_pdf``
    and then closed. The output path and the split's (MGEH, MAE, R2) metrics
    are printed afterwards.

    Parameters
    ----------
    gt, pr : array-like
        Ground-truth and predicted values (x and y coordinates of the points).
    color : matplotlib colour
        Marker colour.
    label : str
        Split name; attached to the scatter artist and used in the printed
        metrics line. No legend is drawn.
    out_pdf : str or path-like
        Destination passed to ``fig.savefig``.
    """
    mgeh, mae, r2 = calculate_metrics(gt, pr)

    fig, ax = plt.subplots(figsize=FIGSIZE)

    marker_style = dict(
        s=POINT_SIZE,
        alpha=POINT_ALPHA,
        color=color,
        edgecolors="none",
        label=label,
    )
    ax.scatter(gt, pr, **marker_style)

    # Identity line: points on it are perfect predictions.
    diagonal = [AX_MIN, AX_MAX]
    ax.plot(diagonal, diagonal, color="black", linestyle="--", linewidth=1)

    # Same fixed range on both axes, so anything outside it is not shown.
    for apply_limits in (ax.set_xlim, ax.set_ylim):
        apply_limits(AX_MIN, AX_MAX)
    ax.set_xlabel("Ground truth AADT")
    ax.set_ylabel("Predicted AADT")
    ax.grid(True, alpha=0.2)

    fig.tight_layout()
    fig.savefig(out_pdf, bbox_inches="tight")
    plt.close(fig)  # release the figure; nothing is displayed inline

    print("Saved:", out_pdf)
    print(f"{label} metrics (MGEH, MAE, R2):", (mgeh, mae, r2))

# =========================
# RUN
# =========================
# Load both splits first, then render one scatter figure per split.
gt_tr, pr_tr = load_json_xy(TRAIN_JSON)
gt_te, pr_te = load_json_xy(TEST_JSON)

for split_gt, split_pr, split_color, split_name, split_target in (
    (gt_tr, pr_tr, TRAIN_COLOR, "Train", OUT_TRAIN_PDF),
    (gt_te, pr_te, TEST_COLOR, "Test", OUT_TEST_PDF),
):
    plot_one_split(split_gt, split_pr, split_color, split_name, split_target)

Saved: figures\A_pred_scatter_train.svg
Train metrics (MGEH, MAE, R2): (52.375146142230065, 7176.104831870826, 0.7246863445941298)
Saved: figures\B_pred_scatter_test.svg
Test metrics (MGEH, MAE, R2): (52.72421939534167, 7168.301021673356, 0.6972209066405952)


In [58]:
#!/usr/bin/env python3
import json
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# =========================
# INPUTS
# =========================
# Same prediction dumps as the scatter cell.
TRAIN_JSON = "error_analysis/pred_results_train.json"
TEST_JSON = "error_analysis/pred_results_test.json"

# NOTE: named *_PDF, but the target is an SVG file.
OUT_PDF = "figures/C_hist_sensor_mean_geh_train_test.svg"

# Histogram appearance.
# NOTE: FIGSIZE, TRAIN_COLOR and TEST_COLOR are also defined by the scatter
# cell; running this cell rebinds them (FIGSIZE to a different value).
BINS = 100
FIGSIZE = (4, 2.5)
TRAIN_COLOR = tuple(channel / 255 for channel in (31, 119, 180))  # blue
TEST_COLOR = tuple(channel / 255 for channel in (214, 39, 40))    # red
ALPHA = 1

# =========================
# HELPERS
# =========================
def load_json(path: str):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with Path(path).open(encoding="utf-8") as handle:
        return json.load(handle)

def parse_edgeid_u_v_k(edge_id: str):
    """Split an ``u_v_key`` edge identifier into three integers.

    The identifier is converted to ``str`` and split on "_". Only the first
    three pieces are used; any further pieces are ignored.

    Returns
    -------
    tuple of int
        ``(u, v, key)``.

    Raises
    ------
    ValueError
        If fewer than three pieces are present, or (from ``int``) if one of
        the first three pieces is not an integer literal.
    """
    tokens = str(edge_id).split("_")
    if len(tokens) >= 3:
        u, v, key = (int(token) for token in tokens[:3])
        return u, v, key
    raise ValueError(f"Bad edge_id format (expected u_v_key): {edge_id}")

def geh_per_point(y_true, y_pred):
    """Element-wise GEH between ground truth and prediction (NaN-preserving).

    Computes ``sqrt(2 * (y_true - y_pred)**2 / (y_true + |y_pred|))``; the
    absolute value is applied to the prediction only.

    NOTE: this redefines the ``geh_per_point`` of the scatter cell. Unlike that
    version, non-finite results (e.g. a zero denominator) are returned as-is,
    and the code further down this cell filters them out with ``np.isfinite``.

    Returns
    -------
    numpy.ndarray
        GEH values, possibly containing NaN / inf.
    """
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Silence the warnings for the divisions by zero that are left in place.
    with np.errstate(divide="ignore", invalid="ignore"):
        gap = observed - predicted
        volume = observed + np.abs(predicted)
        raw = np.sqrt(2.0 * (gap * gap) / volume)

    return raw

def mean_geh_per_edge(df):
    """Return the GEH of every row of ``df``.

    ``df`` must provide "gt" and "pred" columns convertible to float. No
    averaging happens inside this function: the result holds one GEH value
    per row.
    """
    observed, predicted = (
        df[column].to_numpy(dtype=float) for column in ("gt", "pred")
    )
    return geh_per_point(observed, predicted)

# =========================
# LOAD + ALIGN
# =========================
# Raw record lists for both splits (train is read first, then test).
train_arr, test_arr = (
    load_json(json_path) for json_path in (TRAIN_JSON, TEST_JSON)
)

def to_df(arr):
    """Turn prediction records into a DataFrame with columns u, v, key, gt, pred.

    Each record must provide "edge_id" (parsed by ``parse_edgeid_u_v_k``) plus
    "gt" and "pred" values accepted by ``float``. Row order follows ``arr``.
    """
    import pandas as pd

    # One tuple per record, filled in a single pass over the input.
    rows = []
    for record in arr:
        node_u, node_v, edge_key = parse_edgeid_u_v_k(record["edge_id"])
        rows.append(
            (node_u, node_v, edge_key, float(record["gt"]), float(record["pred"]))
        )

    column_names = ("u", "v", "key", "gt", "pred")
    return pd.DataFrame(
        {
            name: [row[position] for row in rows]
            for position, name in enumerate(column_names)
        }
    )

import pandas as pd

# Build one frame per split and collapse repeated (u, v, key) rows by
# averaging their gt / pred values (a safeguard; repeats are not expected).
df_tr, df_te = (
    frame.groupby(["u", "v", "key"], as_index=False)[["gt", "pred"]].mean()
    for frame in (to_df(train_arr), to_df(test_arr))
)

# One GEH value per edge row of each split.
geh_tr, geh_te = (mean_geh_per_edge(frame) for frame in (df_tr, df_te))

# Keep finite values only, so the histogram and summary statistics are defined.
geh_tr, geh_te = (values[np.isfinite(values)] for values in (geh_tr, geh_te))

# =========================
# PLOT
# =========================
fig, ax = plt.subplots(figsize=FIGSIZE)

# Both series must be counted over the SAME bin edges to be comparable.
# Passing the integer BINS to each hist() call would make every call derive
# its own edges from its own data range, giving train and test different bin
# widths. The edges are therefore computed once from the pooled values.
shared_edges = np.histogram_bin_edges(np.concatenate([geh_tr, geh_te]), bins=BINS)

# Test is drawn after train, i.e. on top of it.
ax.hist(geh_tr, bins=shared_edges, alpha=ALPHA, label="Train", color=TRAIN_COLOR)
ax.hist(geh_te, bins=shared_edges, alpha=ALPHA, label="Test",  color=TEST_COLOR)

ax.set_xlabel("GEH")
ax.set_ylabel("Frequency")
ax.legend(loc="upper right")

ax.grid(visible=True, which="major", color="lightgray", linestyle="--", linewidth=0.8)

fig.tight_layout()
# Create the output folder on demand before writing the figure.
Path(OUT_PDF).parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_PDF, bbox_inches="tight")
plt.close(fig)

# Summary: edge counts after de-duplication vs. edges with a finite GEH,
# followed by mean / median of the finite GEH values per split.
print("Saved:", OUT_PDF)
print(f"Train edges: {len(df_tr):,}  (finite GEH: {len(geh_tr):,})")
print(f"Test  edges: {len(df_te):,}  (finite GEH: {len(geh_te):,})")
print(f"Train mean GEH: {np.mean(geh_tr):.3f} | median: {np.median(geh_tr):.3f}")
print(f"Test  mean GEH: {np.mean(geh_te):.3f} | median: {np.median(geh_te):.3f}")

Saved: figures/C_hist_sensor_mean_geh_train_test.svg
Train edges: 4,070  (finite GEH: 4,070)
Test  edges: 1,018  (finite GEH: 1,018)
Train mean GEH: 52.375 | median: 39.291
Test  mean GEH: 52.724 | median: 38.303


In [10]:
#!/usr/bin/env python3
import json
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# =========================
# INPUT / OUTPUT
# =========================
# Pre-computed per-highway metric values (input) and the figure to write.
IN_JSON = "figures/metric_by_highway_train_vs_test_plot_values.json"
OUT_SVG = "figures/D_test_only_mae_mgeh_by_highway_horizontal.svg"

# One colour per metric; 8-bit RGB channels rescaled to the 0-1 range.
COLOR_MAE = tuple(channel / 255 for channel in (31, 119, 180))   # blue
COLOR_MGEH = tuple(channel / 255 for channel in (214, 39, 40))   # red

# Figure geometry.
# NOTE: FIGSIZE is also defined by the earlier cells; this rebinds it.
FIGSIZE = (4.7, 2.5)
BAR_HEIGHT = 0.38

# Fixed x ranges: MGEH is drawn on the bottom axis, MAE on the top axis.
XLIM_MGEH = (0, 100)
XLIM_MAE = (0, 10_000)

# =========================
# LOAD
# =========================
obj = json.loads(Path(IN_JSON).read_text(encoding="utf-8"))
metrics = obj["metrics"]

# Per-highway entries of the test split; each one is read below through its
# "mean" and "ci95" keys.
MAE_TEST = metrics["MAE"]["Test"]
GEH_TEST = metrics["GEH"]["Test"]

# "highway_order" is optional. When it is absent (or empty), fall back to the
# key order of the MAE entries instead of ending up with an empty selection
# and a misleading "no overlap" error.
highway_order = obj.get("highway_order") or list(MAE_TEST)

# Plot only highways for which BOTH metrics are available, in the given order.
highways = [
    h for h in highway_order
    if h in MAE_TEST and h in GEH_TEST
]
if not highways:
    raise RuntimeError("No overlapping highways found.")

# Bar lengths ("mean") and error-bar sizes ("ci95"), aligned with `highways`.
mae_mean  = np.array([MAE_TEST[h]["mean"] for h in highways])
mae_ci    = np.array([MAE_TEST[h]["ci95"] for h in highways])
mgeh_mean = np.array([GEH_TEST[h]["mean"] for h in highways])
mgeh_ci   = np.array([GEH_TEST[h]["ci95"] for h in highways])

# One integer slot per highway along the (shared) y axis.
y = np.arange(len(highways))

# =========================
# PLOT (horizontal, dual x-axis)
# =========================
# Two x axes over one shared y axis: MGEH on the bottom, MAE on the top.
fig, ax_bottom = plt.subplots(figsize=FIGSIZE)
ax_top = ax_bottom.twiny()

# Layering: the bottom axes is stacked above the top one, and both background
# patches are hidden so neither axes covers the other's bars.
for layered_axis, layer in ((ax_bottom, 2), (ax_top, 1)):
    layered_axis.set_zorder(layer)
for layered_axis in (ax_bottom, ax_top):
    layered_axis.patch.set_visible(False)

# ---- Bars: each highway gets two half-offset bars, one per metric ----
bar_series = (
    (ax_bottom, y - BAR_HEIGHT/2, mgeh_mean, mgeh_ci, COLOR_MGEH),
    (ax_top,    y + BAR_HEIGHT/2, mae_mean,  mae_ci,  COLOR_MAE),
)
for bar_axis, bar_pos, bar_len, bar_err, bar_color in bar_series:
    bar_axis.barh(
        bar_pos, bar_len, height=BAR_HEIGHT,
        color=bar_color, edgecolor="none",
        xerr=bar_err,
        error_kw=dict(ecolor=bar_color, elinewidth=1.2, capsize=3),
    )

# ---- Axes limits ----
ax_bottom.set_xlim(*XLIM_MGEH)
ax_top.set_xlim(*XLIM_MAE)

# ---- Labels ----
# The regular x labels stay empty; the metric names are instead written as
# text in axes coordinates, to the left of each axis' tick labels.
for unlabeled_axis in (ax_bottom, ax_top):
    unlabeled_axis.set_xlabel("")

inline_captions = (
    (ax_top,    1.066, "MAE (test)",  COLOR_MAE,  "bottom"),
    (ax_bottom, -0.07, "MGEH (test)", COLOR_MGEH, "top"),
)
for cap_axis, cap_y, cap_text, cap_color, cap_va in inline_captions:
    cap_axis.text(
        -0.06, cap_y, cap_text,
        transform=cap_axis.transAxes,
        color=cap_color,
        ha="right", va=cap_va,
        fontsize=11,
    )

# Tick colours follow the metric colour of each axis.
ax_bottom.tick_params(axis="x", colors=COLOR_MGEH)
ax_top.tick_params(axis="x", colors=COLOR_MAE)

# Highway names on the y axis, first entry at the top.
ax_bottom.set_yticks(y)
ax_bottom.set_yticklabels(highways)
ax_bottom.invert_yaxis()

# ---- Spines: colour the one carrying each metric, hide the rest ----
for spine_axis, spine_side, spine_color in (
    (ax_bottom, "bottom", COLOR_MGEH),
    (ax_top,    "top",    COLOR_MAE),
):
    metric_spine = spine_axis.spines[spine_side]
    metric_spine.set_color(spine_color)
    metric_spine.set_linewidth(1.2)

for spine_axis, spine_side in (
    (ax_bottom, "top"),
    (ax_top,    "bottom"),
    (ax_top,    "right"),
    (ax_bottom, "right"),
):
    spine_axis.spines[spine_side].set_visible(False)

# ---- Grid (MGEH axis only), kept behind the bars ----
ax_bottom.grid(axis="x", linestyle="--", alpha=0.3)
ax_bottom.set_axisbelow(True)

# ---- Save ----
fig.tight_layout()
Path(OUT_SVG).parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_SVG, bbox_inches="tight")
plt.close(fig)

print("Saved:", OUT_SVG)
print("Highways plotted:", highways)

Saved: figures/D_test_only_mae_mgeh_by_highway_horizontal.svg
Highways plotted: ['motorway', 'motorway link', 'trunk', 'trunk link']
