# E05 OD 模型的敏感性分析（涨落—响应的城市版本）

> 目标：把“参数扰动 → 系统响应”做成一个可复用的小实验。  
> 我们以 **MaxEnt + IPF 的 OD 推断**为基线模型，研究成本敏感性 \(\beta\) 改变时，OD 结构如何变化。

## 你需要输出
- \(\beta\) 扫描下的 **平均成本** 曲线
- 与 \(\beta=0\) 基线相比的 **分布位移**（以 KL 散度作为最简单的度量指标）
- “变化最大”的 OD 对（Top-K）


In [None]:
import os
import sys

# Make project-local packages importable: climb from the current working
# directory towards the top of the filesystem and, at the first level that
# contains a `statphys_urban_learning` entry, put that entry at the front of
# sys.path (unless it is already listed).
curr = os.path.abspath('')
while True:
    parent = os.path.dirname(curr)
    if parent == curr:
        # dirname no longer changes the path: nothing found, sys.path untouched.
        break
    if 'statphys_urban_learning' in os.listdir(curr):
        target = os.path.join(curr, 'statphys_urban_learning')
        if target not in sys.path:
            sys.path.insert(0, target)
        break
    curr = parent


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from projects.p01_maxent_od.src.ipf import maxent_od


## 1) 构造一个最小城市 toy（区域位置 → 成本矩阵）

In [None]:
# Seeded generator so every run of the notebook produces the same toy city.
rng = np.random.default_rng(0)

# Zone locations: n_zones points drawn uniformly from the unit square.
n_zones = 8
coords = rng.uniform(0, 1, size=(n_zones, 2))

# Pairwise cost = Euclidean distance between zone coordinates
# (a travel-time matrix could be substituted here).
diff = coords[:, None, :] - coords[None, :, :]
cost = np.sqrt(np.square(diff).sum(axis=2))

# Give intra-zone trips a fixed cost of 0.2 instead of 0 so that the
# zero diagonal does not trivially dominate.
np.fill_diagonal(cost, 0.2)

# Preview the top-left 3x3 corner of the cost matrix.
cost[:3, :3]


## 2) 给定边际（出发 O、到达 D）

In [None]:
# Per-zone trip totals: O holds departures, D holds arrivals, each drawn
# uniformly between 50 and 200 from the shared generator.
O = rng.uniform(50, 200, size=n_zones)
D = rng.uniform(50, 200, size=n_zones)

# Rescale D so that both margins add up to the same total number of trips.
scale = O.sum() / D.sum()
D = D * scale

# Show both totals side by side as a sanity check.
O.sum(), D.sum()


## 3) 定义一些指标：平均成本、KL 散度、Top-K 变化

In [None]:
def normalize(T: np.ndarray) -> np.ndarray:
    """Return *T* rescaled so that its entries sum to 1.

    The input is converted to a float array with ``astype`` and divided by
    its grand total; the caller's array is not modified.
    """
    as_float = T.astype(float)
    total = as_float.sum()
    return as_float / total

def avg_cost(T: np.ndarray, cost: np.ndarray) -> float:
    """Return the flow-weighted mean cost, ``sum(T * cost) / sum(T)``."""
    weighted_total = np.sum(T * cost)
    flow_total = np.sum(T)
    return float(weighted_total / flow_total)

def kl(P: np.ndarray, Q: np.ndarray, eps: float = 1e-12) -> float:
    """Return ``sum(P * (log P - log Q))``, the KL divergence of P from Q.

    Entries of either input that are smaller than *eps* are raised to *eps*
    before the logarithms are taken, so zeros do not produce infinities.
    The inputs are not renormalised after clipping.
    """
    p = np.clip(P, eps, None)
    q = np.clip(Q, eps, None)
    log_ratio = np.log(p) - np.log(q)
    return float(np.sum(p * log_ratio))

def topk_changes(T0: np.ndarray, T1: np.ndarray, k: int = 10):
    """List the *k* cells whose value differs most between *T0* and *T1*.

    Cells are ranked by ``|T1 - T0|`` in descending order. The result is a
    DataFrame with one row per cell and the columns ``i``, ``j`` (cell
    indices), ``T_beta0`` (value in *T0*), ``T_beta`` (value in *T1*) and
    ``abs_change``.
    """
    delta = np.abs(T1 - T0)
    # Flat positions ordered from the largest to the smallest absolute change.
    order = np.argsort(delta.ravel())[::-1]
    row_idx, col_idx = np.unravel_index(order[:k], delta.shape)
    records = [
        (int(i), int(j), float(T0[i, j]), float(T1[i, j]), float(delta[i, j]))
        for i, j in zip(row_idx, col_idx)
    ]
    return pd.DataFrame(records, columns=["i", "j", "T_beta0", "T_beta", "abs_change"])


## 4) 扫描 \(\beta\)：无成本 prior（\(\beta=0\)） vs 带成本 prior

In [None]:
# Cost sensitivities to scan. beta = 0 gives a flat prior (exp(0) = 1 everywhere).
betas = [0.0, 0.2, 0.5, 1.0, 2.0]

# Fit the beta = 0 reference once, before the scan. Doing it here (rather than
# inside the loop when beta happens to equal 0.0) means the baseline exists
# regardless of whether 0.0 appears in `betas` or where it appears, so the KL
# column and the later Top-K comparison never see a missing baseline.
# NOTE(review): maxent_od comes from the project's IPF module; it is assumed
# to return the fitted OD matrix as a NumPy array — confirm against ipf.py.
T_beta0 = maxent_od(O, D, prior=np.exp(-0.0 * cost), max_iter=5000, tol=1e-10)
P_beta0 = normalize(T_beta0)

rows = []
for beta in betas:
    # Prior that down-weights costly OD pairs exponentially in beta.
    prior = np.exp(-beta * cost)
    T = maxent_od(O, D, prior=prior, max_iter=5000, tol=1e-10)

    # KL is evaluated on the normalised matrices (entries summing to 1).
    P = normalize(T)
    rows.append({
        "beta": beta,
        "avg_cost": avg_cost(T, cost),
        "KL_vs_beta0": kl(P, P_beta0),
    })

# One row per beta: average cost and KL shift relative to the baseline.
df = pd.DataFrame(rows)
df


In [None]:
# Average trip cost as a function of the cost sensitivity beta.
fig, ax = plt.subplots()
ax.plot(df["beta"], df["avg_cost"], marker="o")
ax.set_xlabel("beta (cost sensitivity)")
ax.set_ylabel("average cost")
ax.set_title("Average cost vs beta")
ax.grid(True)
plt.show()


In [None]:
# KL shift of each fitted OD distribution relative to the beta = 0 baseline.
fig, ax = plt.subplots()
ax.plot(df["beta"], df["KL_vs_beta0"], marker="o")
ax.set_xlabel("beta (cost sensitivity)")
ax.set_ylabel("KL(T_beta || T_beta0)")
ax.set_title("Distribution shift vs beta (KL divergence)")
ax.grid(True)
plt.show()


## 5) 局部敏感性：用有限差分近似 \(d\langle c\rangle/d\beta\)

In [None]:
def compute_T(beta: float) -> np.ndarray:
    """Fit the OD matrix for cost sensitivity *beta*.

    Builds the prior ``exp(-beta * cost)`` from the notebook-level ``cost``
    matrix and passes it, together with the notebook-level margins ``O`` and
    ``D``, to ``maxent_od`` with ``max_iter=5000`` and ``tol=1e-10``.
    """
    return maxent_od(
        O,
        D,
        prior=np.exp(-beta * cost),
        max_iter=5000,
        tol=1e-10,
    )

# Central finite difference of the average cost around beta0:
#   d<c>/dbeta ~ (<c>(beta0 + eps) - <c>(beta0 - eps)) / (2 * eps)
beta0 = 0.5
eps = 1e-2

# Fit the model on either side of beta0.
T_minus = compute_T(beta0 - eps)
T_plus  = compute_T(beta0 + eps)

cost_minus = avg_cost(T_minus, cost)
cost_plus = avg_cost(T_plus, cost)

d_avg_cost = (cost_plus - cost_minus) / (2 * eps)
d_avg_cost


## 6) 哪些 OD 对变化最大？（Top-K）

In [None]:
# Fit the model at beta_compare and rank OD pairs by absolute flow change
# relative to the beta = 0 baseline, keeping the 12 largest.
beta_compare = 1.0
T_beta = compute_T(beta=beta_compare)

topk = topk_changes(T0=T_beta0, T1=T_beta, k=12)
topk


## 讨论（写在你自己的报告里）
- 你观察到 \(\beta\) 增大时平均成本如何变化？这符合直觉吗？
- KL 指标随 \(\beta\) 增大如何变化？它在这里表达了什么“结构变化”？
- Top-K 的 OD 对是否集中在某些区域？这有什么政策含义？