In [None]:
# Expose the local package under ../src (gdpviz) to this notebook's import system.
import sys
from pathlib import Path

# Absolute location of ../src, derived from the current working directory.
SRC = Path.cwd().parent.joinpath("src").resolve()
if sys.path.count(str(SRC)) == 0:
    # Not on the search path yet: put it first so this copy is found before any other.
    sys.path.insert(0, str(SRC))

# Optional: quick look at the working directory and the path that was added.
print("CWD:", Path.cwd())
print("SRC added:", SRC)


In [None]:
# Verify that the local gdpviz module can be imported
import gdpviz.plotting as plotting
import inspect
print("loaded from:", plotting.__file__)                 # should point at .../src/gdpviz/plotting.py
print("signature:", inspect.signature(plotting.plot_countries))


In [None]:
# --- make src/ importable in notebook (since we didn't pip install -e .) ---
from pathlib import Path
import sys

# Absolute path of ../src, resolved relative to the current working directory.
SRC = (Path.cwd().parent / "src").resolve()
if str(SRC) not in sys.path:
    # Inserted at index 0 so this entry is searched before the rest of sys.path.
    sys.path.insert(0, str(SRC))

from gdpviz.data_loaders import fetch_worldbank_gdp
from gdpviz.cleaning import drop_missing_and_nonpositive
import pandas as pd

# Cache file used by load_or_fetch(); the path is relative to the working directory.
DATA_PATH = Path("../data/gdp_2000_2022.parquet")

# If parquet engine isn't installed, switch to CSV as fallback
def load_or_fetch(data_path=None):
    """Return the GDP table, reading a local cache when one exists.

    Lookup order:
      1. the parquet cache at ``data_path``;
      2. the sibling ``.csv`` cache (same stem) -- the file this function
         writes when the parquet write fails;
      3. a fresh download via ``fetch_worldbank_gdp(start=2000, end=2022)``
         passed through ``drop_missing_and_nonpositive``; the result is then
         cached (parquet first, CSV as fallback).

    Parameters
    ----------
    data_path : str or pathlib.Path, optional
        Location of the parquet cache. Defaults to the module-level
        ``DATA_PATH``.

    Returns
    -------
    pandas.DataFrame
        The cached or freshly fetched table.
    """
    parquet_path = Path(DATA_PATH if data_path is None else data_path)
    csv_path = parquet_path.with_suffix(".csv")

    if parquet_path.exists():
        try:
            return pd.read_parquet(parquet_path)
        except Exception:
            # Deliberately best-effort: an unreadable parquet file (or a
            # missing parquet engine) falls through to the CSV cache below.
            pass

    # Checked independently of the parquet file. When the parquet write fails
    # the cache is saved as CSV only, so requiring the parquet file to exist
    # first would make that CSV unreachable and force a re-download every run.
    if csv_path.exists():
        return pd.read_csv(csv_path)

    # fetch then cache
    df_ = fetch_worldbank_gdp(start=2000, end=2022)
    df_ = drop_missing_and_nonpositive(df_)
    parquet_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        df_.to_parquet(parquet_path)
    except Exception:
        # Same best-effort fallback on the write side.
        df_.to_csv(csv_path, index=False)
    return df_

# Load the GDP table into the notebook-wide `df` used by the cells below.
df = load_or_fetch()
# Cell output: a tuple of the first rows and the (rows, columns) shape.
df.head(), df.shape


In [None]:
from pathlib import Path
import sys, inspect
import gdpviz.plotting as plotting

# Diagnostics: where the notebook runs and which copy of gdpviz.plotting was imported.
print("cwd:", Path.cwd())
print("sys.path[0]:", sys.path[0])             # expected: .../gdp-viz-2000-2022/src
print("loaded from:", plotting.__file__)       # expected: .../gdp-viz-2000-2022/src/gdpviz/plotting.py
print("signature:", inspect.signature(plotting.plot_countries))


In [None]:
import inspect, importlib, gdpviz.plotting as plotting

# Print the full signature
print("signature:", inspect.signature(plotting.plot_countries))

# The reload below always runs. It matters when the signature printed above is
# missing unit / logy / normalize_base_year: reload, then print it again.
importlib.reload(plotting)
print("after reload:", inspect.signature(plotting.plot_countries))


In [None]:
import importlib, inspect, gdpviz.plotting as plotting
# Re-import gdpviz.plotting and print the resulting plot_countries signature.
importlib.reload(plotting)
print("signature:", inspect.signature(plotting.plot_countries))


In [None]:
# If the first cell already put ../src on sys.path and df is loaded, this cell can be run directly.
# If df is not defined yet, run the data-loading cell above first.

import gdpviz.plotting as plotting

countries = ["USA","China","Germany","Japan","UK","Brazil","Switzerland"]

plotting.plot_countries(
    df,
    countries=countries,
    title="GDP 2000–2022 (trillion, constant 2015 US$)",
    save_path="../figs/gdp_trillion.png",  # saved into the repository's figs/ folder
    show=False,
    unit="trillion",                       # key setting: display values in trillions of US$
    logy=False,
    normalize_base_year=None
)

print("Saved ../figs/gdp_trillion.png")


In [None]:
from IPython.display import Image, display
display(Image(filename="../figs/gdp_trillion.png"))

# Level chart, linear axis:
# good for seeing whose economy is larger and how big the absolute gaps are. (This linear, trillion-scale chart serves that purpose.)

In [None]:
import pandas as pd

countries = ["USA","China","Germany","Japan","UK","Brazil","Switzerland"]

# Endpoint check: one row per country with its 2000 and 2022 values and the
# relative change between them (2022 / 2000 - 1).
check = (
    df[df["year"].isin([2000, 2022])]
    .pivot_table(index="country", columns="year", values="value", aggfunc="first")
    # NOTE: .loc with a list raises KeyError if any listed country is absent from the pivot index.
    .loc[countries]
    .assign(growth=lambda x: x[2022] / x[2000] - 1)
)

# Show both endpoints divided by 1e12 ("trn"); growth as a percentage is easier to read
display(
    pd.DataFrame({
        "2000 (trn)": (check[2000] / 1e12).round(2),
        "2022 (trn)": (check[2022] / 1e12).round(2),
        "growth %":  (check["growth"] * 100).round(1),
    })
)


In [None]:
import gdpviz.plotting as plotting

countries = ["USA","China","Germany","Japan","UK","Brazil","Switzerland"]

plotting.plot_countries(
    df,
    countries=countries,
    title="GDP 2000–2022 (index, base=2000 = 100, constant 2015 US$)",
    save_path="../figs/gdp_index_2000.png",
    show=True,
    normalize_base_year=2000   # key setting: 2000 = 100
)
print("Saved ../figs/gdp_index_2000.png")

# Indexed (base = 100) chart: good for relative growth. Every country starts from the same base (2000 = 100), so it is easy to see which one grew faster.
# To explain to a non-specialist audience who grew fastest, use the indexed chart (2000 = 100).


In [None]:
plotting.plot_countries(
    df, countries,
    title="GDP 2000–2022 (trillion, log scale, constant 2015 US$)",
    save_path="../figs/gdp_trillion_log.png",
    show=True,
    unit="trillion", logy=True
)

# Log-scale level chart: good for reading long-run growth rates.
# On a log axis a constant percentage growth rate is a straight line, and slopes can be compared as growth rates (roughly parallel lines = similar growth).
# This "trillion + log scale" chart follows that standard presentation in economics.
# To study long-run growth, turning points, or growth-rate comparisons, use the log-scale chart.



In [None]:
from IPython.display import Image, display

# Display the two figures that were just saved
display(Image(filename="../figs/gdp_index_2000.png"))
display(Image(filename="../figs/gdp_trillion_log.png"))


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def plot_trillion_log_pro(
    df_tidy: pd.DataFrame,
    countries,
    title="GDP 2000–2022 (trillion, log scale, constant 2015 US$)",
    save_path="../figs/gdp_trillion_log_pro.png",
    event_years=(2009, 2020),
    source="Source: World Bank (NY.GDP.MKTP.KD), constant 2015 US$; made by @Wanting",
):
    """Plot one line per country of ``value / 1e12`` on a log-scaled y axis.

    Parameters
    ----------
    df_tidy : pandas.DataFrame
        Long-format table; the columns ``country``, ``year`` and ``value``
        are read.
    countries : iterable
        Country labels to draw, in legend order. Labels with no rows in
        ``df_tidy`` are skipped, so the legend only lists lines that exist.
    title : str
        Axes title.
    save_path : str or path-like or None
        Where to write the figure (dpi=150). Missing parent directories are
        created. A falsy value disables saving.
    event_years : iterable or None
        Years marked with a dashed vertical line and a small label.
    source : str
        Text for the footer line below the axes.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and optionally saved.
    """
    import os  # local import so this definition does not depend on other cells

    # Keep only the requested countries and rescale the values by 1e12.
    data = df_tidy[df_tidy["country"].isin(countries)].copy()
    data["value_trn"] = data["value"] / 1e12

    fig, ax = plt.subplots(figsize=(8, 5))
    for c in countries:
        sub = data[data["country"] == c].sort_values("year")
        if sub.empty:
            # Plotting an empty series would still add a legend entry for a
            # line that is not on the chart.
            continue
        ax.plot(sub["year"], sub["value_trn"], label=c)

    ax.set_yscale("log")
    ax.set_title(title, pad=10)
    ax.set_xlabel("Year")
    ax.set_ylabel("GDP (trillion, 2015 US$)")
    ax.grid(True, which="both", linestyle="--", alpha=0.3)
    ax.legend(loc="upper left", ncol=1, frameon=False)

    # Event-year markers; the label sits just above the bottom of the y range.
    for y in event_years or ():
        ax.axvline(y, color="gray", lw=1, ls="--", alpha=0.7)
        ax.text(y+0.1, ax.get_ylim()[0]*1.02, f"{y}", color="gray", fontsize=9)

    # Footer: data source and a reading hint. Attached to `fig` explicitly
    # instead of relying on pyplot's notion of the current figure.
    fig.text(
        0.01, -0.02,
        source + " • log scale (slope ≈ growth rate)",
        ha="left", va="top", fontsize=9, color="#444"
    )

    fig.tight_layout()
    if save_path:
        # savefig does not create directories; do it for path-like targets
        # (file-like objects are passed through untouched).
        if isinstance(save_path, (str, os.PathLike)):
            os.makedirs(os.path.dirname(os.fspath(save_path)) or ".", exist_ok=True)
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    plt.show()


def plot_index_base_pro(
    df_tidy: pd.DataFrame,
    countries,
    base_year=2000,
    title="GDP 2000–2022 (index, base=2000 = 100, constant 2015 US$)",
    save_path="../figs/gdp_index_2000_pro.png",
    event_years=(2009, 2020),
    source="Source: World Bank (NY.GDP.MKTP.KD), constant 2015 US$; made by @Wanting",
):
    """Plot each country's ``value`` rescaled so that ``base_year`` equals 100.

    Parameters
    ----------
    df_tidy : pandas.DataFrame
        Long-format table; the columns ``country``, ``year`` and ``value``
        are read.
    countries : iterable
        Country labels to draw, in legend order. A country without a
        positive ``value`` in ``base_year`` cannot be indexed; it is left
        out of the chart and reported with a warning.
    base_year : int
        Year whose value becomes 100.
    title : str
        Axes title.
    save_path : str or path-like or None
        Where to write the figure (dpi=150). Missing parent directories are
        created. A falsy value disables saving.
    event_years : iterable or None
        Years marked with a dashed vertical line and a small label.
    source : str
        Text for the footer line below the axes.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and optionally saved.

    Raises
    ------
    ValueError
        If none of ``countries`` has a positive value in ``base_year``.
    """
    import os        # local imports so this definition does not depend on other cells
    import warnings

    data = df_tidy[df_tidy["country"].isin(countries)].copy()

    # Indexing: value / value(base_year) * 100, computed per country.
    indexed = []   # (country, frame with an added "index" column)
    skipped = []   # countries with no usable base-year value
    for c in countries:
        sub = data[data["country"] == c].sort_values("year").copy()
        base = sub.loc[sub["year"] == base_year, "value"]
        if not base.empty and base.iloc[0] > 0:
            sub["index"] = sub["value"] / base.iloc[0] * 100
            indexed.append((c, sub))
        else:
            skipped.append(c)

    if not indexed:
        # Fail with an explanation before any figure is created.
        raise ValueError(
            f"None of the requested countries has a positive value in base_year={base_year}; "
            "nothing to plot."
        )
    if skipped:
        warnings.warn(
            f"No positive {base_year} value for: {', '.join(map(str, skipped))}; "
            "omitted from the index plot.",
            stacklevel=2,
        )

    fig, ax = plt.subplots(figsize=(8, 5))
    # Only countries that were actually indexed are drawn, so the legend
    # matches the lines on the chart.
    for c, sub in indexed:
        ax.plot(sub["year"], sub["index"], label=c)

    ax.set_title(title, pad=10)
    ax.set_xlabel("Year")
    ax.set_ylabel(f"GDP index (base={base_year} = 100)")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.legend(loc="upper left", ncol=1, frameon=False)

    # Event-year markers; the label sits just above the bottom of the y range.
    for y in event_years or ():
        ax.axvline(y, color="gray", lw=1, ls="--", alpha=0.7)
        ax.text(y+0.1, ax.get_ylim()[0]*1.02, f"{y}", color="gray", fontsize=9)

    # Footer: data source and a reading hint. Attached to `fig` explicitly
    # instead of relying on pyplot's notion of the current figure.
    fig.text(
        0.01, -0.02,
        source + f" • base={base_year}=100 (relative growth)",
        ha="left", va="top", fontsize=9, color="#444"
    )

    fig.tight_layout()
    if save_path:
        # savefig does not create directories; do it for path-like targets
        # (file-like objects are passed through untouched).
        if isinstance(save_path, (str, os.PathLike)):
            os.makedirs(os.path.dirname(os.fspath(save_path)) or ".", exist_ok=True)
        fig.savefig(save_path, dpi=150, bbox_inches="tight")
    plt.show()


In [None]:
# Sanity check: both plotting helpers are defined in this kernel (raises NameError otherwise).
print(plot_trillion_log_pro.__name__, plot_index_base_pro.__name__)


In [None]:
# If this was not already set at the top of the notebook, add it so figures render inline
%matplotlib inline

countries = ["USA","China","Germany","Japan","UK","Brazil","Switzerland"]

# Log axis + trillions of US$ (better for reading growth rates / slopes)
plot_trillion_log_pro(
    df, countries,
    title="GDP 2000–2022 (trillion, log scale, constant 2015 US$)",
    save_path="../figs/gdp_trillion_log_pro.png"  # output file
)

# Indexed (2000 = 100; better for comparing relative growth)
plot_index_base_pro(
    df, countries,
    base_year=2000,
    title="GDP 2000–2022 (index, base=2000 = 100, constant 2015 US$)",
    save_path="../figs/gdp_index_2000_pro.png"
)


In [None]:
import pandas as pd

# Endpoint check: one row per country with its 2000 and 2022 values and the
# relative change between them (2022 / 2000 - 1). Uses `countries` from an earlier cell.
check = (
    df[df["year"].isin([2000, 2022])]
    .pivot_table(index="country", columns="year", values="value", aggfunc="first")
    # NOTE: .loc with a list raises KeyError if any listed country is absent from the pivot index.
    .loc[countries]
    .assign(growth=lambda x: x[2022] / x[2000] - 1)
)

# Presentation table: endpoints divided by 1e12 ("trn"), growth as a percentage.
endpoint_table = pd.DataFrame({
    "2000 (trn)": (check[2000] / 1e12).round(2),
    "2022 (trn)": (check[2022] / 1e12).round(2),
    "growth %":  (check["growth"] * 100).round(1),
})

endpoint_table


## Key Takeaways (2000–2022, constant 2015 US$, World Bank NY.GDP.MKTP.KD)

- **Growth rates**: In the log-scale plot, China’s line has the **steepest slope**, indicating the **highest long-run growth**; the US grows more moderately but has the **largest level**.
- **Shock years**: Around **2009** (Global Financial Crisis) and **2020** (COVID), most countries show **temporary dips** followed by recovery.
- **Japan**: **Flat** over the long run; the **smallest** relative increase by 2022 in the index plot.
- **Germany/UK**: **Moderate** growth with clear dips in 2009 and 2020.
- **Brazil**: **Volatile**; notable declines in 2014–2016 and 2020, then recovery.
- **Switzerland**: **Small in level** but **steadily rising**.

> Notes: constant 2015 US$; **log scale** (slope ≈ growth rate); **index** plot with **2000 = 100** highlights **relative changes**.  
> Source: World Bank, **NY.GDP.MKTP.KD** (via `pandas-datareader`).

## 简要结论（2000–2022，不变价2015美元，World Bank NY.GDP.MKTP.KD）

- **增速**：从对数坐标图看，中国曲线**斜率最陡**，长期增长率最高；美国增速较温和但**总量最大**。
- **冲击年份**：在 **2009**（全球金融危机）与 **2020**（疫情）两条虚线附近，各国出现**阶段性回落**，随后逐步修复。
- **日本**：长期**横盘**，指数化图中 2022 相对 2000 的增幅最小。
- **德国 / 英国**：**温和增长**，两次冲击的“凹点”明显。
- **巴西**：**波动较大**，2014–2016 与 2020 明显下滑后恢复。
- **瑞士**：体量较小，但**稳定上升**。

> 图注：constant 2015 US$；对数坐标——**线条斜率 ≈ 增长率**；指数化图（2000=100）——**比较相对涨幅**。  
> 数据来源：World Bank, **NY.GDP.MKTP.KD**（通过 `pandas-datareader` 获取）。

In [None]:
from IPython.display import Image, display
# Show the saved log-scale figure from disk.
display(Image("../figs/gdp_trillion_log_pro.png"))

In [None]:
from IPython.display import Image, display
# Show the saved indexed (2000 = 100) figure from disk.
display(Image("../figs/gdp_index_2000_pro.png"))

> （可选）如需在 Markdown 中同时展示这两张图，可在同一个 Markdown 单元的末尾加入以下内容（图片路径需与保存时一致）：

### Figures
![Log scale](../figs/gdp_trillion_log_pro.png)
![Index base 2000](../figs/gdp_index_2000_pro.png)