### 1. Apply the water balance method over the entire period (entire_period: 2003-01-01 to 2020-12-31)

In [3]:
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
from netCDF4 import Dataset, date2num

# ---------------------------------------------------------------------------
# Paths and run configuration
# ---------------------------------------------------------------------------
NOTEBOOK_DIR = Path().resolve()
ROOT_DIR = (NOTEBOOK_DIR / "../data").resolve()
RUN_DIR  = ROOT_DIR / "run"
OUT_DIR = RUN_DIR / "entire_period"
# Create the output folder up front. Sr.nc / Sbedrock.nc are written only after
# the whole time loop, so a missing folder would otherwise abort the cell at the
# very end of a long run.
OUT_DIR.mkdir(parents=True, exist_ok=True)
RES = "p05"

# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------
ds  = xr.open_dataset(RUN_DIR / "diff.nc")
ds2 = xr.open_dataset(ROOT_DIR / "SC/SC.nc")
ds3 = xr.open_dataset(RUN_DIR / f"Ssoil_{RES}.nc")

diff  = ds["diff"]
sc    = ds2["SC"]
ssoil = ds3["Ssoil"]

time_len = diff.sizes["time"]
nlat     = diff.sizes["lat"]
nlon     = diff.sizes["lon"]

# Running state, carried across every time step of the whole record:
#   cwd_i : cumulative water deficit at the current step
#   sr    : largest cwd_i seen so far at each grid cell
cwd_i = np.zeros((nlat, nlon), dtype=np.float32)
sr    = np.zeros((nlat, nlon), dtype=np.float32)

out_nc = RUN_DIR / "CWD.nc"

# time encoding
time_dt   = pd.to_datetime(ds["time"].values).to_pydatetime()
print("Time range:", time_dt[0], "to", time_dt[-1])
time_unit = "days since 1970-01-01 00:00:00"
time_cal  = "proleptic_gregorian"
time_num  = date2num(time_dt, units=time_unit, calendar=time_cal)

# Output chunk sizes (tunable). Clamp the spatial chunks to the grid size so the
# cell also works on grids smaller than the requested chunk.
chunk_t, chunk_y, chunk_x = 1, 1200, 1200
chunk_y = min(chunk_y, nlat)
chunk_x = min(chunk_x, nlon)

# ---------------------------------------------------------------------------
# Stream CWD to disk one time slice at a time
# ---------------------------------------------------------------------------
with Dataset(out_nc, "w", format="NETCDF4") as nc:
    nc.createDimension("time", time_len)
    nc.createDimension("lat",  nlat)
    nc.createDimension("lon",  nlon)

    vtime = nc.createVariable("time", "f8", ("time",))
    vtime.units = time_unit
    vtime.calendar = time_cal
    vtime[:] = time_num

    vlon = nc.createVariable("lon", "f4", ("lon",))
    vlon.units = "degrees_east"
    vlon[:] = ds["lon"].values.astype(np.float32)

    vlat = nc.createVariable("lat", "f4", ("lat",))
    vlat.units = "degrees_north"
    vlat[:] = ds["lat"].values.astype(np.float32)

    vCWD = nc.createVariable(
        "CWD", "f4", ("time", "lat", "lon"),
        zlib=True, complevel=3, shuffle=True,
        chunksizes=(chunk_t, chunk_y, chunk_x),
        fill_value=np.float32(np.nan)
    )
    vCWD.long_name = "Cumulative Water Deficit (8-day running state)"

    for i in range(time_len):
        if i % 10 == 0:
            print(f"Processing {i}/{time_len-1}")

        # NOTE(review): SC is paired with diff purely by time index; this assumes
        # both files share the same time axis -- confirm.
        diff_i = diff.isel(time=i).values.astype(np.float32)
        sc_i   = sc.isel(time=i).values.astype(np.float32)

        delta_tn_i = diff_i * sc_i
        # Accumulate while the step contribution is non-negative, otherwise reset
        # to zero. NaN compares False against 0, so cells with a missing
        # contribution are also reset to 0.0 rather than propagated as NaN.
        cwd_i = np.where(delta_tn_i >= 0, cwd_i + delta_tn_i, 0.0).astype(np.float32)
        sr = np.maximum(sr, cwd_i)

        vCWD[i, :, :] = cwd_i  # stream write

# ---------------------------------------------------------------------------
# Final Sr / Sbedrock (small 2D outputs)
# ---------------------------------------------------------------------------
ssoil  = ssoil.values.astype(np.float32)
# Sbedrock is the part of Sr that exceeds Ssoil; zero where Sr does not exceed it.
sbedrock  = np.where(sr > ssoil, sr - ssoil, 0.0).astype(np.float32)

xr.Dataset({"Sr": (("lat","lon"), sr)}, coords={"lat": ds["lat"], "lon": ds["lon"]}) \
  .to_netcdf(OUT_DIR / "Sr.nc")

xr.Dataset({"Sbedrock": (("lat","lon"), sbedrock)}, coords={"lat": ds["lat"], "lon": ds["lon"]}) \
  .to_netcdf(OUT_DIR / "Sbedrock.nc")


Time range: 2003-01-01 00:00:00 to 2020-12-26 00:00:00
Processing 0/827
Processing 10/827
Processing 20/827
Processing 30/827
Processing 40/827
Processing 50/827
Processing 60/827
Processing 70/827
Processing 80/827
Processing 90/827
Processing 100/827
Processing 110/827
Processing 120/827
Processing 130/827
Processing 140/827
Processing 150/827
Processing 160/827
Processing 170/827
Processing 180/827
Processing 190/827
Processing 200/827
Processing 210/827
Processing 220/827
Processing 230/827
Processing 240/827
Processing 250/827
Processing 260/827
Processing 270/827
Processing 280/827
Processing 290/827
Processing 300/827
Processing 310/827
Processing 320/827
Processing 330/827
Processing 340/827
Processing 350/827
Processing 360/827
Processing 370/827
Processing 380/827
Processing 390/827
Processing 400/827
Processing 410/827
Processing 420/827
Processing 430/827
Processing 440/827
Processing 450/827
Processing 460/827
Processing 470/827
Processing 480/827
Processing 490/827
Proces

### 2. Apply the water balance method with an annual reset (reset_annual: one window per calendar year, 2003-01-01 to 2003-12-31, and so on)

In [2]:
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
from netCDF4 import Dataset, date2num

# -------------------------------------------------
# Paths and run configuration
# -------------------------------------------------
NOTEBOOK_DIR = Path().resolve()
ROOT_DIR = (NOTEBOOK_DIR / "../data").resolve()
RUN_DIR = ROOT_DIR / "run"
OUT_DIR = RUN_DIR / "reset_annual"
OUT_DIR.mkdir(parents=True, exist_ok=True)
RES = "p05"
out_cwd = RUN_DIR / "CWD_yearreset.nc"  # streamed CWD output (year-reset)

# -------------------------------------------------
# Inputs
# -------------------------------------------------
ds = xr.open_dataset(RUN_DIR / "diff.nc")
ds2 = xr.open_dataset(ROOT_DIR / "SC/SC.nc")
ds3 = xr.open_dataset(RUN_DIR / f"Ssoil_{RES}.nc")

# diff: (time, lat, lon) = (828, 3600, 7200); sc: (time, lat, lon); ssoil: (lat, lon)
diff, sc, ssoil = ds["diff"], ds2["SC"], ds3["Ssoil"]

time_len, nlat, nlon = (diff.sizes[dim] for dim in ("time", "lat", "lon"))

# -------------------------------------------------
# Decode the time axis and derive the calendar year of every step
# -------------------------------------------------
time_pd = pd.to_datetime(ds["time"].values)
years = time_pd.year.values
uniq_years = np.unique(years)

print("Time range:", time_pd[0], "to", time_pd[-1], "len=", time_len)
print("Years:", uniq_years[0], "->", uniq_years[-1], "N=", len(uniq_years))
print("Shape:", (time_len, nlat, nlon))

# -------------------------------------------------
# Static 2D field, loaded once
# -------------------------------------------------
ssoil2d = ssoil.values.astype(np.float32)

# -------------------------------------------------
# Time encoding for the netCDF output
# -------------------------------------------------
time_unit = "days since 1970-01-01 00:00:00"
time_cal = "proleptic_gregorian"
time_dt = time_pd.to_pydatetime()
time_num = date2num(time_dt, units=time_unit, calendar=time_cal)

# -------------------------------------------------
# Output chunk sizes (tunable), never larger than the grid
# -------------------------------------------------
chunk_t = 1
chunk_y = min(600, nlat)
chunk_x = min(600, nlon)

# -------------------------------------------------
# Per-year state, zeroed again at every year boundary
# -------------------------------------------------
cwd_i = np.zeros((nlat, nlon), np.float32)  # running CWD within the current year
sr_y = np.zeros((nlat, nlon), np.float32)   # max CWD within the current year (Dr)

def finalize_and_write_year(year, Dr2d, Ssoil2d):
    """
    Write the yearly Dr and Dbedrock grids for one year into OUT_DIR.

    year:    int, used in the output file names
    Dr2d:    (lat, lon) float32, yearly max CWD
    Ssoil2d: (lat, lon) array compared element-wise against Dr2d

    Files written:
      Dr_{year}.nc        variable "Dr"       = Dr2d
      Dbedrock_{year}.nc  variable "Dbedrock" = Dr2d - Ssoil2d where Dr2d > Ssoil2d, else 0
    """
    exceeds_soil = Dr2d > Ssoil2d
    Dbedrock2d = np.where(exceeds_soil, Dr2d - Ssoil2d, 0.0).astype(np.float32)

    # Requirement: one Dr_{year}.nc and one Dbedrock_{year}.nc per year (18 of each).
    f_dr = OUT_DIR / f"Dr_{year}.nc"
    f_db = OUT_DIR / f"Dbedrock_{year}.nc"

    # Same layout for both files: a single (lat, lon) variable on the input grid.
    outputs = (("Dr", Dr2d, f_dr), ("Dbedrock", Dbedrock2d, f_db))
    for var_name, grid, target in outputs:
        xr.Dataset(
            {var_name: (("lat", "lon"), grid)},
            coords={"lat": ds["lat"].values, "lon": ds["lon"].values},
        ).to_netcdf(target)

    print(f"[YEAR OUT] {year} -> {f_dr.name}, {f_db.name}")

# =========================
# STREAM WRITE CWD_yearreset.nc
# =========================
with Dataset(out_cwd, "w", format="NETCDF4") as nc:
    # Fixed-size dimensions, in (time, lat, lon) order.
    for dim_name, dim_size in (("time", time_len), ("lat", nlat), ("lon", nlon)):
        nc.createDimension(dim_name, dim_size)

    # Coordinate variables.
    vtime = nc.createVariable("time", "f8", ("time",))
    vtime.units = time_unit
    vtime.calendar = time_cal
    vtime[:] = time_num

    vlon = nc.createVariable("lon", "f4", ("lon",))
    vlon.units = "degrees_east"
    vlon[:] = ds["lon"].values.astype(np.float32)

    vlat = nc.createVariable("lat", "f4", ("lat",))
    vlat.units = "degrees_north"
    vlat[:] = ds["lat"].values.astype(np.float32)

    # Compressed, chunked data variable that receives one time slice per iteration.
    vCWD = nc.createVariable(
        "CWD",
        "f4",
        ("time", "lat", "lon"),
        zlib=True,
        complevel=3,
        shuffle=True,
        chunksizes=(chunk_t, chunk_y, chunk_x),
        fill_value=np.float32(np.nan),
    )
    vCWD.long_name = "Cumulative Water Deficit (8-day running state, reset to 0 at each year boundary)"

    # Year of the first time step; a change in year triggers output + reset below.
    curr_year = int(years[0])

    # Walk through the time steps one by one.
    for i in range(time_len):
        y = int(years[i])

        # Entering a new year: write the previous year's Dr/Dbedrock first,
        # then zero the running state before processing this step.
        if y != curr_year:
            finalize_and_write_year(curr_year, sr_y, ssoil2d)
            cwd_i[...] = 0.0
            sr_y[...] = 0.0
            curr_year = y

        if i % 10 == 0:
            print(f"Processing {i}/{time_len-1}  (year={y})")

        # Load the current time slice of both inputs.
        diff_i = diff.isel(time=i).values.astype(np.float32)
        sc_i = sc.isel(time=i).values.astype(np.float32)

        # Within-year accumulation: add the step contribution while it is
        # non-negative, otherwise drop back to zero.
        delta_tn_i = diff_i * sc_i
        keeps_accumulating = delta_tn_i >= 0
        cwd_i = np.where(keeps_accumulating, cwd_i + delta_tn_i, 0.0).astype(np.float32)

        # Dr for the current year: running maximum of the within-year CWD.
        sr_y = np.maximum(sr_y, cwd_i)

        # Every time step of the full record is written to the CWD file.
        vCWD[i, :, :] = cwd_i

    # After the loop the last year is still pending, so flush it too.
    finalize_and_write_year(curr_year, sr_y, ssoil2d)

print("[OK] Wrote CWD (year-reset):", out_cwd)
print("[OK] Yearly Dr/Dbedrock in:", OUT_DIR)


Time range: 2003-01-01 00:00:00 to 2020-12-26 00:00:00 len= 828
Years: 2003 -> 2020 N= 18
Shape: (828, 3600, 7200)
Processing 0/827  (year=2003)
Processing 10/827  (year=2003)
Processing 20/827  (year=2003)
Processing 30/827  (year=2003)
Processing 40/827  (year=2003)
[YEAR OUT] 2003 -> Dr_2003.nc, Dbedrock_2003.nc
Processing 50/827  (year=2004)
Processing 60/827  (year=2004)
Processing 70/827  (year=2004)
Processing 80/827  (year=2004)
Processing 90/827  (year=2004)
[YEAR OUT] 2004 -> Dr_2004.nc, Dbedrock_2004.nc
Processing 100/827  (year=2005)
Processing 110/827  (year=2005)
Processing 120/827  (year=2005)
Processing 130/827  (year=2005)
[YEAR OUT] 2005 -> Dr_2005.nc, Dbedrock_2005.nc
Processing 140/827  (year=2006)
Processing 150/827  (year=2006)
Processing 160/827  (year=2006)
Processing 170/827  (year=2006)
Processing 180/827  (year=2006)
[YEAR OUT] 2006 -> Dr_2006.nc, Dbedrock_2006.nc
Processing 190/827  (year=2007)
Processing 200/827  (year=2007)
Processing 210/827  (year=2007)
