# In [None]:  (Jupyter cell marker left over from the notebook export)
import os
import subprocess
import calendar
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed

# ---------------------------------------------------------------------------
# Configuration for the yearly 8-step aggregation + remapping pipeline.
# ---------------------------------------------------------------------------

# Directory layout: yearly inputs are read from IN_DIR, per-year intermediate
# files go to TMP_DIR/<year>/, and remapped per-year results go to OUT_DIR.
ROOT_DIR = Path("/share/home/dq076/bedrock/data/P/MSWEP_V280").resolve()
IN_DIR   = ROOT_DIR / "rawdata"
TMP_DIR  = ROOT_DIR / "_tmp"
OUT_DIR  = ROOT_DIR / "8D"

# Arguments handed to `cdo remap,<grid>,<weights>` in process_one_year():
# p05_path is passed as the target grid description and p1_to_p05_path as the
# remapping weights file.  Neither file is created by this script.
# NOTE(review): names suggest a 0.1 -> 0.05 degree remap - confirm.
p05_path = Path("/share/home/dq076/bedrock/data/p05.txt").resolve()
p1_to_p05_path = IN_DIR / "p1_to_p05.nc"

# Inclusive range of years processed by main().
START_YEAR = 2003
END_YEAR   = 2020

# File-name templates.
# NOTE(review): process_one_year() builds its file names with its own
# f-strings rather than these templates, and OUT_PATTERN does not match any
# name it produces - confirm whether these are still needed.
IN_PATTERN  = "Precipitation_{year}.nc"
OUT_PATTERN = "P_MSWEP_{year}_8D_p1_mm8d.nc"


# Windowing: the first FULL_STEPS timesteps of a year are summed in groups of
# STEPS_PER_8DAY; everything after that is summed into one extra "tail"
# window, giving N_WINDOWS_TOTAL windows per year.
# NOTE(review): this presumes one timestep per day in the input - confirm.
STEPS_PER_8DAY = 8
FULL_WINDOWS = 45
FULL_STEPS = FULL_WINDOWS * STEPS_PER_8DAY  # 360
N_WINDOWS_TOTAL = 46

# Parallelism: at most 4 worker processes (fewer if the machine reports fewer
# CPUs; 4 if the CPU count is unknown).  OMP_NUM_THREADS is exported to the
# environment by main() and must therefore be a string.
# NOTE(review): whether the cdo build in use honours OMP_NUM_THREADS is not
# visible here - confirm.
JOBS = min(4, os.cpu_count() or 4)
OMP_NUM_THREADS = "8"

# Import-time side effect: make sure the output and scratch directories exist.
OUT_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)


def run(cmd, cwd=None):
    """Execute an external command and return its stripped standard output.

    Every element of *cmd* is converted with ``str`` before execution, so
    ``Path`` objects and numbers may be mixed with strings.  The command is
    run without a shell; stdout and stderr are captured as text.

    Args:
        cmd: Iterable of command-line tokens (program first).
        cwd: Optional working directory; any falsy value means "inherit the
            current working directory".

    Returns:
        The captured standard output with surrounding whitespace removed.

    Raises:
        RuntimeError: If the command exits with a non-zero status.  The
            message contains the command line plus captured stdout/stderr.
    """
    argv = list(map(str, cmd))
    workdir = None if not cwd else str(cwd)

    proc = subprocess.run(
        argv,
        cwd=workdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Success path first; everything below is error reporting.
    if proc.returncode == 0:
        return proc.stdout.strip()

    message = (
        "Command failed:\n"
        f"{' '.join(argv)}\n\n"
        f"STDOUT:\n{proc.stdout}\n\n"
        f"STDERR:\n{proc.stderr}\n"
    )
    raise RuntimeError(message)


def process_one_year(year: int) -> str:
    """Aggregate one yearly input file into 8-step sums and remap the result.

    Four CDO commands are run in sequence:

    1. sum the first ``FULL_STEPS`` timesteps in groups of ``STEPS_PER_8DAY``
       (``FULL_WINDOWS`` windows),
    2. sum every remaining timestep into a single "tail" window,
    3. concatenate both pieces along time into one file,
    4. remap that file with the grid description ``p05_path`` and the weights
       file ``p1_to_p05_path`` and write it to ``OUT_DIR``.

    Intermediate files are written to ``TMP_DIR/<year>/`` and are left in
    place; existing files are overwritten (``cdo -O``).

    NOTE(review): the windowing presumes one timestep per day in the input,
    so that 360 steps are followed by a 5- or 6-step tail - confirm.

    Args:
        year: Calendar year; selects ``IN_DIR/Precipitation_<year>.nc``.

    Returns:
        A one-line status message naming the final output file.

    Raises:
        RuntimeError: Propagated from ``run`` when any CDO command fails.
    """
    # Only used to label the tail file with the last day-of-year.
    ndays = 366 if calendar.isleap(year) else 365
    in_path = IN_DIR / f"Precipitation_{year}.nc"

    subdir = TMP_DIR / f"{year}"
    subdir.mkdir(parents=True, exist_ok=True)

    tmp1_path = subdir / f"P_MSWEP_{year}001-{year}360_8D_p1_mm8d-1.nc"
    tmp2_path = subdir / f"P_MSWEP_{year}361-{year}{ndays:03d}_8D_p1_mm8d-1.nc"
    tmp3_path = subdir / f"P_MSWEP_{year}_8D_p1_mm8d-1.nc"
    out_path = OUT_DIR / f"P_MSWEP_{year}_8D_p05_mm8d-1.nc"

    # Options shared by every CDO call in this function.
    cdo = ["cdo", "-O", "-L", "-f", "nc4c", "-z", "zip_3"]

    # 1) first 360 timesteps -> 45 timesteps (8-step sums)
    run([
        *cdo,
        f"timselsum,{STEPS_PER_8DAY}",
        f"-seltimestep,1/{FULL_STEPS}",
        str(in_path),
        str(tmp1_path),
    ])

    # 2) remainder -> 1 timestep (tail sum); "-1" selects up to the last step
    run([
        *cdo,
        "timsum",
        f"-seltimestep,{FULL_STEPS+1}/-1",
        str(in_path),
        str(tmp2_path),
    ])

    # 3) merge -> 46 timesteps
    run([
        *cdo,
        "mergetime",
        str(tmp1_path),
        str(tmp2_path),
        str(tmp3_path),
    ])

    # 4) remap the merged file onto the target grid using the weights file
    run([
        *cdo,
        f"remap,{p05_path},{p1_to_p05_path}",
        str(tmp3_path),
        str(out_path),
    ])

    # Intermediate files (tmp1/tmp2/tmp3) are not removed here.

    # Report the file that was actually delivered to OUT_DIR, not the
    # pre-remap intermediate in TMP_DIR.
    return (
        f"[OK] {year}: {out_path.name} "
        f"({N_WINDOWS_TOTAL} windows: {FULL_WINDOWS}*{STEPS_PER_8DAY}day + tail)"
    )


def main():
    """Process every configured year in parallel, then merge the results.

    Steps:

    1. Export ``OMP_NUM_THREADS`` so that worker processes and the commands
       they launch inherit it.
    2. Run ``process_one_year`` for each year in ``START_YEAR..END_YEAR``
       (inclusive) on a pool of ``JOBS`` worker processes, printing each
       status line as it completes.
    3. Concatenate all per-year outputs along time into a single file under
       ``ROOT_DIR`` whose name carries the configured year range.

    Raises:
        Exception: The first failure reported by a worker is printed and
            re-raised.  Years that have not started yet are cancelled; years
            already running are allowed to finish before the error
            propagates.
        RuntimeError: If the final merge command fails.
    """
    # Must be set before the pool is created so child processes inherit it.
    os.environ["OMP_NUM_THREADS"] = OMP_NUM_THREADS
    years = list(range(START_YEAR, END_YEAR + 1))

    print(f"[INFO] IN={IN_DIR}")
    print(f"[INFO] OUT={OUT_DIR}")
    print(f"[INFO] Years={START_YEAR}-{END_YEAR}")
    print(f"[INFO] cpu={os.cpu_count()} max_workers={JOBS} OMP_NUM_THREADS={OMP_NUM_THREADS}")

    with ProcessPoolExecutor(max_workers=JOBS) as ex:
        futs = {ex.submit(process_one_year, y): y for y in years}
        for fut in as_completed(futs):
            y = futs[fut]
            try:
                print(fut.result())
            except Exception as e:
                print(f"[ERR] {y} failed:\n{e}")
                # Leaving the ``with`` block waits for every outstanding
                # future; cancel the ones that have not started so a failure
                # does not trigger the remaining (expensive) years first.
                # cancel() is a harmless no-op on running/finished futures.
                for pending in futs:
                    pending.cancel()
                raise

    files1 = [
        str(OUT_DIR / f"P_MSWEP_{year}_8D_p05_mm8d-1.nc")
        for year in years
    ]

    # Name the merged file after the configured range instead of a
    # hard-coded "2003-2020", so it stays correct if the range changes.
    merge1_path = ROOT_DIR / f"P_MSWEP_{START_YEAR}-{END_YEAR}_8D_p05_mm8d-1.nc"
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3", "mergetime", *files1, str(merge1_path)])

    print("===> ALL DONE")


# Run the pipeline only when executed as a script.  The guard also matters
# for the process pool: when worker processes are started by re-importing the
# main module (spawn/forkserver start methods), it keeps them from calling
# main() again.
if __name__ == "__main__":
    main()
