In [None]:
#!/usr/bin/env python3
import os
import glob
import subprocess
import calendar
from pathlib import Path
from datetime import datetime, timedelta
from concurrent.futures import ProcessPoolExecutor, as_completed

# Root of the GLASS downward short-wave radiation data tree (symlinks resolved).
ROOT_DIR = Path("/share/home/dq076/bedrock/data/DSR/GLASS/").resolve()

# Product identifier and metadata written to the output variable.
PRODUCT = "GLASS05B01"
VAR_NAME = "DSR"
VAR_LONG_NAME = "Downward short-wave radiation (GLASS05B01)"
VAR_UNITS = "W m-2"
# Kept as a string because it is only ever spliced into command-line arguments.
FILLVALUE = "-9999"

# Directory layout below ROOT_DIR.
RAW_DIR = ROOT_DIR.joinpath("rawdata")   # searched for the input HDF files
TMP_DIR = ROOT_DIR.joinpath("_tmp")      # per-year intermediate files
DATA1_DIR = ROOT_DIR.joinpath("1D")      # daily NetCDF output
DATA2_DIR = ROOT_DIR.joinpath("8D")      # 8-day mean NetCDF output

# Create the working/output directories up front; RAW_DIR is not created here.
for _out_dir in (TMP_DIR, DATA1_DIR, DATA2_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)
del _out_dir

def run(cmd):
    """Execute an external command and raise if it exits with a non-zero status.

    Args:
        cmd: Iterable of command-line tokens. Every token is converted with
            ``str()``, so ``Path`` objects can be passed directly.

    Raises:
        RuntimeError: If the process returns a non-zero exit code. The message
            contains the command line plus the captured stdout and stderr.
    """
    argv = list(map(str, cmd))
    proc = subprocess.run(argv, capture_output=True, text=True)
    if proc.returncode == 0:
        return

    report = (
        "Command failed:\n"
        + " ".join(argv) + "\n\n"
        + "STDOUT:\n" + proc.stdout + "\n\n"
        + "STDERR:\n" + proc.stderr + "\n"
    )
    raise RuntimeError(report)

def _find_daily_hdf(year: int, doy: int) -> Path:
    """Return the raw HDF file for one day, failing clearly when none exists."""
    pattern = str(RAW_DIR / f"{PRODUCT}.V*.A{year}{doy:03d}.*.hdf")
    hits = sorted(glob.glob(pattern))
    if not hits:
        raise FileNotFoundError(f"No raw HDF file matches {pattern}")
    # Several files may match the version wildcard; use the one sorting last.
    return Path(hits[-1])


def _convert_day(year: int, doy: int, tmp_year_dir: Path) -> Path:
    """Convert one day's HDF grid into a daily NetCDF-4 file and return its path."""
    date_str = (datetime(year, 1, 1) + timedelta(days=doy - 1)).strftime("%Y-%m-%d")
    print(f"[DAY] {date_str}  A{year}{doy:03d}")

    hdf_path = _find_daily_hdf(year, doy)
    hdf_uri = f'HDF4_EOS:EOS_GRID:"{hdf_path}":{PRODUCT}:{VAR_NAME}'
    stem = f"DSR_GLASS_{year}{doy:03d}_1D_p05_Wm-2"
    tmp1_path = tmp_year_dir / f"{stem}_tmp1.nc4"
    nc1_path = DATA1_DIR / f"{stem}.nc4"

    # Step 1: extract the HDF subdataset to an unscaled Float32 NetCDF-4 file.
    run([
        "gdal_translate",
        "-q",
        "-of", "netCDF",
        "-co", "FORMAT=NC4",
        "-unscale",
        "-ot", "Float32",
        "-a_nodata", FILLVALUE,
        "-a_srs", "EPSG:4326",
        hdf_uri,
        tmp1_path,
    ])

    # Step 2: set name, attributes, missing value and timestamp, and flip the
    # latitude axis, writing the compressed daily file.
    run([
        "cdo", "-O", "-L", "-b", "F32", "-f", "nc4", "-z", "zip_3",
        f"-setattribute,{VAR_NAME}@long_name={VAR_LONG_NAME}",
        f"-setattribute,{VAR_NAME}@units={VAR_UNITS}",
        f"-setname,{VAR_NAME}",
        "-invertlat",
        f"-setdate,{date_str}",
        "-settime,00:00:00",
        f"-setmissval,{FILLVALUE}",
        tmp1_path,
        nc1_path,
    ])

    # Step 3: overwrite the units and calendar attributes of the time variable.
    # NOTE(review): ncatted rewrites the attribute text only; it does not
    # re-encode the stored time values. Confirm the values written by cdo are
    # already relative to 1970-01-01, otherwise the dates shift.
    run([
        "ncatted", "-O",
        "-a", "units,time,o,c,days since 1970-01-01 00:00:00",
        "-a", "calendar,time,o,c,proleptic_gregorian",
        nc1_path,
    ])
    return nc1_path


def process_one_year(year: int) -> str:
    """Build the daily and 8-day-mean NetCDF files for one calendar year.

    Every day of ``year`` is converted from its raw HDF file into a daily file
    in ``DATA1_DIR``. The daily files are then merged and averaged into 8-day
    windows (days 1-8, 9-16, ..., 353-360) plus one shorter window holding the
    remaining days (361 to the end of the year); the result is written to
    ``DATA2_DIR``.

    Args:
        year: Calendar year to process.

    Returns:
        The year as a string, used by the caller for progress reporting.

    Raises:
        FileNotFoundError: If no raw HDF file is found for some day.
        RuntimeError: If any external command fails (raised by ``run``).
    """
    ndays = 366 if calendar.isleap(year) else 365
    tmp_year_dir = TMP_DIR / f"y{year}"
    tmp_year_dir.mkdir(parents=True, exist_ok=True)

    daily_files = []
    for doy in range(1, ndays + 1):
        daily_files.append(str(_convert_day(year, doy, tmp_year_dir)))

    merge1_path = tmp_year_dir / f"DSR_GLASS_{year}_1D_p05_Wm-2.nc4"
    tmp2_path = tmp_year_dir / f"DSR_GLASS_{year}001-{year}360_8D_p05_Wm-2.nc4"
    tmp3_path = tmp_year_dir / f"DSR_GLASS_{year}361-{year}{ndays:03d}_8D_p05_Wm-2.nc4"
    nc2_path = DATA2_DIR / f"DSR_GLASS_{year}_8D_p05_Wm-2.nc4"

    cdo_nc4 = ["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3"]

    # One file holding all daily timesteps of the year.
    run([*cdo_nc4, "mergetime", *daily_files, merge1_path])

    # 45 full 8-day means over days 1-360. The second argument of timselmean
    # is an offset (timesteps skipped before the first window), so only the
    # window length is given, and the input is limited to the first 360 days
    # so the short tail is not averaged here as well.
    run([*cdo_nc4, "timselmean,8", "-seltimestep,1/360", merge1_path, tmp2_path])

    # Mean of the remaining days (361 .. last day of the year).
    run([*cdo_nc4, "timmean", "-seltimestep,361/-1", merge1_path, tmp3_path])

    # Full-window means followed by the short tail window.
    run([*cdo_nc4, "mergetime", tmp2_path, tmp3_path, nc2_path])

    return str(year)

def main():
    """Process all years in parallel, then merge the yearly 8-day files.

    Each year is handled by ``process_one_year`` in its own worker process.
    The first failure is reported and re-raised, so the final merge only runs
    when every year succeeded.
    """
    years = list(range(2003, 2021))
    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to a single worker instead of failing inside min().
    cpu_total = os.cpu_count()
    max_workers = min(6, cpu_total or 1)

    print(f"[INFO] cpu={cpu_total} max_workers={max_workers}")

    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futs = {ex.submit(process_one_year, y): y for y in years}
        for fut in as_completed(futs):
            y = futs[fut]
            try:
                msg = fut.result()
            except Exception as e:
                print(f"[ERR] year {y} failed:\n{e}")
                raise
            print(f"[OK] year {msg} done")

    files2 = [str(DATA2_DIR / f"DSR_GLASS_{y}_8D_p05_Wm-2.nc4") for y in years]
    # Name the merged file after the actual year range so it cannot drift
    # from the list above.
    merge2_path = DATA2_DIR / f"DSR_GLASS_{years[0]}-{years[-1]}_8D_p05_Wm-2.nc4"
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3", "mergetime", *files2, str(merge2_path)])
    print(f"[OK] merged -> {merge2_path}")

# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
