# 1. Preprocess the GLASS MODIS ET data (0.05D)

### 1.1 Download the data from https://www.glass.hku.hk/archive/ET/MODIS/0.05D/

In [None]:
#!/usr/bin/env python3
from pathlib import Path
import subprocess
# Raw GLASS ET granules are stored under <ROOT_DIR>/ET/GLASS/rawdata.
ROOT_DIR = Path("/tera04/zhwei/xionghui/bedrock/data/")
OUT_DIR = ROOT_DIR / "ET/GLASS/rawdata"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Archive directory that holds one sub-directory per year.
BASE_URL = "https://www.glass.hku.hk/archive/ET/MODIS/0.05D"

# wget options shared by every yearly download.
WGET_OPTIONS = [
    "-r",                   # recurse into the year directory
    "-np",                  # never ascend to the parent directory
    "-nH",                  # do not create a host-name directory
    "--cut-dirs=5",         # strip archive/ET/MODIS/0.05D/{year}
    "-A", "*.hdf",          # accept only .hdf files
    "-c",                   # resume partially downloaded files
    "--tries=0",            # retry without limit
    "--retry-connrefused",  # also retry when the connection is refused
    "--waitretry=5",        # wait up to 5 s between retries
    "--timeout=30",         # network timeout in seconds
    "-P", str(OUT_DIR),     # download directory
]

# Fetch every year from 2003 through 2020; a failing wget aborts the loop
# because check=True raises CalledProcessError.
for year in range(2003, 2021):
    print(f"[YEAR] {year}")
    subprocess.run(["wget", *WGET_OPTIONS, f"{BASE_URL}/{year}/"], check=True)


### 1.2 Convert the 8-day (8D) HDF files to 8-day NetCDF-4 (nc4) files

In [None]:
#!/usr/bin/env python3
import os
import glob
import subprocess
import calendar
from pathlib import Path
from datetime import datetime, timedelta
from concurrent.futures import ProcessPoolExecutor, as_completed

# Base directory of the GLASS ET dataset, resolved to an absolute path.
ROOT_DIR = Path("/tera04/zhwei/xionghui/bedrock/data/ET/GLASS/").resolve()

# Product identifier: appears in the raw file names and in the HDF-EOS grid URI.
PRODUCT = "GLASS11B01"
# Name of the subdataset to extract; also the variable name in the output files.
VAR_NAME = "ET"
# Attributes written to the output variable.
VAR_LONG_NAME = "Evapotranspiration (GLASS11B01)"
VAR_UNITS = "W m-2"
# No-data value, kept as a string because it is passed on command lines.
FILLVALUE = "-9999"

# Directory layout below ROOT_DIR.
RAW_DIR = ROOT_DIR / "rawdata"    # downloaded .hdf files
TMP_DIR = ROOT_DIR / "_tmp"       # intermediate gdal_translate output
DATA1_DIR = ROOT_DIR / "8D"       # per-date and per-year NetCDF files

# Create the working directories up front (the raw directory is only read).
for _work_dir in (TMP_DIR, DATA1_DIR):
    _work_dir.mkdir(parents=True, exist_ok=True)

def run(cmd):
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        cmd: Sequence of command-line tokens. Every element is converted with
            ``str()`` before execution, so ``Path`` objects are accepted.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The message
            contains the exit status, the shell-quoted command line, and the
            captured stdout and stderr.
    """
    # Local import: shlex is not part of this cell's import header.
    import shlex

    cmd = [str(x) for x in cmd]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if p.returncode != 0:
        # Quote every token so that arguments containing spaces or parentheses
        # stay unambiguous and the line can be pasted back into a shell.
        quoted = " ".join(shlex.quote(tok) for tok in cmd)
        raise RuntimeError(
            f"Command failed (exit status {p.returncode}):\n"
            f"{quoted}\n\n"
            f"STDOUT:\n{p.stdout}\n\n"
            f"STDERR:\n{p.stderr}\n"
        )

def process_one_year(year: int) -> str:
    """Convert one year of 8-day HDF files to NetCDF-4 and merge them.

    For every 8th day of year starting at day 1, the newest matching raw HDF
    file in ``RAW_DIR`` is converted with ``gdal_translate`` into a temporary
    NetCDF file, post-processed with ``cdo`` into ``DATA1_DIR``, and finally
    all per-date files of the year are merged along time into
    ``ET_GLASS_{year}_8D_p05_Wm-2.nc``.

    Args:
        year: Calendar year to process.

    Returns:
        The year as a string.

    Raises:
        FileNotFoundError: If no raw HDF file matches an expected date.
        RuntimeError: If ``gdal_translate`` or ``cdo`` fails (raised by ``run``).
    """
    ndays = 366 if calendar.isleap(year) else 365
    doys = range(1, ndays + 1, 8)
    tmp_year_dir = TMP_DIR / f"{year}"
    tmp_year_dir.mkdir(parents=True, exist_ok=True)

    nc_paths = []
    for doy in doys:
        date_str = (datetime(year, 1, 1) + timedelta(days=doy - 1)).strftime("%Y-%m-%d")
        # Emit the line and its newline as one flushed write: several worker
        # processes print concurrently, and separate writes for text and
        # newline let their lines run together.
        print(f"[DAY] {date_str}  A{year}{doy:03d}\n", end="", flush=True)

        pat = str(RAW_DIR / f"{PRODUCT}.V*.A{year}{doy:03d}.*.hdf")
        hits = sorted(glob.glob(pat))
        if not hits:
            # Fail with the pattern in the message instead of a bare IndexError.
            raise FileNotFoundError(f"No raw HDF file matches {pat}")
        # When several files match, take the lexicographically last one.
        hdf_path = Path(hits[-1])
        hdf_uri = f'HDF4_EOS:EOS_GRID:"{hdf_path}":{PRODUCT}:{VAR_NAME}'
        tmp1_path = tmp_year_dir / f"ET_GLASS_{year}{doy:03d}_8D_p05_Wm-2_tmp1.nc"
        nc1_path = DATA1_DIR / f"ET_GLASS_{year}{doy:03d}_8D_p05_Wm-2.nc"

        # Step 1: extract the subdataset to a temporary NetCDF-4 file as
        # Float32, with FILLVALUE as no-data and EPSG:4326 as the SRS.
        run([
            "gdal_translate",
            "-q",
            "-of", "netCDF",
            "-co", "FORMAT=NC4",
            "-unscale",
            "-ot", "Float32",
            "-a_nodata", FILLVALUE,
            "-a_srs", "EPSG:4326",
            hdf_uri,
            str(tmp1_path),
        ])

        # Step 2: set name, attributes, missing value and time axis with cdo.
        # In a cdo operator chain the operator nearest the input file is
        # applied first, so the list below is effectively read bottom-up.
        run([
            "cdo", "-O", "-L", "-b", "F32", "-f", "nc4", "-z", "zip_3",
            "-permute,lat,lon",
            f"-setattribute,{VAR_NAME}@long_name={VAR_LONG_NAME}",
            f"-setattribute,{VAR_NAME}@units={VAR_UNITS}",
            f"-setattribute,{VAR_NAME}@_FillValue=-9999.f",
            f"-setattribute,{VAR_NAME}@missing_value=-9999.f",
            f"-setname,{VAR_NAME}",
            # "-invertlat",

            "-setmisstoc,-9999",
            f"-setctomiss,{FILLVALUE}",

            "-setcalendar,proleptic_gregorian",
            f"-setreftime,{date_str},00:00:00",
            "-settunits,days",
            f"-setdate,{date_str}",
            "-settime,00:00:00",

            str(tmp1_path),
            str(nc1_path),
        ])
        nc_paths.append(str(nc1_path))

    # Step 3: concatenate all per-date files of the year along time.
    merge1_path = DATA1_DIR / f"ET_GLASS_{year}_8D_p05_Wm-2.nc"

    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3",
         "mergetime", *nc_paths, str(merge1_path)])

    return str(year)

def main():
    """Process the years 2003-2020 in parallel and merge them into one file.

    Each year is handled by ``process_one_year`` in a worker process (at most
    six at a time). Once every year has finished, the yearly files are merged
    along time into ``ET_GLASS_2003-2020_8D_p05_Wm-2.nc`` under ``ROOT_DIR``.

    Raises:
        Exception: Re-raises the first failure reported by a worker; the
            final merge is not run in that case.
    """
    years = list(range(2003, 2021))
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to a single worker instead of failing inside min().
    max_workers = min(6, os.cpu_count() or 1)

    print(f"[INFO] cpu={os.cpu_count()} max_workers={max_workers}")

    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        # Map each future back to its year so failures can be reported.
        futs = {ex.submit(process_one_year, y): y for y in years}
        for fut in as_completed(futs):
            y = futs[fut]
            try:
                msg = fut.result()
                print(f"[OK] year {msg} done")
            except Exception as e:
                print(f"[ERR] year {y} failed:\n{e}")
                raise

    # Merge the yearly files, listed in chronological order, into one file.
    files2 = [str(DATA1_DIR / f"ET_GLASS_{y}_8D_p05_Wm-2.nc") for y in years]
    merge2_path = ROOT_DIR / "ET_GLASS_2003-2020_8D_p05_Wm-2.nc"
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3", "mergetime", *files2, str(merge2_path)])
    print(f"[OK] merged -> {merge2_path}")

if __name__ == "__main__":
    main()


[INFO] cpu=160 max_workers=6
[DAY] 2006-01-01  A2006001[DAY] 2004-01-01  A2004001[DAY] 2003-01-01  A2003001[DAY] 2005-01-01  A2005001[DAY] 2007-01-01  A2007001[DAY] 2008-01-01  A2008001





[DAY] 2007-01-09  A2007009
[DAY] 2003-01-09  A2003009[DAY] 2004-01-09  A2004009[DAY] 2008-01-09  A2008009

[DAY] 2006-01-09  A2006009
[DAY] 2005-01-09  A2005009[DAY] 2007-01-17  A2007017

[DAY] 2003-01-17  A2003017
[DAY] 2004-01-17  A2004017

[DAY] 2008-01-17  A2008017
[DAY] 2005-01-17  A2005017[DAY] 2006-01-17  A2006017[DAY] 2007-01-25  A2007025[DAY] 2003-01-25  A2003025

[DAY] 2004-01-25  A2004025


[DAY] 2008-01-25  A2008025[DAY] 2006-01-25  A2006025[DAY] 2005-01-25  A2005025
[DAY] 2003-02-02  A2003033
[DAY] 2007-02-02  A2007033


[DAY] 2006-02-02  A2006033[DAY] 2005-02-02  A2005033[DAY] 2003-02-10  A2003041
[DAY] 2008-02-02  A2008033

[DAY] 2007-02-10  A2007041
[DAY] 2004-02-02  A2004033
[DAY] 2006-02-10  A2006041
[DAY] 2005-02-10  A2005041[DAY] 2003-02-18  A2003049


[DAY] 2006-02-18  A2006049