# 1. Preprocess the MODIS GLASS downward shortwave radiation

### 1.1 Download the data from https://www.glass.hku.hk/archive/DSR/MODIS/0.05D/. The archive lacks the file for day 366 of 2012, so copy the day-365 file to day 366.

In [None]:
#!/usr/bin/env python3
from pathlib import Path
import subprocess

# Directory that receives the downloaded HDF files (flat, no sub-directories).
OUT_DIR = Path("/share/home/dq076/bedrock/data/DSR/GLASS/rawdata")

# Remote directory that holds one sub-directory per year.
BASE_URL = "https://www.glass.hku.hk/archive/DSR/MODIS/0.05D"


def build_wget_cmd(year, out_dir=OUT_DIR):
    """Return the wget argument list that mirrors one year of HDF files.

    Args:
        year: Year sub-directory to download from ``BASE_URL``.
        out_dir: Local directory the files are written to.

    Returns:
        A list of strings suitable for ``subprocess.run``.
    """
    url = f"{BASE_URL}/{year}/"
    return [
        "wget",
        "-r",                      # recurse into the year directory
        "-np",                     # never ascend to the parent directory
        "-nH",                     # do not create a host-name directory
        "--cut-dirs=5",            # strip archive/DSR/MODIS/0.05D/{year}
        "-A", "*.hdf",             # accept HDF files only
        "-c",                      # resume partially downloaded files
        "--tries=0",               # retry without limit
        "--retry-connrefused",     # retry on "connection refused" as well
        "--waitretry=5",
        "--timeout=30",
        "-P", str(out_dir),        # output directory
        url,
    ]


def download_all():
    """Download every year from 2003 to 2020 into ``OUT_DIR``.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    for year in range(2003, 2021):
        print(f"[YEAR] {year}")
        subprocess.run(build_wget_cmd(year), check=True)


if __name__ == "__main__":
    download_all()


### 1.2 Convert the daily (1D) HDF files to 8-day (8D) mean NetCDF-4 (nc4) files

In [None]:
#!/usr/bin/env python3
import os
import glob
import subprocess
import calendar
from pathlib import Path
from datetime import datetime, timedelta
from concurrent.futures import ProcessPoolExecutor, as_completed

# Root of the GLASS DSR working tree, resolved to an absolute path.
ROOT_DIR = Path("/share/home/dq076/bedrock/data/DSR/GLASS/").resolve()

# Product and variable metadata used to locate inputs and label outputs.
PRODUCT = "GLASS05B01"
VAR_NAME = "DSR"
VAR_LONG_NAME = "Downward short-wave radiation (GLASS05B01)"
VAR_UNITS = "W m-2"
FILLVALUE = "-9999"

# Sub-directories of ROOT_DIR: raw HDF input, scratch space, daily and 8-day output.
RAW_DIR = ROOT_DIR.joinpath("rawdata")
TMP_DIR = ROOT_DIR.joinpath("_tmp")
DATA1_DIR = ROOT_DIR.joinpath("1D")
DATA2_DIR = ROOT_DIR.joinpath("8D")

# Create the scratch and output directories; RAW_DIR is not created here.
for _work_dir in (TMP_DIR, DATA1_DIR, DATA2_DIR):
    _work_dir.mkdir(parents=True, exist_ok=True)

def run(cmd):
    """Execute an external command and fail loudly on a non-zero exit status.

    Args:
        cmd: Iterable of command-line parts; each part is converted with ``str``.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The message
            contains the command line and the captured stdout and stderr.
    """
    argv = list(map(str, cmd))
    result = subprocess.run(argv, capture_output=True, text=True)
    if result.returncode == 0:
        return

    report = (
        "Command failed:\n"
        + " ".join(argv) + "\n\n"
        + "STDOUT:\n" + result.stdout + "\n\n"
        + "STDERR:\n" + result.stderr + "\n"
    )
    raise RuntimeError(report)

def process_one_year(year: int) -> str:
    """Convert one year of daily HDF files to NetCDF-4 and build 8-day means.

    For every day of ``year`` the matching HDF file in ``RAW_DIR`` is converted
    with ``gdal_translate`` and post-processed with ``cdo`` into a daily file in
    ``DATA1_DIR``. The daily files are then merged and averaged into a single
    file in ``DATA2_DIR`` that holds the means of days 1-8, 9-16, ..., 353-360
    followed by one mean over the remaining days (361 to the end of the year).

    Args:
        year: Calendar year to process.

    Returns:
        The year as a string.

    Raises:
        FileNotFoundError: If no HDF file matches a day of the year.
        RuntimeError: If an external command fails (raised by ``run``).
    """
    ndays = 366 if calendar.isleap(year) else 365
    tmp_year_dir = TMP_DIR / f"y{year}"
    tmp_year_dir.mkdir(parents=True, exist_ok=True)

    for doy in range(1, ndays + 1):
        yyyymmdd = (datetime(year, 1, 1) + timedelta(days=doy - 1)).strftime("%Y-%m-%d")
        print(f"[DAY] {yyyymmdd}  A{year}{doy:03d}")

        pat = str(RAW_DIR / f"{PRODUCT}.V*.A{year}{doy:03d}.*.hdf")
        hits = sorted(glob.glob(pat))
        if not hits:
            raise FileNotFoundError(
                f"[MISS] no HDF for {PRODUCT} A{year}{doy:03d}\npattern: {pat}\n"
                f"RAW_DIR: {RAW_DIR}"
            )
        # Several files may match the version wildcard; use the last in sort order.
        hdf_path = Path(hits[-1])

        hdf_uri = f'HDF4_EOS:EOS_GRID:"{hdf_path}":{PRODUCT}:{VAR_NAME}'
        tmp1_path = tmp_year_dir / f"DSR_GLASS_{year}{doy:03d}_1D_p05_Wm-2_tmp1.nc4"
        nc1_path = DATA1_DIR / f"DSR_GLASS_{year}{doy:03d}_1D_p05_Wm-2.nc4"

        # Step 1: HDF subdataset -> unscaled Float32 NetCDF-4, tagged with
        # FILLVALUE as nodata and EPSG:4326 as the spatial reference.
        run([
            "gdal_translate",
            "-q",
            "-of", "netCDF",
            "-co", "FORMAT=NC4",
            "-unscale",
            "-ot", "Float32",
            "-a_nodata", FILLVALUE,
            "-a_srs", "EPSG:4326",
            hdf_uri,
            str(tmp1_path),
        ])

        # Step 2: set the time axis, flip latitudes, rename and annotate.
        # Operators in a cdo chain are applied from right to left, so values
        # equal to FILLVALUE are first flagged missing (-setctomiss) and those
        # missing cells are then set to 0 (-setmisstoc); the _FillValue and
        # missing_value attributes are set to 0 as well.
        run([
            "cdo", "-O", "-L", "-b", "F32", "-f", "nc4", "-z", "zip_3",
            f"-setattribute,{VAR_NAME}@long_name={VAR_LONG_NAME}",
            f"-setattribute,{VAR_NAME}@units={VAR_UNITS}",
            f"-setattribute,{VAR_NAME}@_FillValue=0.f",
            f"-setattribute,{VAR_NAME}@missing_value=0.f",
            f"-setname,{VAR_NAME}",
            "-invertlat",

            "-setmisstoc,0",
            f"-setctomiss,{FILLVALUE}",

            "-setcalendar,proleptic_gregorian",
            f"-setreftime,{yyyymmdd},00:00:00",
            "-settunits,days",
            f"-setdate,{yyyymmdd}",
            "-settime,00:00:00",

            str(tmp1_path),
            str(nc1_path),
        ])

    files1 = [str(DATA1_DIR / f"DSR_GLASS_{year}{doy:03d}_1D_p05_Wm-2.nc4")
              for doy in range(1, ndays + 1)]

    merge1_path = tmp_year_dir / f"DSR_GLASS_{year}_1D_p05_Wm-2.nc4"
    tmp2_path   = tmp_year_dir / f"DSR_GLASS_{year}001-{year}360_8D_p05_Wm-2.nc4"
    tmp3_path   = tmp_year_dir / f"DSR_GLASS_{year}361-{year}{ndays:03d}_8D_p05_Wm-2.nc4"
    nc2_path    = DATA2_DIR / f"DSR_GLASS_{year}_8D_p05_Wm-2.nc4"

    # Step 3: one file holding every daily timestep of the year.
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3",
         "mergetime", *files1, str(merge1_path)])

    # Step 4: 45 full 8-day means over days 1-360. The input is restricted
    # explicitly and no offset is passed: the second timselmean argument is
    # "noffset" (timesteps skipped before the first window), so
    # "timselmean,8,8" would drop days 1-8 from the result.
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3",
         "timselmean,8", "-seltimestep,1/360", str(merge1_path), str(tmp2_path)])

    # Step 5: a single mean over the remaining days (361 to the last timestep).
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3",
         "timmean", "-seltimestep,361/-1", str(merge1_path), str(tmp3_path)])

    # Step 6: concatenate both parts into the yearly 8-day file.
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3",
         "mergetime", str(tmp2_path), str(tmp3_path), str(nc2_path)])

    return str(year)

def main():
    """Process the years 2003-2020 in parallel and merge the yearly 8-day files.

    Each year is handled by ``process_one_year`` in a worker process. After all
    years have finished, the yearly files in ``DATA2_DIR`` are merged with
    ``cdo mergetime`` into one file under ``ROOT_DIR``.

    Raises:
        Exception: The first exception raised by a worker is reported and
            re-raised; the final merge is then skipped.
    """
    years = list(range(2003, 2021))
    cpu_total = os.cpu_count()
    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to a single worker instead of failing inside min().
    max_workers = min(6, cpu_total or 1)

    print(f"[INFO] cpu={cpu_total} max_workers={max_workers}")

    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        # Map each future back to its year so failures can be attributed.
        futs = {ex.submit(process_one_year, y): y for y in years}
        for fut in as_completed(futs):
            y = futs[fut]
            try:
                msg = fut.result()
                print(f"[OK] year {msg} done")
            except Exception as e:
                print(f"[ERR] year {y} failed:\n{e}")
                raise

    files2 = [str(DATA2_DIR / f"DSR_GLASS_{y}_8D_p05_Wm-2.nc4") for y in years]
    merge2_path = ROOT_DIR / "DSR_GLASS_2003-2020_8D_p05_Wm-2.nc4"
    run(["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3", "mergetime", *files2, str(merge2_path)])
    print(f"[OK] merged -> {merge2_path}")

if __name__ == "__main__":
    main()


[INFO] cpu=48 max_workers=6
[DAY] 2003-01-01  A2003001[DAY] 2004-01-01  A2004001[DAY] 2008-01-01  A2008001[DAY] 2005-01-01  A2005001[DAY] 2006-01-01  A2006001[DAY] 2007-01-01  A2007001





[DAY] 2007-01-02  A2007002
[DAY] 2005-01-02  A2005002[DAY] 2008-01-02  A2008002

[DAY] 2003-01-02  A2003002
[DAY] 2004-01-02  A2004002
[DAY] 2006-01-02  A2006002
[DAY] 2006-01-03  A2006003
[DAY] 2008-01-03  A2008003
[DAY] 2004-01-03  A2004003
[DAY] 2005-01-03  A2005003
[DAY] 2003-01-03  A2003003
[DAY] 2007-01-03  A2007003
[DAY] 2004-01-04  A2004004
[DAY] 2008-01-04  A2008004
[DAY] 2006-01-04  A2006004
[DAY] 2005-01-04  A2005004
[DAY] 2003-01-04  A2003004
[DAY] 2007-01-04  A2007004
[DAY] 2006-01-05  A2006005
[DAY] 2004-01-05  A2004005
[DAY] 2008-01-05  A2008005
[DAY] 2005-01-05  A2005005
[DAY] 2003-01-05  A2003005
[DAY] 2007-01-05  A2007005
[DAY] 2006-01-06  A2006006
[DAY] 2008-01-06  A2008006
[DAY] 2004-01-06  A2004006
[DAY] 2005-01-06  A2005006
[DAY] 2007-01-06  A2007006
[DAY] 2003-01-06  A2003006


IndexError: list index out of range