# 1. Preprocess the MODIS GLASS net radiation

### 1.1 Download the data from https://www.glass.hku.hk/archive/RN/MODIS/
### Note: the URL path segment is `RN`, not `NR`.

In [None]:
#!/usr/bin/env python3
from pathlib import Path
import subprocess

# Destination directory for the raw GLASS HDF files (created if missing).
OUT_DIR = Path("/share/home/dq076/bedrock/data/NR/GLASS/rawdata")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# wget options shared by every yearly download; only the URL changes per year.
_WGET_OPTIONS = [
    "-r",                   # recurse into the remote directory
    "-np",                  # never ascend to the parent directory
    "-nH",                  # do not create a host-name directory locally
    # Strip leading remote directory components so files are not nested
    # locally. NOTE(review): the URL below has four path components
    # (archive/RN/MODIS/{year}); confirm that 5 is the intended depth.
    "--cut-dirs=5",
    "-A", "*.hdf",          # accept only .hdf files
    "-c",                   # resume partially downloaded files
    "--tries=0",            # retry without limit
    "--retry-connrefused",
    "--waitretry=5",
    "--timeout=30",
    "-P", str(OUT_DIR),     # local output directory
]

# Mirror one yearly directory at a time; check=True raises
# CalledProcessError as soon as a wget invocation exits non-zero.
for year in range(2003, 2021):
    print(f"[YEAR] {year}")
    url = f"https://www.glass.hku.hk/archive/RN/MODIS/{year}/"
    subprocess.run(["wget", *_WGET_OPTIONS, url], check=True)


### 1.2 Convert the data from daily (1D) HDF to 8-day (8D) NetCDF-4 (nc4)

##### 1.2.1 From 1D hdf to 1D nc4

In [None]:
#!/usr/bin/env python3
import glob
import subprocess
import calendar
from pathlib import Path
from datetime import datetime, timedelta

# Inclusive date range walked by hdf_to_nc4, advancing STEP_DAYS per step.
START_DATE = datetime(2003, 1, 1)
END_DATE = datetime(2020, 12, 31)
STEP_DAYS = 1

# Root of the GLASS net-radiation data tree; all other directories hang off it.
ROOT_DIR = Path("/share/home/dq076/bedrock/data/NR/GLASS/").resolve()

# Product identifier (used in the raw-file glob pattern and the HDF
# subdataset URI) and the variable metadata written to the output files.
PRODUCT = "GLASS07B11"
VAR_NAME = "NR"
VAR_LONG_NAME = "Net radiation (GLASS07B11)"
VAR_UNITS = "W m-2"
FILLVALUE = "-9999"  # kept as a string: it is passed on command lines

# Input, scratch and output directories.
RAW_DIR = ROOT_DIR / "rawdata"
TMP_DIR = ROOT_DIR / "_tmp"
DATA1_DIR = ROOT_DIR / "1D"
DATA2_DIR = ROOT_DIR / "8D"

# Create the scratch and output directories up front (RAW_DIR is read-only
# input and is not created here).
for _dir in (TMP_DIR, DATA1_DIR, DATA2_DIR):
    _dir.mkdir(parents=True, exist_ok=True)

def run(cmd):
    """Execute an external command and raise if it exits non-zero.

    Args:
        cmd: Sequence of command-line arguments. Every item is converted
            with ``str`` first, so ``Path`` objects may be passed directly.

    Raises:
        RuntimeError: If the command returns a non-zero exit status. The
            message contains the command line and the captured stdout and
            stderr.
    """
    argv = list(map(str, cmd))
    result = subprocess.run(argv, capture_output=True, text=True)
    if result.returncode == 0:
        return
    raise RuntimeError(
        "Command failed:\n{}\n\nSTDOUT:\n{}\n\nSTDERR:\n{}\n".format(
            " ".join(argv), result.stdout, result.stderr
        )
    )

def hdf_to_nc4():
    """Convert each daily GLASS HDF file into a daily NetCDF-4 file.

    Walks from START_DATE to END_DATE (inclusive) in STEP_DAYS increments.
    For every date the matching raw file in RAW_DIR is processed in three
    stages:

    1. ``gdal_translate`` extracts the variable into a temporary NetCDF-4
       file in TMP_DIR.
    2. ``cdo`` names the variable, sets its attributes, date, time and
       missing value, inverts the latitude axis and writes the daily file
       to DATA1_DIR.
    3. ``ncatted`` overwrites the ``units`` and ``calendar`` attributes of
       the ``time`` variable in place.

    Dates without a usable raw file are reported with ``[MISS]`` and
    skipped. The temporary file from stage 1 is left in TMP_DIR.

    Raises:
        RuntimeError: Propagated from ``run`` when an external tool fails.
    """

    def iter_days():
        # Yield every processing date; bounds and step are the module constants.
        current = START_DATE
        while current <= END_DATE:
            yield current
            current += timedelta(days=STEP_DAYS)

    for day in iter_days():
        # Year + zero-padded day of year, e.g. "2003001".
        stamp = f"{day.year}{day.timetuple().tm_yday:03d}"
        iso_date = day.strftime("%Y-%m-%d")
        print(f"[DAY] {iso_date}  A{stamp}")

        # The version and production-date parts of the raw file name vary,
        # so they are matched with wildcards.
        pattern = str(RAW_DIR / f"{PRODUCT}.V*.A{stamp}.*.hdf")
        candidates = glob.glob(pattern)
        if not candidates:
            print(f"[MISS] {pattern} (skip)")
            continue

        # When several files match, use the lexicographically last one.
        source = Path(max(candidates))
        # glob can return entries that do not resolve (e.g. a dangling
        # symlink), so the existence check is kept.
        if not source.exists():
            print(f"[MISS] {source} (skip)")
            continue

        scratch_nc = TMP_DIR / f"NR_GLASS_{stamp}_1D_p05_Wm-2_tmp1.nc4"
        daily_nc = DATA1_DIR / f"NR_GLASS_{stamp}_1D_p05_Wm-2.nc4"
        subdataset = f'HDF4_EOS:EOS_GRID:"{source}":{PRODUCT}:{VAR_NAME}'

        # Stage 1: HDF subdataset -> Float32 NetCDF-4 with scale/offset
        # applied, an explicit nodata value and an EPSG:4326 SRS.
        gdal_cmd = [
            "gdal_translate",
            "-q",
            "-of", "netCDF",
            "-co", "FORMAT=NC4",
            "-unscale",
            "-ot", "Float32",
            "-a_nodata", FILLVALUE,
            "-a_srs", "EPSG:4326",
            subdataset,
            str(scratch_nc),
        ]
        run(gdal_cmd)

        # Stage 2: cdo applies a chained operator list starting from the
        # operator nearest the input file, so the variable is renamed
        # before the attributes are attached to VAR_NAME.
        cdo_cmd = [
            "cdo", "-O", "-L", "-b", "F32", "-f", "nc4", "-z", "zip_3",
            f'-setattribute,{VAR_NAME}@long_name={VAR_LONG_NAME}',
            f'-setattribute,{VAR_NAME}@units={VAR_UNITS}',
            f"-setname,{VAR_NAME}",
            "-invertlat",
            f"-setdate,{iso_date}",
            "-settime,00:00:00",
            f"-setmissval,{FILLVALUE}",
            str(scratch_nc),
            str(daily_nc),
        ]
        run(cdo_cmd)

        # Stage 3: overwrite the time attributes in place.
        # NOTE(review): ncatted only rewrites the attribute text; it does
        # not convert the stored time values. Confirm that cdo already
        # writes them relative to 1970-01-01, otherwise the dates shift.
        ncatted_cmd = [
            "ncatted", "-O",
            '-a', 'units,time,o,c,days since 1970-01-01 00:00:00',
            '-a', 'calendar,time,o,c,proleptic_gregorian',
            str(daily_nc),
        ]
        run(ncatted_cmd)

def nc_1D_to_8D():
    """Aggregate the daily NetCDF-4 files into 8-day means, year by year.

    For every year from 2003 to 2020:

    1. Merge all daily files of the year into one yearly daily file in
       DATA1_DIR.
    2. Average days 001-360 in 45 consecutive 8-day windows (TMP_DIR).
    3. Average the remaining days (361 to the end of the year, i.e. 5 or
       6 days) into one final short window (TMP_DIR).
    4. Concatenate both results into the yearly 8-day file in DATA2_DIR.

    Finally all yearly 8-day files are merged into a single 2003-2020 file
    in DATA2_DIR.

    Windows are selected by timestep position, so every daily file of a
    year must be present; a missing file makes the first merge fail.

    Raises:
        RuntimeError: Propagated from ``run`` when a cdo call fails.
    """
    # Options shared by every cdo call: overwrite existing output, lock
    # I/O, and write zip-compressed (level 3) NetCDF-4.
    cdo = ["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3"]

    yearly_8d_files = []
    for year in range(2003, 2021):
        ndays = 366 if calendar.isleap(year) else 365
        daily_files = [
            str(DATA1_DIR / f"NR_GLASS_{year}{doy:03d}_1D_p05_Wm-2.nc4")
            for doy in range(1, ndays + 1)
        ]
        merged_daily = str(DATA1_DIR / f"NR_GLASS_{year}_1D_p05_Wm-2.nc4")
        full_windows = str(
            TMP_DIR / f"NR_GLASS_{year}001-{year}360_8D_p05_Wm-2.nc4"
        )
        last_window = str(
            TMP_DIR / f"NR_GLASS_{year}361-{year}{ndays:03d}_8D_p05_Wm-2.nc4"
        )
        yearly_8d = str(DATA2_DIR / f"NR_GLASS_{year}_8D_p05_Wm-2.nc4")

        run([*cdo, "mergetime", *daily_files, merged_daily])

        # Days 001-360 in 8-day windows. The second timselmean argument
        # is an offset (leading timesteps to skip), not a stride: the
        # former "timselmean,8,8" dropped days 001-008. Restricting the
        # input to timesteps 1-360 keeps the short tail out of this file.
        run([
            *cdo,
            "-timselmean,8", "-seltimestep,1/360",
            merged_daily, full_windows,
        ])

        # Remaining days (361 .. last) form the final, shorter window.
        run([
            *cdo,
            "-timmean", "-seltimestep,361/-1",
            merged_daily, last_window,
        ])

        run([*cdo, "-mergetime", full_windows, last_window, yearly_8d])
        yearly_8d_files.append(yearly_8d)

    merged_all = str(DATA2_DIR / "NR_GLASS_2003-2020_8D_p05_Wm-2.nc4")
    run([*cdo, "mergetime", *yearly_8d_files, merged_all])

if __name__ == "__main__":
    # Stage 1 (HDF -> daily NetCDF-4) is currently disabled; uncomment the
    # next line to (re)generate the daily files before aggregating them.
    # hdf_to_nc4()
    nc_1D_to_8D()

[DAY] 2003-01-01  A2003001
[DAY] 2003-01-02  A2003002
[DAY] 2003-01-03  A2003003
[DAY] 2003-01-04  A2003004
[DAY] 2003-01-05  A2003005
[DAY] 2003-01-06  A2003006
[DAY] 2003-01-07  A2003007
[DAY] 2003-01-08  A2003008
[DAY] 2003-01-09  A2003009
[DAY] 2003-01-10  A2003010
[DAY] 2003-01-11  A2003011
[DAY] 2003-01-12  A2003012
[DAY] 2003-01-13  A2003013
[DAY] 2003-01-14  A2003014
[DAY] 2003-01-15  A2003015
[DAY] 2003-01-16  A2003016
[DAY] 2003-01-17  A2003017
[DAY] 2003-01-18  A2003018
[DAY] 2003-01-19  A2003019
[DAY] 2003-01-20  A2003020
[DAY] 2003-01-21  A2003021
[DAY] 2003-01-22  A2003022
[DAY] 2003-01-23  A2003023
[DAY] 2003-01-24  A2003024
[DAY] 2003-01-25  A2003025
[DAY] 2003-01-26  A2003026
[DAY] 2003-01-27  A2003027
[DAY] 2003-01-28  A2003028
[DAY] 2003-01-29  A2003029
[DAY] 2003-01-30  A2003030
[DAY] 2003-01-31  A2003031
[DAY] 2003-02-01  A2003032
[DAY] 2003-02-02  A2003033
[DAY] 2003-02-03  A2003034
[DAY] 2003-02-04  A2003035
[DAY] 2003-02-05  A2003036
[DAY] 2003-02-06  A2003037
[

##### 1.2.2 From 1D nc4 to 8D nc4

In [None]:
#!/usr/bin/env python3
import os
import subprocess
from pathlib import Path
from datetime import datetime, timedelta
import calendar

# Date range of the dataset and the length of the aggregation window in days.
# NOTE(review): none of these three are referenced by the code visible in
# this cell, which hard-codes the years and the 8-day window.
START_DATE = datetime(2003, 1, 1)
END_DATE = datetime(2020, 12, 31)
NEW_STEP_DAYS = 8

# Root of the GLASS net-radiation data tree; all other directories hang off it.
ROOT_DIR = Path("/share/home/dq076/bedrock/data/NR/GLASS/").resolve()

# Product and variable metadata (not referenced by the code visible in
# this cell).
PRODUCT = "GLASS07B11"
VAR_NAME = "NR"
VAR_LONG_NAME = "Net radiation (GLASS07B11)"
VAR_UNITS = "W m-2"
FILLVALUE = "-9999"

# Input, scratch and output directories.
RAW_DIR = ROOT_DIR / "rawdata"
TMP_DIR = ROOT_DIR / "_tmp"
DATA1_DIR = ROOT_DIR / "1D"
DATA2_DIR = ROOT_DIR / "8D"

# Create the scratch and output directories up front (RAW_DIR is not
# created here).
for _dir in (TMP_DIR, DATA1_DIR, DATA2_DIR):
    _dir.mkdir(parents=True, exist_ok=True)

def run(cmd):
    """Run an external command and raise a readable error if it fails.

    Args:
        cmd: Sequence of command-line arguments. Every item is converted
            with ``str`` first, so ``Path`` objects may be passed directly.

    Raises:
        RuntimeError: If the command returns a non-zero exit status. The
            message contains the command line and the captured stdout and
            stderr.
    """
    # Stringify first: callers pass Path objects, and " ".join() below
    # would raise TypeError on them, hiding the real failure output.
    cmd = [str(x) for x in cmd]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if p.returncode != 0:
        raise RuntimeError(
            "Command failed:\n"
            f"{' '.join(cmd)}\n\n"
            f"STDOUT:\n{p.stdout}\n\n"
            f"STDERR:\n{p.stderr}\n"
        )
    
def main():
    """Aggregate the daily NetCDF-4 files into 8-day means, year by year.

    For every year from 2003 to 2020:

    1. Merge all daily files of the year into one yearly daily file in
       DATA1_DIR.
    2. Average days 001-360 in 45 consecutive 8-day windows (TMP_DIR).
    3. Average the remaining days (361 to the end of the year, i.e. 5 or
       6 days) into one final short window (TMP_DIR).
    4. Concatenate both results into the yearly 8-day file in DATA2_DIR.

    Finally all yearly 8-day files are merged into a single 2003-2020 file
    in DATA2_DIR.

    Windows are selected by timestep position, so every daily file of a
    year must be present; a missing file makes the first merge fail.

    Raises:
        RuntimeError: Propagated from ``run`` when a cdo call fails.
    """
    # Options shared by every cdo call: overwrite existing output, lock
    # I/O, and write zip-compressed (level 3) NetCDF-4.
    cdo = ["cdo", "-O", "-L", "-f", "nc4", "-z", "zip_3"]

    yearly_8d_files = []
    for year in range(2003, 2021):
        ndays = 366 if calendar.isleap(year) else 365
        # Paths are passed as plain strings so the command list can always
        # be joined into an error message.
        daily_files = [
            str(DATA1_DIR / f"NR_GLASS_{year}{doy:03d}_1D_p05_Wm-2.nc4")
            for doy in range(1, ndays + 1)
        ]
        merged_daily = str(DATA1_DIR / f"NR_GLASS_{year}_1D_p05_Wm-2.nc4")
        full_windows = str(
            TMP_DIR / f"NR_GLASS_{year}001-{year}360_8D_p05_Wm-2.nc4"
        )
        last_window = str(
            TMP_DIR / f"NR_GLASS_{year}361-{year}{ndays:03d}_8D_p05_Wm-2.nc4"
        )
        yearly_8d = str(DATA2_DIR / f"NR_GLASS_{year}_8D_p05_Wm-2.nc4")

        run([*cdo, "mergetime", *daily_files, merged_daily])

        # Days 001-360 in 8-day windows. The second timselmean argument
        # is an offset (leading timesteps to skip), not a stride: the
        # former "timselmean,8,8" dropped days 001-008. Restricting the
        # input to timesteps 1-360 keeps the short tail out of this file.
        run([
            *cdo,
            "-timselmean,8", "-seltimestep,1/360",
            merged_daily, full_windows,
        ])

        # Remaining days (361 .. last) form the final, shorter window.
        run([
            *cdo,
            "-timmean", "-seltimestep,361/-1",
            merged_daily, last_window,
        ])

        run([*cdo, "-mergetime", full_windows, last_window, yearly_8d])
        yearly_8d_files.append(yearly_8d)

    merged_all = str(DATA2_DIR / "NR_GLASS_2003-2020_8D_p05_Wm-2.nc4")
    run([*cdo, "mergetime", *yearly_8d_files, merged_all])

if __name__ == "__main__":
    # Run the daily -> 8-day aggregation when executed as a script.
    main()