# 1. Preprocess the MODIS GLASS downward shortwave radiation

### 1.1 Download the data from https://www.glass.hku.hk/archive/DSR/MODIS/0.05D/

In [None]:
#!/usr/bin/env python3
from pathlib import Path
import subprocess

# Mirror the daily GLASS DSR (MODIS, 0.05 degree) HDF files, one year at a time,
# into a single flat directory using wget.
OUT_DIR = Path("/share/home/dq076/bedrock/data/DSR/GLASS/rawdata")
OUT_DIR.mkdir(parents=True, exist_ok=True)

for year in range(2003, 2021):
    url = f"https://www.glass.hku.hk/archive/DSR/MODIS/0.05D/{year}/"
    print(f"[YEAR] {year}")

    cmd = [
        "wget",
        "-r",                      # recurse into the year directory
        "-np",                     # never ascend to the parent directory
        "-nH",                     # do not create a host-name directory
        "--cut-dirs=5",            # strip archive/DSR/MODIS/0.05D/{year}
        "-A", "*.hdf",             # keep only .hdf files
        "-c",                      # resume partially downloaded files
        "--tries=0",               # retry without limit
        # Fixed: the original "--retry-conDSRefused" is not a wget option;
        # the intended flag treats "connection refused" as a transient error.
        "--retry-connrefused",
        "--waitretry=5",
        "--timeout=30",
        "-P", str(OUT_DIR),        # download directory prefix
        url,
    ]

    # check=True aborts the whole download loop on the first wget failure.
    subprocess.run(cmd, check=True)


### 1.2 Convert the daily (1D) HDF files to 8-day (8D) NC4 composites

In [5]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GLASS DSR daily HDF -> 8-day NC4 (zip_3) using GDAL + CDO
(HDF4Image/HDF4 Dataset style: no EOS_GRID subdatasets)

- Apply scale (0.01) using gdal_translate -unscale
- Convert to float32 directly in gdal_translate to preserve decimal precision
- Output in EPSG:4326 (already in geographic coordinates)
"""

import re
import sys
import shutil
import tempfile
import subprocess
from pathlib import Path
from datetime import datetime, timedelta

# -----------------------
# CONFIG
# -----------------------
# Directory holding the daily HDF inputs, and directory receiving the 8-day
# netCDF outputs (created here, at module load, if it does not exist).
IN_DIR  = Path("/share/home/dq076/bedrock/data/DSR/GLASS/rawdata").resolve()
OUT_DIR = Path("/share/home/dq076/bedrock/data/DSR/GLASS/8D").resolve()
OUT_DIR.mkdir(parents=True, exist_ok=True)

# File-name prefix of the input product and the variable name written to output.
PRODUCT = "GLASS05B01"
VAR_OUT_NAME = "DSR"

# Processing period (inclusive) and the length of each aggregation window.
START_DATE = datetime(2003, 1, 1)
END_DATE   = datetime(2020, 12, 31)
STEP_DAYS = 8

# GLASS metadata
# NODATA_IN is passed to gdal_translate as the -a_nodata value.
# NOTE(review): it is assigned together with -unscale; confirm that fill pixels
# still equal -1 (and not -0.01) after unscaling.
NODATA_IN = "-1"
# Informational only: not referenced by the code visible here (the scaling is
# applied by gdal_translate -unscale from the file's own metadata).
SCALE_FACTOR = 0.01  # GLASS scale_factor in metadata

# Output data type: forced to Float32 so that decimal precision is preserved
OUTPUT_DTYPE = "Float32"  # Types supported by GDAL: Byte, Int16, UInt16, Int32, UInt32, Float32, Float64

# Use EPSG:4326 (WGS84) as the output CRS (per the original author's note, the
# data are based on the Clarke 1866 ellipsoid, but the difference is small)
TARGET_SRS = "EPSG:4326"

# Choose 8-day aggregation: "timmean" or "timsum"
AGG = "timmean"

# Matches names such as GLASS05B01.V42.A2003001.2020313.hdf, capturing the
# 4-digit year and the 3-digit day of year. Not referenced by the code visible
# here (file lookup uses a glob pattern instead).
PAT = re.compile(rf"^{re.escape(PRODUCT)}\.V\d+\.A(\d{{4}})(\d{{3}})\..*\.hdf$")


def run(cmd, cwd=None):
    """
    Execute an external command and return its captured standard output.

    Args:
        cmd: Command and arguments as a list of strings.
        cwd: Optional working directory for the child process.

    Returns:
        The text the command wrote to stdout.

    Raises:
        RuntimeError: If the command exits with a non-zero status; the message
            contains the command line, working directory, stdout and stderr.
    """
    proc = subprocess.run(cmd, cwd=cwd, text=True, capture_output=True)
    if proc.returncode == 0:
        return proc.stdout

    # Assemble the failure report line by line; the trailing empty item
    # produces the final newline.
    report = "\n".join([
        "Command failed:",
        f"  cmd: {' '.join(cmd)}",
        f"  cwd: {cwd}",
        "  stdout:",
        proc.stdout,
        "  stderr:",
        proc.stderr,
        "",
    ])
    raise RuntimeError(report)


def yyyyddd(dt: datetime) -> str:
    """Return *dt* as year followed by zero-padded day of year, e.g. "2003001"."""
    # %j is the day of the year as a zero-padded three-digit number.
    return dt.strftime("%Y%j")


def yyyymmdd(dt: datetime) -> str:
    """Return *dt* formatted as "YYYY-MM-DD"."""
    return f"{dt:%Y-%m-%d}"


def find_hdf_for_date(dt: datetime) -> Path | None:
    """
    Locate the daily HDF file in IN_DIR for the given date.

    Files are matched by product prefix and the "A<year><day-of-year>" token in
    the file name. When several files match, the one that sorts first is
    returned; when none match, the result is None.
    """
    pattern = f"{PRODUCT}.V*.A{yyyyddd(dt)}.*.hdf"
    # The smallest path is the same element a sorted list would put first.
    return min(IN_DIR.glob(pattern), default=None)


def check_tools():
    """
    Verify that the external tools this script shells out to are usable.

    Raises:
        RuntimeError: If gdal_translate, gdalinfo or cdo is not on PATH (the
            first missing one is named), or if either probe command fails.
    """
    required = ("gdal_translate", "gdalinfo", "cdo")
    absent = [name for name in required if shutil.which(name) is None]
    if absent:
        tool = absent[0]
        raise RuntimeError(f"Missing required tool in PATH: {tool}")

    # Probe commands: ask gdalinfo about the netCDF format and cdo for its version.
    for probe in (["gdalinfo", "--format", "netCDF"], ["cdo", "-V"]):
        run(probe)


def hdf_dataset_uri(hdf_path: Path) -> str:
    """Return the dataset name handed to GDAL for *hdf_path*: the plain path string."""
    uri = f"{hdf_path}"
    return uri


def export_daily_nc(dt: datetime, hdf_path: Path, out_nc: Path):
    """
    Convert one daily HDF file into a netCDF-4 file stamped with a date.

    Two external commands are run in sequence:
      1. gdal_translate writes an intermediate netCDF-4 file with -unscale,
         output type OUTPUT_DTYPE, nodata NODATA_IN and SRS TARGET_SRS
         (assigned with -a_srs, i.e. no reprojection).
      2. cdo renames the variable to VAR_OUT_NAME, sets the date to *dt* and
         the time to 00:00:00, and writes *out_nc* compressed with zip_3.

    Args:
        dt: Date assigned to the output.
        hdf_path: Source daily HDF file.
        out_nc: Destination netCDF-4 file.

    Raises:
        RuntimeError: If either external command fails.
    """
    with tempfile.TemporaryDirectory(prefix="glass_DSR_day_") as scratch:
        staged_nc = Path(scratch) / "raw.nc4"

        print(f"    Converting {hdf_path.name} to netCDF...")

        # Single gdal_translate pass doing the whole conversion:
        #   -unscale   apply the band's scale/offset metadata
        #   -ot        write floating point directly, avoiding precision loss
        #   -a_srs     assign the coordinate system (not a reprojection)
        #   -a_nodata  set the nodata value
        translate_cmd = [
            "gdal_translate",
            "-q",
            "-of", "netCDF",
            "-co", "FORMAT=NC4",
            "-ot", OUTPUT_DTYPE,
            "-unscale",
            "-a_nodata", NODATA_IN,
            "-a_srs", TARGET_SRS,
            hdf_dataset_uri(hdf_path),
            str(staged_nc),
        ]
        run(translate_cmd)

        # Set the variable name and the time coordinate. CDO's "-b F32" is not
        # needed because gdal_translate already produced floating-point data.
        stamp_cmd = [
            "cdo", "-O", "-L",
            "-f", "nc4", "-z", "zip_3",
            f"-setname,{VAR_OUT_NAME}",
            f"-setdate,{yyyymmdd(dt)}",
            "-settime,00:00:00",
            str(staged_nc),
            str(out_nc),
        ]
        run(stamp_cmd)

        # Optional manual check of the output data type: `ncdump -h <out_nc>`.


def make_8day(daily_files: list[Path], out_nc: Path):
    """
    Merge daily files along time and write their aggregate (AGG) to *out_nc*.

    The aggregate is first written to a sibling "<name>.part" file and renamed
    onto *out_nc* only after cdo succeeds. An interrupted or failed run
    therefore never leaves a truncated *out_nc* behind that a later run would
    mistake for a finished product.

    Args:
        daily_files: Daily netCDF files to merge, one time step each.
        out_nc: Destination of the aggregated netCDF-4 file.

    Raises:
        RuntimeError: If a cdo command fails; *out_nc* is left untouched.
    """
    # Stage next to the destination so the final rename stays on one filesystem.
    partial = out_nc.with_name(out_nc.name + ".part")

    with tempfile.TemporaryDirectory(prefix="glass_DSR_8day_") as td:
        merged = Path(td) / "merged.nc4"

        # Concatenate the daily files into one time series.
        run([
            "cdo", "-O", "-L",
            "-f", "nc4", "-z", "zip_3",
            "mergetime", *map(str, daily_files), str(merged)
        ])

        try:
            # Reduce the merged series over time (mean or sum, per AGG).
            run([
                "cdo", "-O", "-L",
                "-f", "nc4", "-z", "zip_3",
                AGG, str(merged), str(partial)
            ])
            partial.replace(out_nc)
        finally:
            # No-op after a successful rename; removes leftovers otherwise.
            partial.unlink(missing_ok=True)


def main():
    """
    Build one aggregated netCDF file per STEP_DAYS-day window of the period.

    Windows start at START_DATE and advance by STEP_DAYS days until END_DATE.
    For each window the output is skipped if it already exists, skipped with a
    warning if the window extends past END_DATE or any daily HDF file is
    missing, and otherwise produced from the daily files. A summary of the
    done/skip/warn counts is printed at the end.

    NOTE(review): windows advance continuously from START_DATE and do not
    restart at day 001 of each year — confirm this is the intended convention.

    Raises:
        RuntimeError: If a required tool is missing or an external command fails.
    """
    check_tools()

    n_done = n_skip = n_warn = 0

    print(f"IN_DIR : {IN_DIR}")
    print(f"OUT_DIR: {OUT_DIR}")
    print(f"Range  : {START_DATE.date()} -> {END_DATE.date()} (8-day non-overlap)")
    print(f"AGG    : {AGG}")
    print(f"Output dtype: {OUTPUT_DTYPE}")
    print(f"Target SRS: {TARGET_SRS}")
    print("Note: Data already in geographic coordinates, using -a_srs (not reprojection)")

    step = timedelta(days=STEP_DAYS)
    next_start = START_DATE
    while next_start <= END_DATE:
        # Advance up front so every `continue` below moves to the next window.
        dt = next_start
        next_start = dt + step

        key = yyyyddd(dt)
        out_nc = OUT_DIR / f"DSR_GLASS_{key}_8D_p05_Wm2.nc4"

        if out_nc.exists():
            print(f"[SKIP] A{key} exists")
            n_skip += 1
            continue

        daily_dates = [dt + timedelta(days=i) for i in range(STEP_DAYS)]

        # A window reaching past END_DATE is incomplete by construction. Report
        # it as such rather than as "missing" days with an empty date list.
        n_inside = sum(d <= END_DATE for d in daily_dates)
        if n_inside < STEP_DAYS:
            print(f"[WARN] A{key}: only {n_inside} of {STEP_DAYS} day(s) fall "
                  f"on or before {yyyymmdd(END_DATE)}; window skipped")
            n_warn += 1
            continue

        daily_hdfs = [find_hdf_for_date(d) for d in daily_dates]
        missing = [d for d, h in zip(daily_dates, daily_hdfs) if h is None]

        if missing:
            print(f"[WARN] A{key}: missing {len(missing)} day(s): "
                  + ", ".join(yyyymmdd(x) for x in missing[:10])
                  + (" ..." if len(missing) > 10 else ""))
            n_warn += 1
            continue

        # Last day derived from the window itself instead of a hard-coded 7.
        print(f"[RUN ] A{key}: {yyyymmdd(dt)} .. {yyyymmdd(daily_dates[-1])}")

        with tempfile.TemporaryDirectory(prefix=f"glass_DSR_A{key}_") as td:
            td = Path(td)
            daily_ncs = []
            for d, hdf in zip(daily_dates, daily_hdfs):
                day_nc = td / f"DSR_{yyyymmdd(d)}.nc4"
                export_daily_nc(d, hdf, day_nc)
                daily_ncs.append(day_nc)

            make_8day(daily_ncs, out_nc)

        print(f"[OK ] A{key} -> {out_nc.name}")
        n_done += 1

    print("==== SUMMARY ====")
    print(f"done: {n_done}, skip: {n_skip}, warn(missing): {n_warn}")
    print(f"Output: {OUT_DIR}")


# Script entry point: run the pipeline and turn any error into a one-line
# message on stderr plus exit status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as exc:
        sys.stderr.write(f"[FATAL] {exc}\n")
        raise SystemExit(1)

IN_DIR : /share/home/dq076/bedrock/data/DSR/GLASS/rawdata
OUT_DIR: /share/home/dq076/bedrock/data/DSR/GLASS/8D
Range  : 2003-01-01 -> 2020-12-31 (8-day non-overlap)
AGG    : timmean
Output dtype: Float32
Target SRS: EPSG:4326
Note: Data already in geographic coordinates, using -a_srs (not reprojection)
[RUN ] A2003001: 2003-01-01 .. 2003-01-08
    Converting GLASS05B01.V42.A2003001.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003002.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003003.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003004.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003005.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003006.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003007.2020313.hdf to netCDF...
    Converting GLASS05B01.V42.A2003008.2020313.hdf to netCDF...
[OK ] A2003001 -> DSR_GLASS_2003001_8D_p05_Wm2.nc4
[RUN ] A2003009: 2003-01-09 .. 2003-01-16
    Converting GLASS05B01.V42.A2003009.2020313.hd

KeyboardInterrupt: 