In [None]:
"""
Time-Series Combiner for NetCDF Files
=====================================

Steps performed
---------------
1. Gather all NetCDF files in INPUT_DIR that match FILE_GLOB.
2. Extract a timestamp from each filename with REGEX_TIME.
3. Open the files lazily (dask) and add the timestamp as a new
   time coordinate.
4. Concatenate along the "time" dimension.
5. (Optional) Keep only the variables listed in VARS_TO_KEEP.
6. Save the combined dataset to OUTPUT_FILE.

"""

from pathlib import Path
import re
import pandas as pd
import xarray as xr

# ========= USER CONFIGURATION ==========================================
# Edit the values below before running; main() reads all of them.
INPUT_DIR      = r"Path to Input Directory"             # folder with NetCDF files
FILE_GLOB      = "Temp_*.nc"                            # glob pattern relative to INPUT_DIR
# Five capture groups, in this order: day, month, year, hour, minute.
REGEX_TIME     = r"(\d{2})_(\d{2})_(\d{4})_(\d{2})_(\d{2})"   # dd_mm_yyyy_hh_mm
VARS_TO_KEEP   = None                                   # e.g. ["Temp"] or None for all
# Full path of the combined NetCDF *file* to write (it is passed to
# to_netcdf), not a folder, despite the placeholder wording.
OUTPUT_FILE    = r"Path to Output Directory"
OVERWRITE      = False                                  # set True to replace existing file
CHUNKS         = "auto"                                 # dask chunking for open_dataset
# =======================================================================


def extract_timestamp(path: Path, pattern: str) -> pd.Timestamp:
    """
    Build a pandas.Timestamp from the date/time digits in a filename.

    *pattern* is searched in *path.name* only (the directory part is
    ignored) and must capture five groups in the order
    day, month, year, hour, minute.

    Raises ValueError when *pattern* is not found in the filename.
    """
    match = re.search(pattern, path.name)
    if match is None:
        raise ValueError(
            f"Cannot find timestamp in '{path.name}' with pattern '{pattern}'"
        )

    day, month, year, hour, minute = match.groups()
    # Reassemble the pieces as "YYYY-MM-DD HH:MM" and let pandas parse it.
    stamp_text = f"{year}-{month}-{day} {hour}:{minute}"
    return pd.Timestamp(stamp_text)


def build_time_series(in_dir: Path,
                      file_glob: str,
                      regex_time: str,
                      vars_to_keep=None,
                      chunks="auto"):
    """
    Return an xarray Dataset concatenated along a new 'time' dimension.

    Every file in *in_dir* matching *file_glob* is opened with
    ``xr.open_dataset(fp, chunks=chunks)``, given a length-1 "time"
    dimension whose value is parsed from the filename with *regex_time*,
    and the pieces are concatenated and sorted chronologically.

    Parameters
    ----------
    in_dir : Path
        Directory that is searched for input files.
    file_glob : str
        Glob pattern, relative to *in_dir*.
    regex_time : str
        Regex handed to ``extract_timestamp`` for each filename.
    vars_to_keep : str, iterable of str, or None
        Variable name(s) to retain; ``None`` keeps everything.  A single
        name may be given as a plain string.
    chunks
        Passed through to ``xr.open_dataset``.

    Raises
    ------
    FileNotFoundError
        No file matches *file_glob* in *in_dir*.
    KeyError
        A file lacks one of the requested variables.

    Notes
    -----
    On success the source files stay open, because the returned dataset
    still refers to them.  On failure every file opened so far is closed
    before the exception propagates.
    NOTE(review): input files are assumed not to carry their own "time"
    dimension already -- confirm against the data.
    """
    files = sorted(in_dir.glob(file_glob))
    if not files:
        raise FileNotFoundError(f"No files match '{file_glob}' in {in_dir}")

    # Normalise the selection to a list: a bare string would otherwise be
    # iterated character by character, and a tuple is not treated as a
    # list of names by Dataset.__getitem__.
    if isinstance(vars_to_keep, str):
        wanted = [vars_to_keep]
    elif vars_to_keep is not None:
        wanted = list(vars_to_keep)
    else:
        wanted = None

    opened = []   # raw file-backed datasets, tracked so they can be closed
    pieces = []   # per-file datasets with the new time dimension added
    try:
        for fp in files:
            ts = extract_timestamp(fp, regex_time)
            ds = xr.open_dataset(fp, chunks=chunks)
            opened.append(ds)
            if wanted is not None:
                missing = [v for v in wanted if v not in ds]
                if missing:
                    raise KeyError(f"{fp.name}: missing var(s) {missing}")
                ds = ds[wanted]
            pieces.append(ds.expand_dims(time=[ts]))   # add new coordinate

        combined = xr.concat(pieces, dim="time")
        return combined.sortby("time")            # ensure chronological order
    except Exception:
        # Do not leak file handles when a later file is bad.
        for ds in opened:
            ds.close()
        raise


def main():
    """
    Combine the configured NetCDF files and write them to OUTPUT_FILE.

    Reads the module-level settings INPUT_DIR, FILE_GLOB, REGEX_TIME,
    VARS_TO_KEEP, OUTPUT_FILE, OVERWRITE and CHUNKS, builds the combined
    dataset with ``build_time_series`` and saves it with the netcdf4
    engine, printing the destination path when done.

    Raises
    ------
    IsADirectoryError
        OUTPUT_FILE points at an existing directory instead of a file.
    FileExistsError
        OUTPUT_FILE already exists and OVERWRITE is False.

    Exceptions raised by ``build_time_series`` propagate unchanged.
    """
    in_dir = Path(INPUT_DIR).expanduser().resolve()
    out_fp = Path(OUTPUT_FILE).expanduser().resolve()

    # A folder here would otherwise produce a misleading "exists" error
    # or an opaque failure inside the NetCDF writer.
    if out_fp.is_dir():
        raise IsADirectoryError(
            f"{out_fp} is a directory; OUTPUT_FILE must be the path of "
            f"the NetCDF file to write."
        )
    if out_fp.exists() and not OVERWRITE:
        raise FileExistsError(f"{out_fp} exists (set OVERWRITE=True).")

    combined_ts = build_time_series(in_dir,
                                    FILE_GLOB,
                                    REGEX_TIME,
                                    vars_to_keep=VARS_TO_KEEP,
                                    chunks=CHUNKS)

    # Create the destination folder only after the inputs combined
    # successfully, so a failed run leaves no empty directories behind.
    out_fp.parent.mkdir(parents=True, exist_ok=True)
    combined_ts.to_netcdf(out_fp, mode="w", engine="netcdf4")
    print(f"✔  Combined dataset saved to {out_fp}")


if __name__ == "__main__":
    main()


✔  Combined dataset saved to D:\New folder\Temp_series.nc
