Support era5 and ndbc files in read_dataset and stop relying on lon/lat coordinates in order to identify the appropriate file format
wavespectra · Jan 24, 2024 · 46e4674 · 46e4674
1 parent 3aef315
commit 46e4674
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 20 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -18,11 +18,13 @@ ___________________
 
 New Features
 ------------
+* Support ERA5 and NDBC netcdf file types in `read_dataset` reader.
 * Support datasets with no lat / lon variables when writing octopus and swan ascii.
   There is now an option to specify the coordinates manually or skip specifying them.
 
 Internal Changes
 ----------------
+* Stop relying on lon/lat coordinates in order to identify file types in read_dataset.
 * Ensure octopus writer can handle lon/lat defined as coordinates in dataset rather
   than data_vars.
 * Fix octopus writer to support datasets without site as a dimension.

diff --git a/wavespectra/input/dataset.py b/wavespectra/input/dataset.py
@@ -4,6 +4,8 @@
 from wavespectra.input.ww3 import from_ww3
 from wavespectra.input.wwm import from_wwm
 from wavespectra.input.ncswan import from_ncswan
+from wavespectra.input.era5 import from_era5
+from wavespectra.input.ndbc import from_ndbc
 from wavespectra.specdataset import SpecDataset
 
 logger = logging.getLogger(__name__)
@@ -20,17 +22,12 @@ def read_dataset(dset):
             consistent any supported file format (currently WW3, SWAN and WWM).
 
     """
-    vars_wavespectra = {"freq", "dir", "site", "efth", "lon", "lat"}
-    vars_ww3 = {"frequency", "direction", "station", "efth", "longitude", "latitude"}
-    vars_wwm = {"nfreq", "ndir", "nbstation", "AC", "lon", "lat"}
-    vars_ncswan = {
-        "frequency",
-        "direction",
-        "points",
-        "density",
-        "longitude",
-        "latitude",
-    }
+    vars_wavespectra = {"freq", "dir", "site", "efth"}
+    vars_ww3 = {"frequency", "direction", "station", "efth"}
+    vars_wwm = {"nfreq", "ndir", "nbstation", "AC"}
+    vars_era5 = {"frequency", "direction", "d2fd"}
+    vars_ndbc = {"frequency", "direction", "spectral_wave_density"}
+    vars_ncswan = {"frequency", "direction", "points", "density"}
 
     vars_dset = set(dset.variables.keys()).union(dset.dims)
     if not vars_wavespectra - vars_dset:
@@ -45,6 +42,12 @@ def read_dataset(dset):
     elif not vars_wwm - vars_dset:
         logger.debug("Dataset identified as wwm")
         func = from_wwm
+    elif not vars_era5 - vars_dset:
+        logger.debug("Dataset identified as era5")
+        func = from_era5
+    elif not vars_ndbc - vars_dset:
+        logger.debug("Dataset identified as ndbc")
+        func = from_ndbc
     else:
         raise ValueError(
             f"Cannot identify appropriate reader from dataset variables: {vars_dset}"

diff --git a/wavespectra/input/era5.py b/wavespectra/input/era5.py
@@ -5,6 +5,10 @@
 from wavespectra.input.netcdf import read_netcdf
 
 
+DEFAULT_FREQS = np.full(30, 0.03453) * (1.1 ** np.arange(0, 30))
+DEFAULT_DIRS = (np.arange(7.5, 352.5 + 15, 15) + 180) % 360
+
+
 def read_era5(filename_or_fileglob, chunks={}, freqs=None, dirs=None):
     """Read Spectra from ECMWF ERA5 netCDF format.
 
@@ -21,12 +25,12 @@ def read_era5(filename_or_fileglob, chunks={}, freqs=None, dirs=None):
         - dset (SpecDataset): spectra dataset object read from netcdf file.
 
     Note:
+        - Frequency and direction coordinates seem to have only integer positions
+          which is why they are allowed to be specified as a parameter.
         - If file is large to fit in memory, consider specifying chunks for
           'time' and/or 'station' dims.
 
     """
-    default_freqs = np.full(30, 0.03453) * (1.1 ** np.arange(0, 30))
-    default_dirs = direction = (np.arange(7.5, 352.5 + 15, 15) + 180) % 360
 
     dset = read_netcdf(
         filename_or_fileglob,
@@ -38,15 +42,28 @@ def read_era5(filename_or_fileglob, chunks={}, freqs=None, dirs=None):
         timename="time",
         chunks=chunks,
     )
+    return from_era5(dset, freqs=freqs, dirs=dirs)
+
+
+def from_era5(dset, freqs=None, dirs=None):
+    """Format ERA5 netcdf dataset to receive wavespectra accessor.
+
+    Args:
+        - dset (xr.Dataset): Dataset created from an ERA5 netcdf file.
+
+    Returns:
+        - Formatted dataset with the SpecDataset accessor in the `spec` namespace.
+
+    """
 
     # Convert ERA5 format to wavespectra format
     dset = 10**dset * np.pi / 180
     dset = dset.fillna(0)
 
-    dset[attrs.FREQNAME] = freqs if freqs else default_freqs
-    dset[attrs.DIRNAME] = dirs if dirs else default_dirs
+    dset[attrs.FREQNAME] = freqs if freqs else DEFAULT_FREQS
+    dset[attrs.DIRNAME] = dirs if dirs else DEFAULT_DIRS
 
     # Setting standard attributes
     set_spec_attributes(dset)
 
-    return dset
+    return dset
diff --git a/wavespectra/input/ncswan.py b/wavespectra/input/ncswan.py
@@ -53,11 +53,11 @@ def read_ncswan(filename_or_fileglob, file_format="netcdf", mapping=MAPPING, chu
 def from_ncswan(dset):
     """Format SWAN netcdf dataset to receive wavespectra accessor.
 
-    Args:o
-        dset (xr.Dataset): Dataset created from a SWAN netcdf file.
+    Args:
+        - dset (xr.Dataset): Dataset created from a SWAN netcdf file.
 
     Returns:
-        Formated dataset with the SpecDataset accessor in the `spec` namespace.
+        - Formatted dataset with the SpecDataset accessor in the `spec` namespace.
 
     """
     dset = dset.rename(MAPPING)

diff --git a/wavespectra/input/ndbc.py b/wavespectra/input/ndbc.py
@@ -3,7 +3,6 @@
 https://dods.ndbc.noaa.gov/
 
 """
-import warnings
 import xarray as xr
 import numpy as np