Handle 0D data

tsdat · Apr 26, 2023 · ad121a2
1 parent 4949a25
commit ad121a2
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 12 deletions.
diff --git a/src/ncconvert/utils.py b/src/ncconvert/utils.py
@@ -38,12 +38,18 @@ def _to_dataframe_collection(
         dims = tuple(str(d) for d in data_var.dims)
         dimension_groups[dims].append(var_name)
 
-    # Save each dimension group to a file
+    # Create DataFrame collection
     for dim_group, variable_names in dimension_groups.items():
-        df = dataset[variable_names].to_dataframe(dim_order=dim_group)
-        dim_group_path = Path(filepath).with_suffix(
-            f".{'.'.join(dim_group)}.{extension}"
-        )
+        if dim_group == ():
+            # to_dataframe() doesn't support 0-D data so we make it into a series and
+            # then convert it into a DataFrame
+            df = pd.DataFrame(dataset[variable_names].to_pandas()).T
+            dim_group_path = Path(filepath).with_suffix(f".{extension}")
+        else:
+            df = dataset[variable_names].to_dataframe(dim_order=dim_group)
+            dim_group_path = Path(filepath).with_suffix(
+                f".{'.'.join(dim_group)}.{extension}"
+            )
         outputs.append((dim_group_path, df))
 
     return tuple(outputs)
diff --git a/test/conftest.py b/test/conftest.py
@@ -44,6 +44,7 @@ def dataset() -> xr.Dataset:
                 [1, 2, 3, 4],
                 {"units": "1", "_FillValue": -9999.0},
             ),
+            "static": 1.5,
         },
         attrs={
             "datastream": "humboldt.buoy.c1",

diff --git a/test/test_csv.py b/test/test_csv.py
@@ -37,21 +37,26 @@ def test_csv_collection(dataset: xr.Dataset):
 
     output_paths, metadata_path = to_csv_collection(dataset, filepath)
 
-    assert len(output_paths) == 3
+    assert len(output_paths) == 4
+    assert filepath.with_suffix(".csv") in output_paths
     assert filepath.with_suffix(".height.csv") in output_paths
     assert filepath.with_suffix(".time.csv") in output_paths
     assert filepath.with_suffix(".time.height.csv") in output_paths
     assert metadata_path == filepath.with_suffix(".json")
 
-    h_df = pd.read_csv(sorted(output_paths)[0])  # type: ignore
+    df = pd.read_csv(sorted(output_paths)[0])  # type: ignore
+    assert len(df.index) == 1
+    assert "static" in list(df.columns)  # may also have 'Unnamed: 0' column
+
+    h_df = pd.read_csv(sorted(output_paths)[1])  # type: ignore
     assert len(h_df.index) == len(dataset.height)
     assert list(h_df.columns) == ["height", "other"]
 
-    t_df = pd.read_csv(sorted(output_paths)[1])  # type: ignore
+    t_df = pd.read_csv(sorted(output_paths)[2])  # type: ignore
     assert len(t_df.index) == len(dataset.time)
     assert list(t_df.columns) == ["time", "humidity"]
 
-    th_df = pd.read_csv(sorted(output_paths)[2])  # type: ignore
+    th_df = pd.read_csv(sorted(output_paths)[3])  # type: ignore
     assert len(th_df.index) == len(dataset.time) * len(dataset.height)
     assert list(th_df.columns) == ["time", "height", "temperature"]
 

diff --git a/test/test_parquet.py b/test/test_parquet.py
@@ -37,8 +37,9 @@ def test_parquet_collection(dataset: xr.Dataset):
 
     output_paths, metadata_path = to_parquet_collection(dataset, filepath)
 
-    assert len(output_paths) == 3
+    assert len(output_paths) == 4
     assert filepath.with_suffix(".height.parquet") in output_paths
+    assert filepath.with_suffix(".parquet") in output_paths
     assert filepath.with_suffix(".time.parquet") in output_paths
     assert filepath.with_suffix(".time.height.parquet") in output_paths
     assert metadata_path == filepath.with_suffix(".json")
@@ -47,11 +48,15 @@ def test_parquet_collection(dataset: xr.Dataset):
     assert len(h_df.index) == len(dataset.height)
     assert list(h_df.columns) == ["other"]
 
-    th_df = pd.read_parquet(sorted(output_paths)[1])  # type: ignore
+    df = pd.read_parquet(sorted(output_paths)[1])  # type: ignore
+    assert len(df.index) == 1
+    assert list(df.columns) == ["static"]
+
+    th_df = pd.read_parquet(sorted(output_paths)[2])  # type: ignore
     assert len(th_df.index) == len(dataset.time) * len(dataset.height)
     assert list(th_df.columns) == ["temperature"]
 
-    t_df = pd.read_parquet(sorted(output_paths)[2])  # type: ignore
+    t_df = pd.read_parquet(sorted(output_paths)[3])  # type: ignore
     assert len(t_df.index) == len(dataset.time)
     assert list(t_df.columns) == ["humidity"]