Skip to content

Commit

Permalink
Handle 0D data
Browse files (browse the repository at this point in the history)
  • Loading branch information
maxwelllevin committed Apr 26, 2023
1 parent 4949a25 commit ad121a2
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 12 deletions.
16 changes: 11 additions & 5 deletions src/ncconvert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,18 @@ def _to_dataframe_collection(
dims = tuple(str(d) for d in data_var.dims)
dimension_groups[dims].append(var_name)

# Save each dimension group to a file
# Create DataFrame collection
for dim_group, variable_names in dimension_groups.items():
df = dataset[variable_names].to_dataframe(dim_order=dim_group)
dim_group_path = Path(filepath).with_suffix(
f".{'.'.join(dim_group)}.{extension}"
)
if dim_group == ():
# to_dataframe() doesn't support 0-D data so we make it into a series and
# then convert it into a DataFrame
df = pd.DataFrame(dataset[variable_names].to_pandas()).T
dim_group_path = Path(filepath).with_suffix(f".{extension}")
else:
df = dataset[variable_names].to_dataframe(dim_order=dim_group)
dim_group_path = Path(filepath).with_suffix(
f".{'.'.join(dim_group)}.{extension}"
)
outputs.append((dim_group_path, df))

return tuple(outputs)
1 change: 1 addition & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def dataset() -> xr.Dataset:
[1, 2, 3, 4],
{"units": "1", "_FillValue": -9999.0},
),
"static": 1.5,
},
attrs={
"datastream": "humboldt.buoy.c1",
Expand Down
13 changes: 9 additions & 4 deletions test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,26 @@ def test_csv_collection(dataset: xr.Dataset):

output_paths, metadata_path = to_csv_collection(dataset, filepath)

assert len(output_paths) == 3
assert len(output_paths) == 4
assert filepath.with_suffix(".csv") in output_paths
assert filepath.with_suffix(".height.csv") in output_paths
assert filepath.with_suffix(".time.csv") in output_paths
assert filepath.with_suffix(".time.height.csv") in output_paths
assert metadata_path == filepath.with_suffix(".json")

h_df = pd.read_csv(sorted(output_paths)[0]) # type: ignore
df = pd.read_csv(sorted(output_paths)[0]) # type: ignore
assert len(df.index) == 1
assert "static" in list(df.columns) # may also have 'Unnamed: 0' column

h_df = pd.read_csv(sorted(output_paths)[1]) # type: ignore
assert len(h_df.index) == len(dataset.height)
assert list(h_df.columns) == ["height", "other"]

t_df = pd.read_csv(sorted(output_paths)[1]) # type: ignore
t_df = pd.read_csv(sorted(output_paths)[2]) # type: ignore
assert len(t_df.index) == len(dataset.time)
assert list(t_df.columns) == ["time", "humidity"]

th_df = pd.read_csv(sorted(output_paths)[2]) # type: ignore
th_df = pd.read_csv(sorted(output_paths)[3]) # type: ignore
assert len(th_df.index) == len(dataset.time) * len(dataset.height)
assert list(th_df.columns) == ["time", "height", "temperature"]

Expand Down
11 changes: 8 additions & 3 deletions test/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ def test_parquet_collection(dataset: xr.Dataset):

output_paths, metadata_path = to_parquet_collection(dataset, filepath)

assert len(output_paths) == 3
assert len(output_paths) == 4
assert filepath.with_suffix(".height.parquet") in output_paths
assert filepath.with_suffix(".parquet") in output_paths
assert filepath.with_suffix(".time.parquet") in output_paths
assert filepath.with_suffix(".time.height.parquet") in output_paths
assert metadata_path == filepath.with_suffix(".json")
Expand All @@ -47,11 +48,15 @@ def test_parquet_collection(dataset: xr.Dataset):
assert len(h_df.index) == len(dataset.height)
assert list(h_df.columns) == ["other"]

th_df = pd.read_parquet(sorted(output_paths)[1]) # type: ignore
df = pd.read_parquet(sorted(output_paths)[1]) # type: ignore
assert len(df.index) == 1
assert list(df.columns) == ["static"]

th_df = pd.read_parquet(sorted(output_paths)[2]) # type: ignore
assert len(th_df.index) == len(dataset.time) * len(dataset.height)
assert list(th_df.columns) == ["temperature"]

t_df = pd.read_parquet(sorted(output_paths)[2]) # type: ignore
t_df = pd.read_parquet(sorted(output_paths)[3]) # type: ignore
assert len(t_df.index) == len(dataset.time)
assert list(t_df.columns) == ["humidity"]

Expand Down

0 comments on commit ad121a2

Please sign in to comment.