From be68c34ca493385c9562abefe511099cc3ced38f Mon Sep 17 00:00:00 2001 From: josephnowak Date: Mon, 18 Aug 2025 20:22:13 +0200 Subject: [PATCH 1/4] Recreate the error on flox --- tests/test_xarray.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_xarray.py b/tests/test_xarray.py index 737c7413..c5d5bd30 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -798,3 +798,20 @@ def test_groupby_preserve_dtype(reduction): expected = getattr(np, reduction)(ds.test.data, axis=0).dtype assert actual == expected + + +@requires_dask +def test_resample_first_last_empty(): + xr.set_options(use_flox=True, use_numbagg=False, use_bottleneck=False) + + from dask.distributed import Client + + Client() + arr = xr.DataArray( + np.nan, + coords={ + "date": pd.to_datetime(["2025-03-24", "2025-06-23"]), + }, + dims=["date"], + ).chunk(date=(1, 1)) + arr.resample(date="QE").last().compute() From e1881fb6c901a4ef55ecfed73c8c085779389439 Mon Sep 17 00:00:00 2001 From: josephnowak Date: Mon, 18 Aug 2025 20:52:38 +0200 Subject: [PATCH 2/4] Replicate the error without using dask distributed --- tests/test_xarray.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/test_xarray.py b/tests/test_xarray.py index c5d5bd30..866ceee5 100644 --- a/tests/test_xarray.py +++ b/tests/test_xarray.py @@ -802,16 +802,12 @@ def test_groupby_preserve_dtype(reduction): @requires_dask def test_resample_first_last_empty(): - xr.set_options(use_flox=True, use_numbagg=False, use_bottleneck=False) - - from dask.distributed import Client - - Client() - arr = xr.DataArray( - np.nan, - coords={ - "date": pd.to_datetime(["2025-03-24", "2025-06-23"]), - }, - dims=["date"], - ).chunk(date=(1, 1)) - arr.resample(date="QE").last().compute() + with xr.set_options(use_flox=True), dask.config.set(scheduler="processes"): + arr = xr.DataArray( + np.nan, + coords={ + "date": pd.to_datetime(["2025-03-24", "2025-06-23"]), + }, + dims=["date"], + ).chunk(date=(1, 1)) + arr.resample(date="QE").last().compute() From c684d1fc3d6fc4f22d5e2ba91abd78f872915847 Mon Sep 17 00:00:00 2001 From: josephnowak Date: Mon, 18 Aug 2025 21:55:02 +0200 Subject: [PATCH 3/4] Use == instead of is when comparing the fill_value with xrdtypes.NA --- flox/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flox/core.py b/flox/core.py index d773d189..efdaf39f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -892,7 +892,7 @@ def reindex_( "Currently does not support reindexing with object arrays of tuples. " "These occur when grouping by multi-indexed variables in xarray." ) - if fill_value is xrdtypes.NA or isnull(fill_value): + if fill_value == xrdtypes.NA or isnull(fill_value): new_dtype, fill_value = xrdtypes.maybe_promote(array.dtype) else: new_dtype = array.dtype @@ -1380,7 +1380,7 @@ def _finalize_results( if fill_value is None: raise ValueError("Filling is required but fill_value is None.") # This allows us to match xarray's type promotion rules - if fill_value is xrdtypes.NA: + if fill_value == xrdtypes.NA: new_dtype, fill_value = xrdtypes.maybe_promote(finalized[agg.name].dtype) finalized[agg.name] = finalized[agg.name].astype(new_dtype) From 0a48afc01d06a8e2d2928c8ffa37fc0a19bbdfef Mon Sep 17 00:00:00 2001 From: josephnowak Date: Mon, 18 Aug 2025 23:01:04 +0200 Subject: [PATCH 4/4] Add a comment to explain why we changed from is to == --- flox/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flox/core.py b/flox/core.py index efdaf39f..3109e85d 100644 --- a/flox/core.py +++ b/flox/core.py @@ -892,6 +892,7 @@ def reindex_( "Currently does not support reindexing with object arrays of tuples. " "These occur when grouping by multi-indexed variables in xarray." ) + # Use '==' instead of 'is', as Dask serialization can break identity checks. if fill_value == xrdtypes.NA or isnull(fill_value): new_dtype, fill_value = xrdtypes.maybe_promote(array.dtype) else: @@ -1380,6 +1381,7 @@ def _finalize_results( if fill_value is None: raise ValueError("Filling is required but fill_value is None.") # This allows us to match xarray's type promotion rules + # Use '==' instead of 'is', as Dask serialization can break identity checks. if fill_value == xrdtypes.NA: new_dtype, fill_value = xrdtypes.maybe_promote(finalized[agg.name].dtype) finalized[agg.name] = finalized[agg.name].astype(new_dtype)