diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 873c1e7cd41cc..19043e7d1143a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -838,6 +838,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.mode` raising an exception for nullable dtypes when ``dropna=False`` and the series contains no null values. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. 
(:issue:`60688`) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index f957ebdeaf67a..3487f5ebd050d 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -430,7 +430,7 @@ def mode(ndarray[htfunc_t] values, bint dropna, const uint8_t[:] mask=None): if na_counter > 0: res_mask = np.zeros(j+1, dtype=np.bool_) - res_mask[j] = True + res_mask[j] = (na_counter == max_count) return modes[:j + 1], res_mask diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0c0232bdc6d4c..76f2fdad591ff 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -987,7 +987,7 @@ def duplicated( def mode( values: ArrayLike, dropna: bool = True, mask: npt.NDArray[np.bool_] | None = None -) -> ArrayLike: +) -> tuple[np.ndarray, npt.NDArray[np.bool_]] | ExtensionArray: """ Returns the mode(s) of an array. @@ -1000,7 +1000,7 @@ def mode( Returns ------- - np.ndarray or ExtensionArray + Union[Tuple[np.ndarray, npt.NDArray[np.bool_]], ExtensionArray] """ values = _ensure_arraylike(values, func_name="mode") original = values @@ -1014,8 +1014,10 @@ def mode( values = _ensure_data(values) npresult, res_mask = htable.mode(values, dropna=dropna, mask=mask) - if res_mask is not None: - return npresult, res_mask # type: ignore[return-value] + if res_mask is None: + res_mask = np.zeros(npresult.shape, dtype=np.bool_) + else: + return npresult, res_mask try: npresult = safe_sort(npresult) @@ -1026,7 +1028,7 @@ def mode( ) result = _reconstruct_data(npresult, original.dtype, original) - return result + return result, res_mask def rank( diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index dbf2090e53579..dad38abccf4ee 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2511,8 +2511,9 @@ def _mode(self, dropna: bool = True) -> Self: Sorted, if possible. 
""" # error: Incompatible return value type (got "Union[ExtensionArray, - # ndarray[Any, Any]]", expected "Self") - return mode(self, dropna=dropna) # type: ignore[return-value] + # Tuple[np.ndarray, npt.NDArray[np.bool_]]", expected "Self") + result, _ = mode(self, dropna=dropna) + return result # type: ignore[return-value] def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if any( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0ce700772fdcc..647530151d5f6 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2477,7 +2477,7 @@ def _mode(self, dropna: bool = True) -> Categorical: if dropna: mask = self.isna() - res_codes = algorithms.mode(codes, mask=mask) + res_codes, _ = algorithms.mode(codes, mask=mask) res_codes = cast(np.ndarray, res_codes) assert res_codes.dtype == codes.dtype res = self._from_backing_data(res_codes) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8a79ab53442c3..eba738c926497 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1635,7 +1635,7 @@ def _mode(self, dropna: bool = True): if dropna: mask = self.isna() - i8modes = algorithms.mode(self.view("i8"), mask=mask) + i8modes, _ = algorithms.mode(self.view("i8"), mask=mask) npmodes = i8modes.view(self._ndarray.dtype) npmodes = cast(np.ndarray, npmodes) return self._from_backing_data(npmodes) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index f3a0cc0dccdb3..708a3818bcbb7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1099,12 +1099,8 @@ def value_counts(self, dropna: bool = True) -> Series: return Series(arr, index=index, name="count", copy=False) def _mode(self, dropna: bool = True) -> Self: - if dropna: - result = mode(self._data, dropna=dropna, mask=self._mask) - res_mask = np.zeros(result.shape, dtype=np.bool_) - else: - result, res_mask = 
mode(self._data, dropna=dropna, mask=self._mask) - result = type(self)(result, res_mask) # type: ignore[arg-type] + result, res_mask = mode(self._data, dropna=dropna, mask=self._mask) + result = type(self)(result, res_mask) return result[result.argsort()] @doc(ExtensionArray.equals) diff --git a/pandas/core/series.py b/pandas/core/series.py index da46f8ede3409..258e0100a8558 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2071,7 +2071,7 @@ def mode(self, dropna: bool = True) -> Series: # TODO: Add option for bins like value_counts() values = self._values if isinstance(values, np.ndarray): - res_values = algorithms.mode(values, dropna=dropna) + res_values, _ = algorithms.mode(values, dropna=dropna) else: res_values = values._mode(dropna=dropna) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 86ce60b1fc12b..5ffada8b95753 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -51,6 +51,29 @@ def test_mode_nullable_dtype(any_numeric_ea_dtype): tm.assert_series_equal(result, expected) +def test_mode_nullable_dtype_edge_case(any_numeric_ea_dtype): + # GH#58926 + ser = Series([1, 2, 3, 1], dtype=any_numeric_ea_dtype) + result = ser.mode(dropna=False) + expected = Series([1], dtype=any_numeric_ea_dtype) + tm.assert_series_equal(result, expected) + + ser2 = Series([1, 1, 2, 3, pd.NA], dtype=any_numeric_ea_dtype) + result = ser2.mode(dropna=False) + expected = Series([1], dtype=any_numeric_ea_dtype) + tm.assert_series_equal(result, expected) + + ser3 = Series([1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype) + result = ser3.mode(dropna=False) + expected = Series([pd.NA], dtype=any_numeric_ea_dtype) + tm.assert_series_equal(result, expected) + + ser4 = Series([1, 1, pd.NA, pd.NA], dtype=any_numeric_ea_dtype) + result = ser4.mode(dropna=False) + expected = Series([1, pd.NA], dtype=any_numeric_ea_dtype) + tm.assert_series_equal(result, expected) + + def 
test_mode_infer_string(): # GH#56183 pytest.importorskip("pyarrow") diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 611b92eb022d6..7fb421e27bb40 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1831,7 +1831,8 @@ def test_pct_max_many_rows(self): class TestMode: def test_no_mode(self): exp = Series([], dtype=np.float64, index=Index([], dtype=int)) - tm.assert_numpy_array_equal(algos.mode(np.array([])), exp.values) + result, _ = algos.mode(np.array([])) + tm.assert_numpy_array_equal(result, exp.values) def test_mode_single(self, any_real_numpy_dtype): # GH 15714 @@ -1843,20 +1844,24 @@ def test_mode_single(self, any_real_numpy_dtype): ser = Series(data_single, dtype=any_real_numpy_dtype) exp = Series(exp_single, dtype=any_real_numpy_dtype) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) ser = Series(data_multi, dtype=any_real_numpy_dtype) exp = Series(exp_multi, dtype=any_real_numpy_dtype) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) def test_mode_obj_int(self): exp = Series([1], dtype=int) - tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values) + result, _ = algos.mode(exp.values) + tm.assert_numpy_array_equal(result, exp.values) exp = Series(["a", "b", "c"], dtype=object) - tm.assert_numpy_array_equal(algos.mode(exp.values), exp.values) + result, _ = algos.mode(exp.values) + tm.assert_numpy_array_equal(result, exp.values) def test_number_mode(self, any_real_numpy_dtype): exp_single = [1] @@ -1867,12 +1872,14 @@ def test_number_mode(self, any_real_numpy_dtype): ser = Series(data_single, dtype=any_real_numpy_dtype) exp = Series(exp_single, dtype=any_real_numpy_dtype) - 
tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) ser = Series(data_multi, dtype=any_real_numpy_dtype) exp = Series(exp_multi, dtype=any_real_numpy_dtype) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) def test_strobj_mode(self): @@ -1881,7 +1888,8 @@ def test_strobj_mode(self): ser = Series(data, dtype="c") exp = Series(exp, dtype="c") - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) @pytest.mark.parametrize("dt", [str, object]) @@ -1891,10 +1899,11 @@ def test_strobj_multi_char(self, dt, using_infer_string): ser = Series(data, dtype=dt) exp = Series(exp, dtype=dt) + result, _ = algos.mode(ser.values) if using_infer_string and dt is str: - tm.assert_extension_array_equal(algos.mode(ser.values), exp.values) + tm.assert_extension_array_equal(result, exp.values) else: - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) def test_datelike_mode(self): @@ -1928,18 +1937,21 @@ def test_timedelta_mode(self): def test_mixed_dtype(self): exp = Series(["foo"], dtype=object) ser = Series([1, "foo", "foo"]) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) def test_uint64_overflow(self): exp = Series([2**63], dtype=np.uint64) ser = Series([1, 2**63, 2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, 
exp.values) tm.assert_series_equal(ser.mode(), exp) exp = Series([1, 2**63], dtype=np.uint64) ser = Series([1, 2**63], dtype=np.uint64) - tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values) + result, _ = algos.mode(ser.values) + tm.assert_numpy_array_equal(result, exp.values) tm.assert_series_equal(ser.mode(), exp) def test_categorical(self): @@ -1961,15 +1973,18 @@ def test_categorical(self): def test_index(self): idx = Index([1, 2, 3]) exp = Series([1, 2, 3], dtype=np.int64) - tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + result, _ = algos.mode(idx) + tm.assert_numpy_array_equal(result, exp.values) idx = Index([1, "a", "a"]) exp = Series(["a"], dtype=object) - tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + result, _ = algos.mode(idx) + tm.assert_numpy_array_equal(result, exp.values) idx = Index([1, 1, 2, 3, 3]) exp = Series([1, 3], dtype=np.int64) - tm.assert_numpy_array_equal(algos.mode(idx), exp.values) + result, _ = algos.mode(idx) + tm.assert_numpy_array_equal(result, exp.values) idx = Index( ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"],