Skip to content

BUG: Fix inconsistency of converting empty categorical with dtype_backend='pyarrow' #61131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 17, 2025
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
@@ -636,6 +636,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where a :class:`Series` with :class:`CategoricalDtype` containing only missing values raised an error or was converted to ``null[pyarrow]`` (:issue:`59934`)
-

Datetimelike
1 change: 1 addition & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
@@ -1127,6 +1127,7 @@ def convert_dtypes(
or (
inferred_dtype.kind not in "iufcb"
and not isinstance(inferred_dtype, StringDtype)
and not isinstance(inferred_dtype, CategoricalDtype)
)
):
if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
15 changes: 15 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm

@@ -35,6 +37,19 @@ def test_convert_empty(self):
empty_df = pd.DataFrame()
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

# Requires pyarrow; the decorator skips the test when it is not installed.
@td.skip_if_no("pyarrow")
def test_convert_empty_categorical_to_pyarrow(self):
# GH#59934
# Regression test: categorical columns holding only missing values must
# come back from convert_dtypes(dtype_backend="pyarrow") unchanged.
df = pd.DataFrame(
{
# "A": all-None values, no categories passed.
"A": pd.Categorical([None] * 5),
# "B": all-None values, categories declared but never used.
"B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
}
)
converted = df.convert_dtypes(dtype_backend="pyarrow")
# The input frame itself is the expected result, so the comparison below
# checks that the conversion left both columns as they were.
expected = df
tm.assert_frame_equal(converted, expected)

def test_convert_dtypes_retain_column_names(self):
# GH#41435
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
10 changes: 10 additions & 0 deletions pandas/tests/series/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
import pytest

from pandas._libs import lib
import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
@@ -298,6 +299,15 @@ def test_convert_dtypes_pyarrow_null(self):
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

# Requires pyarrow; the decorator skips the test when it is not installed.
@td.skip_if_no("pyarrow")
# Two cases: no categories passed (None) and declared-but-unused categories.
@pytest.mark.parametrize("categories", [None, ["S1", "S2"]])
def test_convert_empty_categorical_to_pyarrow(self, categories):
# GH#59934
# Regression test: a categorical Series holding only missing values must
# come back from convert_dtypes(dtype_backend="pyarrow") unchanged.
ser = pd.Series(pd.Categorical([None] * 5, categories=categories))
converted = ser.convert_dtypes(dtype_backend="pyarrow")
# The input Series itself is the expected result, so the comparison below
# checks that the conversion left it as it was.
expected = ser
tm.assert_series_equal(converted, expected)

def test_convert_dtype_pyarrow_timezone_preserve(self):
# GH 60237
pytest.importorskip("pyarrow")