diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 6ba07b1761557..1255c5a557d27 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1143,6 +1143,7 @@ def convert_dtypes(
                 base_dtype.kind == "O"  # type: ignore[union-attr]
                 and input_array.size > 0
                 and isna(input_array).all()
+                and not isinstance(input_array.dtype, CategoricalDtype)
             ):
                 import pyarrow as pa

diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index e7f6e5d625d3e..ab4f849c741fd 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest

+import pandas.util._test_decorators as td
+
 import pandas as pd
 import pandas._testing as tm

@@ -35,6 +37,28 @@ def test_convert_empty(self):
         empty_df = pd.DataFrame()
         tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())

+    @td.skip_if_no("pyarrow")
+    def test_convert_empty_categorical_to_pyarrow(self):
+        # GH#59934
+        df = pd.DataFrame(
+            {
+                "A": pd.Categorical([None] * 5),
+                "B": pd.Categorical([None] * 5, categories=["B1", "B2"]),
+            }
+        )
+        # Snapshot the input before converting so the comparison below is
+        # not made against an alias of the object passed to convert_dtypes.
+        expected = df.copy()
+        converted = df.convert_dtypes(dtype_backend="pyarrow")
+        tm.assert_frame_equal(converted, expected)
+
+        assert converted.A.dtype == "category", "Dtype in column A is not 'category'"
+        assert converted.B.dtype == "category", "Dtype in column B is not 'category'"
+        assert converted.A.cat.categories.empty, "Categories in column A are not empty"
+        # Compare exactly: ``isin([...]).all()`` is vacuously True when the
+        # categories are empty, so it cannot detect dropped categories.
+        assert list(converted.B.cat.categories) == ["B1", "B2"]
+
     def test_convert_dtypes_retain_column_names(self):
         # GH#41435
         df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index 90c4056a39e84..cb1fa11c9f7c4 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -4,6 +4,7 @@
 import pytest

 from pandas._libs import lib
+import pandas.util._test_decorators as td

 import pandas as pd
 import pandas._testing as tm
@@ -297,3 +298,23 @@ def test_convert_dtypes_pyarrow_null(self):
         result = ser.convert_dtypes(dtype_backend="pyarrow")
         expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
         tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no("pyarrow")
+    def test_convert_empty_categorical_to_pyarrow(self):
+        # GH#59934
+        ser1 = pd.Series(pd.Categorical([None] * 5))
+        expected1 = ser1.copy()
+        converted1 = ser1.convert_dtypes(dtype_backend="pyarrow")
+
+        tm.assert_series_equal(converted1, expected1)
+        assert converted1.dtype == "category", "Series dtype is not 'category'"
+        assert converted1.cat.categories.empty, "Series categories are not empty"
+
+        ser2 = pd.Series(pd.Categorical([None] * 5, categories=["S1", "S2"]))
+        expected2 = ser2.copy()
+        converted2 = ser2.convert_dtypes(dtype_backend="pyarrow")
+
+        # Compare exactly: ``isin([...]).all()`` is vacuously True when the
+        # categories are empty, so it cannot detect dropped categories.
+        tm.assert_series_equal(converted2, expected2)
+        assert list(converted2.cat.categories) == ["S1", "S2"]