Skip to content

Commit

Permalink
fix: pandas pyarrow string validation (#1636)
Browse files Browse the repository at this point in the history
Signed-off-by: Ajith Aravind <ajith.aravind100@gmail.com>
  • Loading branch information
aaravind100 committed May 14, 2024
1 parent 18aacca commit c815a6d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1660,7 +1660,9 @@ class ArrowInt8(ArrowInt16):
type = pd.ArrowDtype(pyarrow.int8())
bit_width: int = 8

@Engine.register_dtype(equivalents=[pyarrow.string])
@Engine.register_dtype(
equivalents=[pyarrow.string, pd.ArrowDtype(pyarrow.string())]
)
@immutable
class ArrowString(DataType, dtypes.String):
"""Semantic representation of a :class:`pyarrow.string`."""
Expand Down
6 changes: 6 additions & 0 deletions tests/core/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,12 @@ def test_invalid_pandas_extension_dtype():

def test_check_equivalent(dtype: Any, pd_dtype: Any):
    """Check that the engine dtype built from ``pd_dtype`` validates ``dtype``.

    Both arguments are resolved through ``pandas_engine.Engine.dtype`` and the
    resolved ``pd_dtype`` must accept the resolved ``dtype`` via ``check()``.

    The case is skipped when pyarrow is installed, pandas is 2.0.0 or newer,
    and ``dtype`` equals ``"string[pyarrow]"``; the skip message records why.
    """
    # Name the skip condition so the guard below reads as a single decision.
    # Operand order is kept so short-circuiting matches the original exactly.
    is_pyarrow_string_case = (
        pandas_engine.PYARROW_INSTALLED
        and pandas_engine.PANDAS_2_0_0_PLUS
        and dtype == "string[pyarrow]"
    )
    if is_pyarrow_string_case:
        pytest.skip("`string[pyarrow]` gets parsed to type `string` by pandas")

    # Resolve the pandas-side dtype first, then the expected one.
    resolved_pd_dtype = pandas_engine.Engine.dtype(pd_dtype)
    resolved_dtype = pandas_engine.Engine.dtype(dtype)
    assert resolved_pd_dtype.check(resolved_dtype)
Expand Down

0 comments on commit c815a6d

Please sign in to comment.