Skip to content

Commit

Permalink
bugfix/1607: coercion error should correctly report relevant failure cases
Browse files: browse the repository at this point in the history

Signed-off-by: cosmicBboy <niels.bantilan@gmail.com>
  • Loading branch information
cosmicBboy committed Apr 30, 2024
1 parent ca82618 commit 8f3e1be
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
2 changes: 1 addition & 1 deletion pandera/api/polars/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _build_columns( # pylint:disable=too-many-locals
try:
engine_dtype = pe.Engine.dtype(annotation.raw_annotation)
dtype = engine_dtype.type
except TypeError as exc:
except (TypeError, ValueError) as exc:
if annotation.metadata:
if field.dtype_kwargs:
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion pandera/backends/polars/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def failure_cases_metadata(
failure_case=pl.Series(
err.failure_cases.rows(named=True)
)
).select(pl.col.failure_case)
).select(pl.col.failure_case.struct.json_encode())
else:
failure_cases_df = err.failure_cases.rename(
{err.failure_cases.columns[0]: "failure_case"}
Expand Down
2 changes: 2 additions & 0 deletions pandera/engines/polars_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def try_coerce(self, data_container: PolarsDataContainer) -> pl.LazyFrame:
is_coercible, failure_cases = polars_coerce_failure_cases(
data_container=data_container, type_=self.type
)
if data_container.key:
failure_cases = failure_cases.select(data_container.key)
raise errors.ParserError(
f"Could not coerce {_key} LazyFrame with schema "
f"{data_container.lazyframe.schema} "
Expand Down
4 changes: 1 addition & 3 deletions tests/polars/test_polars_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,4 @@ def test_coerce_validation_depth_none(validation_depth_none, schema):
try:
schema.validate(data)
except pa.errors.SchemaError as exc:
assert exc.failure_cases.rows(named=True) == [
{"a": "foo", "b": "c"}
]
assert exc.failure_cases.rows(named=True) == [{"a": "foo"}]
25 changes: 25 additions & 0 deletions tests/polars/test_polars_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,3 +564,28 @@ def test_dataframe_schema_with_tz_agnostic_dates(time_zone, data):
if time_zone:
with pytest.raises(pa.errors.SchemaError):
schema_tz_sensitive.validate(lf)


def test_dataframe_coerce_col_with_null_in_other_column():
    """Check coercion failure reporting when another column holds a null.

    ``col1`` contains the string "abc", which cannot be coerced to int, and
    ``col2`` contains a null in that same row. Lazy validation is expected to
    raise ``SchemaErrors`` whose failure cases are exactly the uncoercible
    value and the resulting dtype mismatch for ``col1``.
    """

    class Model(DataFrameModel):
        col1: int = pa.Field(nullable=False, coerce=True)
        col2: float = pa.Field(nullable=True, coerce=True)

    invalid_lf = pl.DataFrame(
        {
            "col1": ["1", "2", "abc"],
            "col2": [1.0, 2.0, None],
        }
    )

    # pytest.raises fails the test when validation unexpectedly succeeds; a
    # bare try/except would skip the assertions and pass silently.
    with pytest.raises(pa.errors.SchemaErrors) as exc_info:
        Model.validate(invalid_lf, lazy=True)

    failures = exc_info.value.failure_cases.select("failure_case").rows(
        named=True
    )
    # two failures should occur:
    # - Coercing "abc" to int
    # - Validating that col1 is an integer
    assert failures == [
        {"failure_case": "abc"},
        {"failure_case": "String"},
    ]

0 comments on commit 8f3e1be

Please sign in to comment.