Skip to content

Commit

Permalink
bugfix/904: ignore_na only ignores df records if all values are NaN (#909)
Browse files Browse the repository at this point in the history
* bugfix/904: ignore_na only ignores df records if all values are NaN

fixes #904

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

* fix lint

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>

Signed-off-by: Niels Bantilan <niels.bantilan@gmail.com>
  • Loading branch information
cosmicBboy committed Aug 11, 2022
1 parent 0c8dd03 commit 7a9c6ca
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pandera/check_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def prepare_series_check_output(
"""
if ignore_na:
isna = (
check_obj.isna().any(axis="columns")
check_obj.isna().all(axis="columns")
if isinstance(check_obj, pd.DataFrame)
else check_obj.isna()
)
Expand Down
16 changes: 16 additions & 0 deletions tests/core/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,19 @@ def test_dataframe_check_schema_error() -> None:
"index == 3 & column == 'b'"
).failure_case.iloc[0]
)


def test_prepare_series_check_output_df_level():
    """Dataframe-level checks with ``ignore_na=True`` skip only all-null rows.

    Column ``c`` is null in every row, so each record contains at least one
    null value, while only the last record is null across all columns. The
    first six rows must therefore still be evaluated by the check function,
    and only the final row is reported as passing because it is ignored.
    """
    frame = pd.DataFrame(
        {
            "a": [1, 1, 2, 2, 3, 3, None],
            "b": [2, 1, 4, 3, 6, 5, None],
            "c": [None] * 7,
        }
    )
    b_is_double_a = Check(lambda df: df["b"] == df["a"] * 2, ignore_na=True)
    outcome = b_is_double_a(frame)

    # Rows alternate pass/fail on ``b == a * 2``; the trailing all-null
    # record evaluates to True.
    anticipated = [True, False, True, False, True, False, True]
    assert outcome.check_output.tolist() == anticipated

0 comments on commit 7a9c6ca

Please sign in to comment.