Skip to content

Commit

Permalink
Bugfix/910 (#943)
Browse files Browse the repository at this point in the history
* Added 'ordered' keyword to _serialize_schema and _deserialize_schema

* Add tests for new 'ordered' keyword in from_yaml() and to_yaml()

* Added the 'ordered: false' key to docs/source/schema_inference.rst
  • Loading branch information
dstumpy committed Oct 2, 2022
1 parent f90789c commit dc6b39c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/source/schema_inference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ is a convenience method for this functionality.
coerce: true
strict: false
unique: null
ordered: false

You can edit this yaml file by specifying column names under the ``column``
key. The respective values map onto key-word arguments in the
Expand Down
2 changes: 2 additions & 0 deletions pandera/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def _serialize_schema(dataframe_schema):
"coerce": dataframe_schema.coerce,
"strict": dataframe_schema.strict,
"unique": dataframe_schema.unique,
"ordered": dataframe_schema.ordered,
}


Expand Down Expand Up @@ -266,6 +267,7 @@ def _deserialize_schema(serialized_schema):
coerce=serialized_schema.get("coerce", False),
strict=serialized_schema.get("strict", False),
unique=serialized_schema.get("unique", None),
ordered=serialized_schema.get("ordered", False),
)


Expand Down
78 changes: 78 additions & 0 deletions tests/io/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ def _create_schema(index="single"):
coerce: false
strict: true
unique: null
ordered: false
"""


Expand Down Expand Up @@ -637,6 +638,50 @@ def test_from_yaml_load_required_fields():
)


@pytest.mark.parametrize(
    "is_ordered,test_data,expected",
    [
        (True, {"b": [1], "a": [1]}, pandera.errors.SchemaError),
        (True, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
        (False, {"b": [1], "a": [1]}, pd.DataFrame(data={"b": [1], "a": [1]})),
        (False, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
    ],
)
def test_from_yaml_retains_ordered_keyword(is_ordered, test_data, expected):
    """Test that from_yaml() retains the 'ordered' keyword.

    :param is_ordered: value written to the ``ordered`` key of the yaml
        schema and expected on the deserialized schema.
    :param test_data: column-name -> values mapping used to build the
        dataframe that is validated against the deserialized schema.
    :param expected: either an exception class that validation must raise,
        or the dataframe that validation must return.
    """
    yaml_schema = f"""
    schema_type: dataframe
    version: {pandera.__version__}
    columns:
        a:
            dtype: int64
            required: true
        b:
            dtype: int64
            required: true
    checks: null
    index: null
    coerce: false
    strict: false
    unique: null
    ordered: {str(is_ordered).lower()}
    """

    # make sure the deserialized schema carries the 'ordered' keyword
    schema = io.from_yaml(yaml_schema)
    assert schema.ordered == is_ordered

    test_df = pd.DataFrame(data=test_data)

    # raise the error only when the ordered condition is violated
    if isinstance(expected, type) and issubclass(expected, Exception):
        # No ``assert`` here: if validate() wrongly succeeded, evaluating the
        # truthiness of the returned dataframe would raise ValueError and
        # obscure the real failure reported by pytest.raises.
        with pytest.raises(expected):
            schema.validate(test_df)
    else:
        validated_df = schema.validate(test_df)
        # compare against the parametrized expectation, not the input itself
        assert validated_df.equals(expected)


def test_io_yaml_file_obj():
"""Test read and write operation on file object."""
schema = _create_schema()
Expand Down Expand Up @@ -836,6 +881,38 @@ def unregistered_check(self, _):
CheckedSchemaModel.to_yaml()


@pytest.mark.parametrize(
    "is_ordered,test_data,expected",
    [
        (True, {"b": [1], "a": [1]}, pandera.errors.SchemaError),
        (True, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
        (False, {"b": [1], "a": [1]}, pd.DataFrame(data={"b": [1], "a": [1]})),
        (False, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
    ],
)
def test_to_yaml_retains_ordered_keyword(is_ordered, test_data, expected):
    """Test that to_yaml() retains the 'ordered' keyword.

    :param is_ordered: value passed as ``ordered`` to the schema under test
        and expected to survive serialization.
    :param test_data: column-name -> values mapping used to build the
        dataframe that is validated.
    :param expected: either an exception class that validation must raise,
        or the dataframe that validation must return.
    """
    schema = pandera.DataFrameSchema(
        columns={
            "a": pandera.Column(pandera.Int),
            "b": pandera.Column(pandera.Int),
        },
        ordered=is_ordered,
    )

    # make sure the serialized schema contains the ordered keyword *and* its
    # value; checking only for the key would pass even if the value were lost
    yaml_schema = schema.to_yaml()
    serialized_flag = f"ordered: {str(is_ordered).lower()}"
    assert serialized_flag in yaml_schema  # pylint: disable=E1135

    # round-trip through yaml so the behaviour below is driven by what was
    # actually serialized rather than by the original in-memory schema
    roundtrip_schema = io.from_yaml(yaml_schema)
    assert roundtrip_schema.ordered == is_ordered

    test_df = pd.DataFrame(data=test_data)

    # raise the error only when the ordered condition is violated
    if isinstance(expected, type) and issubclass(expected, Exception):
        # No ``assert`` here: if validate() wrongly succeeded, evaluating the
        # truthiness of the returned dataframe would raise ValueError and
        # obscure the real failure reported by pytest.raises.
        with pytest.raises(expected):
            roundtrip_schema.validate(test_df)
    else:
        validation_df = roundtrip_schema.validate(test_df)
        assert validation_df.equals(expected)


def test_serialize_deserialize_custom_datetime_checks():
"""
Test that custom checks for datetime columns can be serialized and
Expand Down Expand Up @@ -1079,6 +1156,7 @@ def datetime_check(pandas_obj, *, stat):
coerce: true
strict: true
unique: null
ordered: false
"""

VALID_FRICTIONLESS_DF = pd.DataFrame(
Expand Down

0 comments on commit dc6b39c

Please sign in to comment.