Bugfix/910 (#943)

* Added the 'ordered' keyword to _serialize_schema and _deserialize_schema
* Added tests for the new 'ordered' keyword in from_yaml() and to_yaml()
* Added the `ordered: false` key to docs/source/schema_inference.rst
unionai-oss · Oct 2, 2022 · dc6b39c · dc6b39c
1 parent f90789c
commit dc6b39c
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 0 deletions.
diff --git a/docs/source/schema_inference.rst b/docs/source/schema_inference.rst
@@ -237,6 +237,7 @@ is a convenience method for this functionality.
     coerce: true
     strict: false
     unique: null
+    ordered: false
 
 You can edit this yaml file by specifying column names under the ``column``
 key. The respective values map onto key-word arguments in the

diff --git a/pandera/io.py b/pandera/io.py
@@ -154,6 +154,7 @@ def _serialize_schema(dataframe_schema):
         "coerce": dataframe_schema.coerce,
         "strict": dataframe_schema.strict,
         "unique": dataframe_schema.unique,
+        "ordered": dataframe_schema.ordered,
     }
 
 
@@ -266,6 +267,7 @@ def _deserialize_schema(serialized_schema):
         coerce=serialized_schema.get("coerce", False),
         strict=serialized_schema.get("strict", False),
         unique=serialized_schema.get("unique", None),
+        ordered=serialized_schema.get("ordered", False),
     )
 
 

diff --git a/tests/io/test_io.py b/tests/io/test_io.py
@@ -237,6 +237,7 @@ def _create_schema(index="single"):
 coerce: false
 strict: true
 unique: null
+ordered: false
 """
 
 
@@ -637,6 +638,50 @@ def test_from_yaml_load_required_fields():
         )
 
 
+@pytest.mark.parametrize(
+    "is_ordered,test_data,expected",
+    [
+        (True, {"b": [1], "a": [1]}, pandera.errors.SchemaError),
+        (True, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
+        (False, {"b": [1], "a": [1]}, pd.DataFrame(data={"b": [1], "a": [1]})),
+        (False, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
+    ],
+)
+def test_from_yaml_retains_ordered_keyword(is_ordered, test_data, expected):
+    """Test that from_yaml() retains the 'ordered' keyword."""
+    yaml_schema = f"""
+    schema_type: dataframe
+    version: {pandera.__version__}
+    columns:
+        a:
+            dtype: int64
+            required: true
+        b:
+            dtype: int64
+            required: true
+    checks: null
+    index: null
+    coerce: false
+    strict: false
+    unique: null
+    ordered: {str(is_ordered).lower()}
+    """
+
+    # make sure the schema contains the ordered key word
+    schema = io.from_yaml(yaml_schema)
+    assert schema.ordered == is_ordered
+
+    # raise the error only when the ordered condition is violated
+    test_df = pd.DataFrame(data=test_data)
+
+    if isinstance(expected, type) and issubclass(expected, Exception):
+        with pytest.raises(expected):
+            assert schema.validate(test_df)
+    else:
+        validation = schema.validate(test_df)
+        assert test_df.equals(validation)
+
+
 def test_io_yaml_file_obj():
     """Test read and write operation on file object."""
     schema = _create_schema()
@@ -836,6 +881,38 @@ def unregistered_check(self, _):
         CheckedSchemaModel.to_yaml()
 
 
+@pytest.mark.parametrize(
+    "is_ordered,test_data,expected",
+    [
+        (True, {"b": [1], "a": [1]}, pandera.errors.SchemaError),
+        (True, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
+        (False, {"b": [1], "a": [1]}, pd.DataFrame(data={"b": [1], "a": [1]})),
+        (False, {"a": [1], "b": [1]}, pd.DataFrame(data={"a": [1], "b": [1]})),
+    ],
+)
+def test_to_yaml_retains_ordered_keyword(is_ordered, test_data, expected):
+    """Test that to_yaml() retains the 'ordered' keyword."""
+    schema = pandera.DataFrameSchema(
+        columns={
+            "a": pandera.Column(pandera.Int),
+            "b": pandera.Column(pandera.Int),
+        },
+        ordered=is_ordered,
+    )
+
+    # make sure the schema contains the ordered key word
+    yaml_schema = schema.to_yaml()
+    assert "ordered" in yaml_schema  # pylint: disable=E1135
+
+    # raise the error only when the ordered condition is violated
+    if isinstance(expected, type) and issubclass(expected, Exception):
+        with pytest.raises(expected):
+            assert schema.validate(pd.DataFrame(data=test_data))
+    else:
+        validation_df = schema.validate(pd.DataFrame(data=test_data))
+        assert validation_df.equals(expected)
+
+
 def test_serialize_deserialize_custom_datetime_checks():
     """
     Test that custom checks for datetime columns can be serialized and
@@ -1079,6 +1156,7 @@ def datetime_check(pandas_obj, *, stat):
 coerce: true
 strict: true
 unique: null
+ordered: false
 """
 
 VALID_FRICTIONLESS_DF = pd.DataFrame(