diff --git a/docs/source/dataframe_schemas.rst b/docs/source/dataframe_schemas.rst index 0075c9aed..5998e4c52 100644 --- a/docs/source/dataframe_schemas.rst +++ b/docs/source/dataframe_schemas.rst @@ -660,7 +660,6 @@ data pipeline: }, checks=[], index=None, - transformer=None, coerce=False, strict=True ) diff --git a/docs/source/schema_inference.rst b/docs/source/schema_inference.rst index db1f7ef3b..e82e43dcb 100644 --- a/docs/source/schema_inference.rst +++ b/docs/source/schema_inference.rst @@ -46,7 +46,6 @@ is a simple example: }, checks=[], index=, - transformer=None, coerce=True, strict=False ) diff --git a/pandera/decorators.py b/pandera/decorators.py index 4a91685e1..de09bae70 100644 --- a/pandera/decorators.py +++ b/pandera/decorators.py @@ -1,7 +1,6 @@ """Decorators for integrating pandera into existing data pipelines.""" import inspect -import warnings from collections import OrderedDict from typing import Any, Callable, List, Union, Tuple, Dict, Optional, NoReturn @@ -71,9 +70,7 @@ def check_input( """Validate function argument when function is called. This is a decorator function that validates the schema of a dataframe - argument in a function. Note that if a transformer is specified by the - schema, the decorator will return the transformed dataframe, which will be - passed into the decorated function. + argument in a function. :param schema: dataframe/series schema object :param obj_getter: (Default value = None) if int, obj_getter refers to the @@ -210,9 +207,7 @@ def check_output( """Validate function output. Similar to input validator, but validates the output of the decorated - function. Note that the `transformer` function supplied to the - DataFrameSchema will not have an effect in the check_output schema - validator. + function. 
:param schema: dataframe/series schema object :param obj_getter: (Default value = None) if int, assumes that the output @@ -284,12 +279,6 @@ def _wrapper( :param kwargs: the dictionary of keyword arguments supplied when the decorated function was called. """ - if hasattr(schema, "transformer") and \ - getattr(schema, "transformer") is not None: - warnings.warn( - "The schema transformer function has no effect in a " - "check_output decorator. Please perform the necessary " - "transformations in the '%s' function instead." % fn.__name__) out = fn(*args, **kwargs) if obj_getter is None: obj = out diff --git a/pandera/schemas.py b/pandera/schemas.py index 1508c90e7..1a1eeb008 100644 --- a/pandera/schemas.py +++ b/pandera/schemas.py @@ -136,9 +136,18 @@ def __init__( "DataFrameSchema ; columns with missing pandas_type:" + ", ".join(missing_pandas_type)) + if transformer is not None: + warnings.warn( + "The `transformer` argument has been deprecated and will no " + "longer have any effect on validated dataframes. 
To achieve " + "the same goal, you can apply the function to the validated " + "data with `transformer(schema(df))` or " + "`schema(df).pipe(transformer)`", + DeprecationWarning + ) + self.checks = checks self.index = index - self.transformer = transformer self.strict = strict self.name = name self._coerce = coerce @@ -428,10 +437,6 @@ def validate( error_handler.collected_errors, check_obj) assert all(check_results) - - if self.transformer is not None: - check_obj = self.transformer(check_obj) - return check_obj def __call__( @@ -462,11 +467,10 @@ def __call__( def __repr__(self): """Represent string for logging.""" - return "%s(columns=%s, index=%s, transformer=%s, coerce=%s)" % \ + return "%s(columns=%s, index=%s, coerce=%s)" % \ (self.__class__.__name__, self.columns, self.index, - self.transformer, self.coerce) def __str__(self): @@ -491,7 +495,6 @@ def _format_multiline(json_str, arg): "{columns},\n" "{checks},\n" "{indent}index={index},\n" - "{indent}transformer={transformer},\n" "{indent}coerce={coerce},\n" "{indent}strict={strict}\n" ")" @@ -500,7 +503,6 @@ def _format_multiline(json_str, arg): columns=columns, checks=checks, index=str(self.index), - transformer=str(self.transformer), coerce=self.coerce, strict=self.strict, indent=_indent, diff --git a/tests/test_decorators.py b/tests/test_decorators.py index a93416b95..931af1c70 100644 --- a/tests/test_decorators.py +++ b/tests/test_decorators.py @@ -32,7 +32,6 @@ def test_check_function_decorators(): element_wise=True), nullable=True) }, - transformer=lambda df: df.assign(e="foo") ) out_schema = DataFrameSchema( { @@ -50,13 +49,13 @@ def test_func1(dataframe, x): # pylint: disable=W0613 # disables unused-arguments because handling the second argument is # what is being tested. 
- return dataframe.assign(f=["a", "b", "a"]) + return dataframe.assign(e="foo", f=["a", "b", "a"]) # case 2: input and output validation using positional arguments @check_input(in_schema, 1) @check_output(out_schema, 0) def test_func2(x, dataframe): - return dataframe.assign(f=["a", "b", "a"]), x + return dataframe.assign(e="foo", f=["a", "b", "a"]), x # case 3: dataframe to validate is called as a keyword argument and the # output is in a dictionary @@ -65,7 +64,7 @@ def test_func2(x, dataframe): def test_func3(x, in_dataframe=None): return { "x": x, - "out_dataframe": in_dataframe.assign(f=["a", "b", "a"]), + "out_dataframe": in_dataframe.assign(e="foo", f=["a", "b", "a"]), } # case 4: dataframe is a positional argument but the obj_getter in the @@ -76,7 +75,7 @@ def test_func4(x, dataframe): # pylint: disable=W0613 # disables unused-arguments because handling the second argument is # what is being tested. - return dataframe.assign(f=["a", "b", "a"]) + return dataframe.assign(e="foo", f=["a", "b", "a"]) df = pd.DataFrame({ "a": [1, 2, 3], @@ -157,49 +156,6 @@ def test_incorrect_check_input_index(df): ) -def test_check_output_transformer(): - """Test check warning on output transformer.""" - - @check_output( - DataFrameSchema( - {"column": Column(int)}, - transformer=lambda df: df - ) - ) - def test_func(df): - return df - - with pytest.warns(UserWarning): - test_func(pd.DataFrame({"column": [1, 2, 3]})) - - -def test_check_function_decorator_transform(): - """Test that transformer argument is in effect in check_input decorator.""" - - in_schema = DataFrameSchema( - {"column1": Column(Int)}, - transformer=lambda df: df.assign(column2="foo")) - out_schema = DataFrameSchema( - {"column1": Column(Int), - "column2": Column(String)}) - - @check_input(in_schema) - @check_output(out_schema) - def func_input_transform1(df): - return df - - result1 = func_input_transform1(pd.DataFrame({"column1": [1, 2, 3]})) - assert "column2" in result1 - - @check_input(in_schema, 1) - 
@check_output(out_schema, 1) - def func_input_transform2(_, df): - return _, df - - result2 = func_input_transform2(None, pd.DataFrame({"column1": [1, 2, 3]})) - assert "column2" in result2[1] - - def test_check_input_method_decorators(): """Test the check_input and check_output decorator behaviours when the dataframe is changed within the function being checked""" diff --git a/tests/test_schemas.py b/tests/test_schemas.py index cddcf8733..6acdc1c60 100644 --- a/tests/test_schemas.py +++ b/tests/test_schemas.py @@ -951,3 +951,9 @@ def test_lazy_series_validation_error(schema, data, expectation): .failure_case.isin(failure_cases) .all() ) + + +def test_schema_transformer_deprecated(): + """Using the transformer argument should raise a deprecation warning.""" + with pytest.warns(DeprecationWarning): + DataFrameSchema(transformer=lambda df: df)