Skip to content

Commit

Permalink
deprecate transformer argument in DataFrameSchema (#291)
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicBboy committed Oct 18, 2020
1 parent 50e86e4 commit 89c3c91
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 72 deletions.
1 change: 0 additions & 1 deletion docs/source/dataframe_schemas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,6 @@ data pipeline:
},
checks=[],
index=None,
transformer=None,
coerce=False,
strict=True
)
1 change: 0 additions & 1 deletion docs/source/schema_inference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ is a simple example:
},
checks=[],
index=<Schema Index>,
transformer=None,
coerce=True,
strict=False
)
Expand Down
15 changes: 2 additions & 13 deletions pandera/decorators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Decorators for integrating pandera into existing data pipelines."""

import inspect
import warnings

from collections import OrderedDict
from typing import Any, Callable, List, Union, Tuple, Dict, Optional, NoReturn
Expand Down Expand Up @@ -71,9 +70,7 @@ def check_input(
"""Validate function argument when function is called.
This is a decorator function that validates the schema of a dataframe
argument in a function. Note that if a transformer is specified by the
schema, the decorator will return the transformed dataframe, which will be
passed into the decorated function.
argument in a function.
:param schema: dataframe/series schema object
:param obj_getter: (Default value = None) if int, obj_getter refers to the
Expand Down Expand Up @@ -210,9 +207,7 @@ def check_output(
"""Validate function output.
Similar to input validator, but validates the output of the decorated
function. Note that the `transformer` function supplied to the
DataFrameSchema will not have an effect in the check_output schema
validator.
function.
:param schema: dataframe/series schema object
:param obj_getter: (Default value = None) if int, assumes that the output
Expand Down Expand Up @@ -284,12 +279,6 @@ def _wrapper(
:param kwargs: the dictionary of keyword arguments supplied when the
decorated function was called.
"""
if hasattr(schema, "transformer") and \
getattr(schema, "transformer") is not None:
warnings.warn(
"The schema transformer function has no effect in a "
"check_output decorator. Please perform the necessary "
"transformations in the '%s' function instead." % fn.__name__)
out = fn(*args, **kwargs)
if obj_getter is None:
obj = out
Expand Down
20 changes: 11 additions & 9 deletions pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,18 @@ def __init__(
"DataFrameSchema ; columns with missing pandas_type:" +
", ".join(missing_pandas_type))

if transformer is not None:
    # The keyword is spelled `transformer` (singular); quote it exactly so
    # users can search their own code for the offending argument.
    warnings.warn(
        "The `transformer` argument has been deprecated and will no "
        "longer have any effect on validated dataframes. To achieve "
        "the same goal, you can apply the function to the validated "
        "data with `transformer(schema(df))` or "
        "`schema(df).pipe(transformer)`",
        DeprecationWarning,
        # Attribute the warning to the code that constructs the schema
        # instead of to this constructor, so the reported file/line points
        # at the call that needs to change.
        stacklevel=2,
    )

self.checks = checks
self.index = index
self.transformer = transformer
self.strict = strict
self.name = name
self._coerce = coerce
Expand Down Expand Up @@ -428,10 +437,6 @@ def validate(
error_handler.collected_errors, check_obj)

assert all(check_results)

if self.transformer is not None:
check_obj = self.transformer(check_obj)

return check_obj

def __call__(
Expand Down Expand Up @@ -462,11 +467,10 @@ def __call__(

def __repr__(self):
"""Represent string for logging."""
return "%s(columns=%s, index=%s, transformer=%s, coerce=%s)" % \
return "%s(columns=%s, index=%s, coerce=%s)" % \
(self.__class__.__name__,
self.columns,
self.index,
self.transformer,
self.coerce)

def __str__(self):
Expand All @@ -491,7 +495,6 @@ def _format_multiline(json_str, arg):
"{columns},\n"
"{checks},\n"
"{indent}index={index},\n"
"{indent}transformer={transformer},\n"
"{indent}coerce={coerce},\n"
"{indent}strict={strict}\n"
")"
Expand All @@ -500,7 +503,6 @@ def _format_multiline(json_str, arg):
columns=columns,
checks=checks,
index=str(self.index),
transformer=str(self.transformer),
coerce=self.coerce,
strict=self.strict,
indent=_indent,
Expand Down
52 changes: 4 additions & 48 deletions tests/test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def test_check_function_decorators():
element_wise=True),
nullable=True)
},
transformer=lambda df: df.assign(e="foo")
)
out_schema = DataFrameSchema(
{
Expand All @@ -50,13 +49,13 @@ def test_func1(dataframe, x):
# pylint: disable=W0613
# disables unused-arguments because handling the second argument is
# what is being tested.
return dataframe.assign(f=["a", "b", "a"])
return dataframe.assign(e="foo", f=["a", "b", "a"])

# case 2: input and output validation using positional arguments
@check_input(in_schema, 1)
@check_output(out_schema, 0)
def test_func2(x, dataframe):
return dataframe.assign(f=["a", "b", "a"]), x
return dataframe.assign(e="foo", f=["a", "b", "a"]), x

# case 3: dataframe to validate is called as a keyword argument and the
# output is in a dictionary
Expand All @@ -65,7 +64,7 @@ def test_func2(x, dataframe):
def test_func3(x, in_dataframe=None):
return {
"x": x,
"out_dataframe": in_dataframe.assign(f=["a", "b", "a"]),
"out_dataframe": in_dataframe.assign(e="foo", f=["a", "b", "a"]),
}

# case 4: dataframe is a positional argument but the obj_getter in the
Expand All @@ -76,7 +75,7 @@ def test_func4(x, dataframe):
# pylint: disable=W0613
# disables unused-arguments because handling the second argument is
# what is being tested.
return dataframe.assign(f=["a", "b", "a"])
return dataframe.assign(e="foo", f=["a", "b", "a"])

df = pd.DataFrame({
"a": [1, 2, 3],
Expand Down Expand Up @@ -157,49 +156,6 @@ def test_incorrect_check_input_index(df):
)


def test_check_output_transformer():
    """Expect a UserWarning when check_output gets a schema with a transformer."""
    # Build the schema up front so the decorator line stays readable.
    schema_with_transformer = DataFrameSchema(
        {"column": Column(int)},
        transformer=lambda df: df,
    )

    @check_output(schema_with_transformer)
    def test_func(df):
        return df

    frame = pd.DataFrame({"column": [1, 2, 3]})
    with pytest.warns(UserWarning):
        test_func(frame)


def test_check_function_decorator_transform():
    """Check that the input schema's transformer output reaches the function.

    The input schema only declares ``column1``; its transformer adds
    ``column2``. Both decorated functions return their dataframe argument
    unchanged, so ``column2`` appearing in the result shows the transformed
    frame was the one passed in.
    """

    def add_column2(frame):
        return frame.assign(column2="foo")

    def make_input():
        return pd.DataFrame({"column1": [1, 2, 3]})

    schema_in = DataFrameSchema(
        {"column1": Column(Int)},
        transformer=add_column2,
    )
    schema_out = DataFrameSchema({
        "column1": Column(Int),
        "column2": Column(String),
    })

    # case 1: no obj_getter supplied to either decorator
    @check_input(schema_in)
    @check_output(schema_out)
    def func_input_transform1(df):
        return df

    single_result = func_input_transform1(make_input())
    assert "column2" in single_result

    # case 2: obj_getter=1 on both decorators (second argument / second
    # element of the returned tuple)
    @check_input(schema_in, 1)
    @check_output(schema_out, 1)
    def func_input_transform2(_, df):
        return _, df

    tuple_result = func_input_transform2(None, make_input())
    assert "column2" in tuple_result[1]


def test_check_input_method_decorators():
"""Test the check_input and check_output decorator behaviours when the
dataframe is changed within the function being checked"""
Expand Down
6 changes: 6 additions & 0 deletions tests/test_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,3 +951,9 @@ def test_lazy_series_validation_error(schema, data, expectation):
.failure_case.isin(failure_cases)
.all()
)


def test_schema_transformer_deprecated():
    """DataFrameSchema(transformer=...) should emit a DeprecationWarning."""

    def identity(df):
        return df

    with pytest.warns(DeprecationWarning):
        DataFrameSchema(transformer=identity)

0 comments on commit 89c3c91

Please sign in to comment.