Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add title/description fields #754

Merged
merged 9 commits into from
Feb 5, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions pandera/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def __init__(
error: Optional[str] = None,
raise_warning: bool = False,
n_failure_cases: Union[int, None] = constants.N_FAILURE_CASES,
title: Optional[str] = None,
description: Optional[str] = None,
**check_kwargs,
) -> None:
"""Apply a validation function to each element, Series, or DataFrame.
Expand Down Expand Up @@ -173,6 +175,8 @@ def __init__(
check is informational and shouldn't stop execution of the program.
:param n_failure_cases: report the first n unique failure cases. If
None, report all failure cases.
:param title: A human-readable label for the check.
:param description: An arbitrary textual description of the check.
:param check_kwargs: key-word arguments to pass into ``check_fn``

:example:
Expand All @@ -187,6 +191,13 @@ def __init__(
>>> # define an element-wise check
>>> check_even = pa.Check(lambda x: x % 2 == 0, element_wise=True)
>>>
>>> # checks can be given human-readable metadata
>>> check_with_metadata = pa.Check(
...     lambda x: True,
...     title="Always passes",
...     description="This check always passes."
... )
>>>
>>> # specify assertions across categorical variables using `groupby`,
>>> # for example, make sure the mean measure for group "A" is always
>>> # larger than the mean measure for group "B"
Expand Down Expand Up @@ -241,6 +252,8 @@ def __init__(
self.ignore_na = ignore_na
self.raise_warning = raise_warning
self.n_failure_cases = n_failure_cases
self.title = title
self.description = description

if groupby is None and groups is not None:
raise ValueError(
Expand Down
2 changes: 2 additions & 0 deletions pandera/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def to_schema(cls) -> DataFrameSchema:
"name": cls.__config__.name,
"ordered": cls.__config__.ordered,
"unique": cls.__config__.unique,
"title": cls.__config__.title,
"description": cls.__config__.description or cls.__doc__,
}
cls.__schema__ = DataFrameSchema(
columns,
Expand Down
18 changes: 18 additions & 0 deletions pandera/model_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class FieldInfo:
"alias",
"original_name",
"dtype_kwargs",
"title",
"description",
)

def __init__(
Expand All @@ -69,6 +71,8 @@ def __init__(
alias: Any = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
self.checks = _to_checklist(checks)
self.nullable = nullable
Expand All @@ -80,6 +84,8 @@ def __init__(
self.check_name = check_name
self.original_name = cast(str, None) # always set by SchemaModel
self.dtype_kwargs = dtype_kwargs
self.title = title
self.description = description

@property
def name(self) -> str:
Expand Down Expand Up @@ -147,6 +153,8 @@ def to_column(
required=required,
name=name,
checks=checks,
title=self.title,
description=self.description,
)

def to_index(
Expand All @@ -165,6 +173,8 @@ def to_index(
coerce=self.coerce,
name=name,
checks=checks,
title=self.title,
description=self.description,
)


Expand Down Expand Up @@ -195,6 +205,8 @@ def Field(
alias: Any = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
**kwargs,
) -> Any:
"""Used to provide extra information about a field of a SchemaModel.
Expand Down Expand Up @@ -229,6 +241,8 @@ def Field(
for columns and multi-index, and to `False` for a single index.
:param dtype_kwargs: The parameters to be forwarded to the type of the
field.
:param title: A human-readable label for the field.
:param description: An arbitrary textual description of the field.
:param kwargs: Specify custom checks that have been registered with the
:class:`~pandera.extensions.register_check_method` decorator.
"""
Expand Down Expand Up @@ -269,6 +283,8 @@ def Field(
regex=regex,
check_name=check_name,
alias=alias,
title=title,
description=description,
dtype_kwargs=dtype_kwargs,
)

Expand Down Expand Up @@ -362,6 +378,7 @@ def check(*fields, regex: bool = False, **check_kwargs) -> ClassCheck:

def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
check_fn, check_method = _to_function_and_classmethod(fn)
check_kwargs.setdefault("description", fn.__doc__)
setattr(
check_method,
CHECK_KEY,
Expand All @@ -388,6 +405,7 @@ def dataframe_check(_fn=None, **check_kwargs) -> ClassCheck:

def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
check_fn, check_method = _to_function_and_classmethod(fn)
check_kwargs.setdefault("description", fn.__doc__)
setattr(
check_method,
DATAFRAME_CHECK_KEY,
Expand Down
9 changes: 9 additions & 0 deletions pandera/schema_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def __init__(
name: Union[str, Tuple[str, ...], None] = None,
regex: bool = False,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Create column validator object.

Expand Down Expand Up @@ -69,6 +71,9 @@ def __init__(

.. warning:: This option will be deprecated in 0.8.0

:param title: A human-readable label for the column.
:param description: An arbitrary textual description of the column.

:raises SchemaInitError: if impossible to build schema from parameters

:example:
Expand Down Expand Up @@ -97,6 +102,8 @@ def __init__(
coerce,
name,
pandas_dtype,
title,
description,
)
if (
name is not None
Expand Down Expand Up @@ -133,6 +140,8 @@ def properties(self) -> Dict[str, Any]:
"required": self.required,
"name": self._name,
"regex": self._regex,
"title": self.title,
"description": self.description,
}

def set_name(self, name: str):
Expand Down
38 changes: 38 additions & 0 deletions pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def __init__(
ordered: bool = False,
pandas_dtype: PandasDtypeInputTypes = None,
unique: Optional[Union[str, List[str]]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize DataFrameSchema validator.

Expand Down Expand Up @@ -136,6 +138,8 @@ def __init__(
.. warning:: This option will be deprecated in 0.8.0

:param unique: a list of columns that should be jointly unique.
:param title: A human-readable label for the schema.
:param description: An arbitrary textual description of the schema.

:raises SchemaInitError: if impossible to build schema from parameters
:raises SchemaInitError: if ``dtype`` and ``pandas_dtype`` are both
Expand Down Expand Up @@ -207,6 +211,8 @@ def __init__(
self._coerce = coerce
self._ordered = ordered
self._unique = unique
self._title = title
self._description = description
self._validate_schema()
self._set_column_names()

Expand Down Expand Up @@ -244,6 +250,16 @@ def ordered(self, value: bool) -> None:
"""Set ordered attribute"""
self._ordered = value

@property
def title(self) -> Optional[str]:
    """A human-readable label for the schema.

    :returns: the value held in ``self._title``, which ``__init__`` sets
        from its ``title`` argument (``None`` by default).
    """
    return self._title

@property
def description(self) -> Optional[str]:
    """An arbitrary textual description of the schema.

    :returns: the value held in ``self._description``, which ``__init__``
        sets from its ``description`` argument (``None`` by default).
    """
    return self._description

# the _is_inferred getter and setter methods are not public
@property
def _is_inferred(self) -> bool:
Expand Down Expand Up @@ -1622,6 +1638,8 @@ def __init__(
coerce: bool = False,
name: Any = None,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize series schema base object.

Expand Down Expand Up @@ -1653,6 +1671,8 @@ def __init__(

.. warning:: This option will be deprecated in 0.8.0

:param title: A human-readable label for the series.
:param description: An arbitrary textual description of the series.
:type nullable: bool
"""
if checks is None:
Expand All @@ -1676,6 +1696,8 @@ def __init__(
self._checks = checks
self._name = name
self._unique = unique
self._title = title
self._description = description

for check in self.checks:
if check.groupby is not None and not self._allow_groupby:
Expand Down Expand Up @@ -1759,6 +1781,16 @@ def name(self) -> Union[str, None]:
"""Get SeriesSchema name."""
return self._name

@property
def title(self) -> Optional[str]:
    """A human-readable label for the series.

    :returns: the value held in ``self._title``, which ``__init__`` sets
        from its ``title`` argument (``None`` by default).
    """
    return self._title

@property
def description(self) -> Optional[str]:
    """An arbitrary textual description of the series.

    :returns: the value held in ``self._description``, which ``__init__``
        sets from its ``description`` argument (``None`` by default).
    """
    return self._description

@property
def dtype(
self,
Expand Down Expand Up @@ -2078,6 +2110,8 @@ def __init__(
coerce: bool = False,
name: str = None,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize series schema base object.

Expand Down Expand Up @@ -2107,6 +2141,8 @@ def __init__(
where ``pandas_dtype=None``.
:param name: series name.
:param pandas_dtype: alias of ``dtype`` for backwards compatibility.

.. warning:: This option will be deprecated in 0.8.0

:param title: A human-readable label for the series.
:param description: An arbitrary textual description of the series.

Expand All @@ -2120,6 +2156,8 @@ def __init__(
coerce,
name,
pandas_dtype,
title,
description,
)
self.index = index

Expand Down
4 changes: 2 additions & 2 deletions pandera/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ def _wrapper(cls, *args, **kwargs):
# pylint: disable=line-too-long
# Values taken from
# https://hypothesis.readthedocs.io/en/latest/_modules/hypothesis/extra/numpy.html#from_dtype # noqa
MIN_DT_VALUE = -(2 ** 63)
MAX_DT_VALUE = 2 ** 63 - 1
MIN_DT_VALUE = -(2**63)
MAX_DT_VALUE = 2**63 - 1


def _is_datetime_tz(pandera_dtype: DataType) -> bool:
Expand Down
2 changes: 2 additions & 0 deletions pandera/typing/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class BaseConfig: # pylint:disable=R0903
"""

name: Optional[str] = None #: name of schema
title: Optional[str] = None #: human-readable label for schema
description: Optional[str] = None #: arbitrary textual description
coerce: bool = False #: coerce types of all schema components

#: make sure certain column combinations are unique
Expand Down
11 changes: 11 additions & 0 deletions tests/core/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,8 @@ class Config:
name = "Child schema"
strict = True
multiindex_strict = False
description = "foo"
title = "bar"

expected = pa.DataFrameSchema(
columns={"a": pa.Column(int), "b": pa.Column(int)},
Expand All @@ -686,10 +688,19 @@ class Config:
coerce=True,
strict=True,
ordered=True,
description="foo",
title="bar"
cosmicBboy marked this conversation as resolved.
Show resolved Hide resolved
)

assert expected == Child.to_schema()

def test_config_docstrings() -> None:
    """The model's class docstring is used as the schema description.

    ``Model`` sets no ``Config.description``, so the generated schema is
    expected to carry the class docstring instead.
    """

    class Model(pa.SchemaModel):
        """foo"""

        a: Series[int]

    generated_schema = Model.to_schema()
    assert generated_schema.description == Model.__doc__

class Input(pa.SchemaModel):
a: Series[int]
Expand Down