Skip to content

Commit

Permalink
add title/description fields (#754)
Browse files Browse the repository at this point in the history
* add title/description fields

* modify base config with title/desc

* fix docstring

* black

* let SchemaModel use docstring as desc

* update test_config

* fix black

* Update tests/core/test_model.py

* fix black

Co-authored-by: Niels Bantilan <niels.bantilan@gmail.com>
  • Loading branch information
smackesey and cosmicBboy committed Feb 5, 2022
1 parent c7ca562 commit df1e826
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 0 deletions.
13 changes: 13 additions & 0 deletions pandera/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def __init__(
error: Optional[str] = None,
raise_warning: bool = False,
n_failure_cases: Union[int, None] = constants.N_FAILURE_CASES,
title: Optional[str] = None,
description: Optional[str] = None,
**check_kwargs,
) -> None:
"""Apply a validation function to each element, Series, or DataFrame.
Expand Down Expand Up @@ -173,6 +175,8 @@ def __init__(
check is informational and shouldn't stop execution of the program.
:param n_failure_cases: report the first n unique failure cases. If
None, report all failure cases.
:param title: A human-readable label for the check.
:param description: An arbitrary textual description of the check.
:param check_kwargs: key-word arguments to pass into ``check_fn``
:example:
Expand All @@ -187,6 +191,13 @@ def __init__(
>>> # define an element-wise check
>>> check_even = pa.Check(lambda x: x % 2 == 0, element_wise=True)
>>>
>>> # checks can be given human-readable metadata
>>> check_with_metadata = pa.Check(
...     lambda x: True,
...     title="Always passes",
...     description="This check always passes."
... )
>>>
>>> # specify assertions across categorical variables using `groupby`,
>>> # for example, make sure the mean measure for group "A" is always
>>> # larger than the mean measure for group "B"
Expand Down Expand Up @@ -241,6 +252,8 @@ def __init__(
self.ignore_na = ignore_na
self.raise_warning = raise_warning
self.n_failure_cases = n_failure_cases
self.title = title
self.description = description

if groupby is None and groups is not None:
raise ValueError(
Expand Down
2 changes: 2 additions & 0 deletions pandera/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def to_schema(cls) -> DataFrameSchema:
"name": cls.__config__.name,
"ordered": cls.__config__.ordered,
"unique": cls.__config__.unique,
"title": cls.__config__.title,
"description": cls.__config__.description or cls.__doc__,
}
cls.__schema__ = DataFrameSchema(
columns,
Expand Down
18 changes: 18 additions & 0 deletions pandera/model_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class FieldInfo:
"alias",
"original_name",
"dtype_kwargs",
"title",
"description",
)

def __init__(
Expand All @@ -69,6 +71,8 @@ def __init__(
alias: Any = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
self.checks = _to_checklist(checks)
self.nullable = nullable
Expand All @@ -80,6 +84,8 @@ def __init__(
self.check_name = check_name
self.original_name = cast(str, None) # always set by SchemaModel
self.dtype_kwargs = dtype_kwargs
self.title = title
self.description = description

@property
def name(self) -> str:
Expand Down Expand Up @@ -147,6 +153,8 @@ def to_column(
required=required,
name=name,
checks=checks,
title=self.title,
description=self.description,
)

def to_index(
Expand All @@ -165,6 +173,8 @@ def to_index(
coerce=self.coerce,
name=name,
checks=checks,
title=self.title,
description=self.description,
)


Expand Down Expand Up @@ -195,6 +205,8 @@ def Field(
alias: Any = None,
check_name: Optional[bool] = None,
dtype_kwargs: Optional[Dict[str, Any]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
**kwargs,
) -> Any:
"""Used to provide extra information about a field of a SchemaModel.
Expand Down Expand Up @@ -229,6 +241,8 @@ def Field(
for columns and multi-index, and to `False` for a single index.
:param dtype_kwargs: The parameters to be forwarded to the type of the
field.
:param title: A human-readable label for the field.
:param description: An arbitrary textual description of the field.
:param kwargs: Specify custom checks that have been registered with the
:class:`~pandera.extensions.register_check_method` decorator.
"""
Expand Down Expand Up @@ -269,6 +283,8 @@ def Field(
regex=regex,
check_name=check_name,
alias=alias,
title=title,
description=description,
dtype_kwargs=dtype_kwargs,
)

Expand Down Expand Up @@ -362,6 +378,7 @@ def check(*fields, regex: bool = False, **check_kwargs) -> ClassCheck:

def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
check_fn, check_method = _to_function_and_classmethod(fn)
check_kwargs.setdefault("description", fn.__doc__)
setattr(
check_method,
CHECK_KEY,
Expand All @@ -388,6 +405,7 @@ def dataframe_check(_fn=None, **check_kwargs) -> ClassCheck:

def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
check_fn, check_method = _to_function_and_classmethod(fn)
check_kwargs.setdefault("description", fn.__doc__)
setattr(
check_method,
DATAFRAME_CHECK_KEY,
Expand Down
9 changes: 9 additions & 0 deletions pandera/schema_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def __init__(
name: Union[str, Tuple[str, ...], None] = None,
regex: bool = False,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Create column validator object.
Expand Down Expand Up @@ -69,6 +71,9 @@ def __init__(
.. warning:: This option will be deprecated in 0.8.0
:param title: A human-readable label for the column.
:param description: An arbitrary textual description of the column.
:raises SchemaInitError: if impossible to build schema from parameters
:example:
Expand Down Expand Up @@ -97,6 +102,8 @@ def __init__(
coerce,
name,
pandas_dtype,
title,
description,
)
if (
name is not None
Expand Down Expand Up @@ -133,6 +140,8 @@ def properties(self) -> Dict[str, Any]:
"required": self.required,
"name": self._name,
"regex": self._regex,
"title": self.title,
"description": self.description,
}

def set_name(self, name: str):
Expand Down
38 changes: 38 additions & 0 deletions pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def __init__(
ordered: bool = False,
pandas_dtype: PandasDtypeInputTypes = None,
unique: Optional[Union[str, List[str]]] = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize DataFrameSchema validator.
Expand Down Expand Up @@ -136,6 +138,8 @@ def __init__(
.. warning:: This option will be deprecated in 0.8.0
:param unique: a list of columns that should be jointly unique.
:param title: A human-readable label for the schema.
:param description: An arbitrary textual description of the schema.
:raises SchemaInitError: if impossible to build schema from parameters
:raises SchemaInitError: if ``dtype`` and ``pandas_dtype`` are both
Expand Down Expand Up @@ -207,6 +211,8 @@ def __init__(
self._coerce = coerce
self._ordered = ordered
self._unique = unique
self._title = title
self._description = description
self._validate_schema()
self._set_column_names()

Expand Down Expand Up @@ -244,6 +250,16 @@ def ordered(self, value: bool) -> None:
"""Set ordered attribute"""
self._ordered = value

@property
def title(self) -> Optional[str]:
    """A human-readable label for the schema.

    :returns: the ``title`` passed at construction, or ``None`` if no
        title was given.
    """
    return self._title

@property
def description(self) -> Optional[str]:
    """An arbitrary textual description of the schema.

    :returns: the ``description`` passed at construction, or ``None`` if
        no description was given.
    """
    return self._description

# the _is_inferred getter and setter methods are not public
@property
def _is_inferred(self) -> bool:
Expand Down Expand Up @@ -1622,6 +1638,8 @@ def __init__(
coerce: bool = False,
name: Any = None,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize series schema base object.
Expand Down Expand Up @@ -1653,6 +1671,8 @@ def __init__(
.. warning:: This option will be deprecated in 0.8.0
:param title: A human-readable label for the series.
:param description: An arbitrary textual description of the series.
:type nullable: bool
"""
if checks is None:
Expand All @@ -1676,6 +1696,8 @@ def __init__(
self._checks = checks
self._name = name
self._unique = unique
self._title = title
self._description = description

for check in self.checks:
if check.groupby is not None and not self._allow_groupby:
Expand Down Expand Up @@ -1759,6 +1781,16 @@ def name(self) -> Union[str, None]:
"""Get SeriesSchema name."""
return self._name

@property
def title(self) -> Optional[str]:
    """A human-readable label for the series.

    :returns: the ``title`` passed at construction, or ``None`` if no
        title was given.
    """
    return self._title

@property
def description(self) -> Optional[str]:
    """An arbitrary textual description of the series.

    :returns: the ``description`` passed at construction, or ``None`` if
        no description was given.
    """
    return self._description

@property
def dtype(
self,
Expand Down Expand Up @@ -2078,6 +2110,8 @@ def __init__(
coerce: bool = False,
name: str = None,
pandas_dtype: PandasDtypeInputTypes = None,
title: Optional[str] = None,
description: Optional[str] = None,
) -> None:
"""Initialize series schema base object.
Expand Down Expand Up @@ -2107,6 +2141,8 @@ def __init__(
where ``pandas_dtype=None``.
:param name: series name.
:param pandas_dtype: alias of ``dtype`` for backwards compatibility.
.. warning:: This option will be deprecated in 0.8.0
:param title: A human-readable label for the series.
:param description: An arbitrary textual description of the series.
Expand All @@ -2120,6 +2156,8 @@ def __init__(
coerce,
name,
pandas_dtype,
title,
description,
)
self.index = index

Expand Down
2 changes: 2 additions & 0 deletions pandera/typing/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ class BaseConfig: # pylint:disable=R0903
"""

name: Optional[str] = None #: name of schema
title: Optional[str] = None #: human-readable label for schema
description: Optional[str] = None #: arbitrary textual description
coerce: bool = False #: coerce types of all schema components

#: make sure certain column combinations are unique
Expand Down
13 changes: 13 additions & 0 deletions tests/core/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,8 @@ class Config:
name = "Child schema"
strict = True
multiindex_strict = False
description = "foo"
title = "bar"

expected = pa.DataFrameSchema(
columns={"a": pa.Column(int), "b": pa.Column(int)},
Expand All @@ -686,11 +688,22 @@ class Config:
coerce=True,
strict=True,
ordered=True,
description="foo",
title="bar",
)

assert expected == Child.to_schema()


def test_config_docstrings() -> None:
    """Check that a model's docstring becomes the schema description."""

    class Model(pa.SchemaModel):
        """foo"""

        a: Series[int]

    # No Config.description is set here, so the class docstring is used.
    schema = Model.to_schema()
    assert Model.__doc__ == schema.description


class Input(pa.SchemaModel):
a: Series[int]
b: Series[int]
Expand Down

0 comments on commit df1e826

Please sign in to comment.