unionai-oss · cosmicBboy · Feb 5, 2022 · Jan 31, 2022 · Feb 2, 2022 · Feb 3, 2022
diff --git a/pandera/checks.py b/pandera/checks.py
@@ -116,6 +116,8 @@ def __init__(
         error: Optional[str] = None,
         raise_warning: bool = False,
         n_failure_cases: Union[int, None] = constants.N_FAILURE_CASES,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
         **check_kwargs,
     ) -> None:
         """Apply a validation function to each element, Series, or DataFrame.
@@ -173,6 +175,8 @@ def __init__(
             check is informational and shouldn't stop execution of the program.
         :param n_failure_cases: report the first n unique failure cases. If
             None, report all failure cases.
+        :param title: A human-readable label for the check.
+        :param description: An arbitrary textual description of the check.
         :param check_kwargs: key-word arguments to pass into ``check_fn``
 
         :example:
@@ -187,6 +191,13 @@ def __init__(
         >>> # define an element-wise check
         >>> check_even = pa.Check(lambda x: x % 2 == 0, element_wise=True)
         >>>
+        >>> # checks can be given human-readable metadata
+        >>> check_with_metadata = pa.Check(
+        ...     lambda x: True,
+        ...     title="Always passes",
+        ...     description="This check always passes."
+        ... )
+        >>>
         >>> # specify assertions across categorical variables using `groupby`,
         >>> # for example, make sure the mean measure for group "A" is always
         >>> # larger than the mean measure for group "B"
@@ -241,6 +252,8 @@ def __init__(
         self.ignore_na = ignore_na
         self.raise_warning = raise_warning
         self.n_failure_cases = n_failure_cases
+        self.title = title
+        self.description = description
 
         if groupby is None and groups is not None:
             raise ValueError(

diff --git a/pandera/model.py b/pandera/model.py
@@ -207,6 +207,8 @@ def to_schema(cls) -> DataFrameSchema:
                 "name": cls.__config__.name,
                 "ordered": cls.__config__.ordered,
                 "unique": cls.__config__.unique,
+                "title": cls.__config__.title,
+                "description": cls.__config__.description or cls.__doc__,
             }
         cls.__schema__ = DataFrameSchema(
             columns,

diff --git a/pandera/model_components.py b/pandera/model_components.py
@@ -56,6 +56,8 @@ class FieldInfo:
         "alias",
         "original_name",
         "dtype_kwargs",
+        "title",
+        "description",
     )
 
     def __init__(
@@ -69,6 +71,8 @@ def __init__(
         alias: Any = None,
         check_name: Optional[bool] = None,
         dtype_kwargs: Optional[Dict[str, Any]] = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         self.checks = _to_checklist(checks)
         self.nullable = nullable
@@ -80,6 +84,8 @@ def __init__(
         self.check_name = check_name
         self.original_name = cast(str, None)  # always set by SchemaModel
         self.dtype_kwargs = dtype_kwargs
+        self.title = title
+        self.description = description
 
     @property
     def name(self) -> str:
@@ -147,6 +153,8 @@ def to_column(
             required=required,
             name=name,
             checks=checks,
+            title=self.title,
+            description=self.description,
         )
 
     def to_index(
@@ -165,6 +173,8 @@ def to_index(
             coerce=self.coerce,
             name=name,
             checks=checks,
+            title=self.title,
+            description=self.description,
         )
 
 
@@ -195,6 +205,8 @@ def Field(
     alias: Any = None,
     check_name: Optional[bool] = None,
     dtype_kwargs: Optional[Dict[str, Any]] = None,
+    title: Optional[str] = None,
+    description: Optional[str] = None,
     **kwargs,
 ) -> Any:
     """Used to provide extra information about a field of a SchemaModel.
@@ -229,6 +241,8 @@ def Field(
         for columns and multi-index, and to `False` for a single index.
     :param dtype_kwargs: The parameters to be forwarded to the type of the
         field.
+    :param title: A human-readable label for the field.
+    :param description: An arbitrary textual description of the field.
     :param kwargs: Specify custom checks that have been registered with the
         :class:`~pandera.extensions.register_check_method` decorator.
     """
@@ -269,6 +283,8 @@ def Field(
         regex=regex,
         check_name=check_name,
         alias=alias,
+        title=title,
+        description=description,
         dtype_kwargs=dtype_kwargs,
     )
 
@@ -362,6 +378,7 @@ def check(*fields, regex: bool = False, **check_kwargs) -> ClassCheck:
 
     def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
         check_fn, check_method = _to_function_and_classmethod(fn)
+        check_kwargs.setdefault("description", check_fn.__doc__)
         setattr(
             check_method,
             CHECK_KEY,
@@ -388,6 +405,7 @@ def dataframe_check(_fn=None, **check_kwargs) -> ClassCheck:
 
     def _wrapper(fn: Union[classmethod, AnyCallable]) -> classmethod:
         check_fn, check_method = _to_function_and_classmethod(fn)
+        check_kwargs.setdefault("description", check_fn.__doc__)
         setattr(
             check_method,
             DATAFRAME_CHECK_KEY,

diff --git a/pandera/schema_components.py b/pandera/schema_components.py
@@ -40,6 +40,8 @@ def __init__(
         name: Union[str, Tuple[str, ...], None] = None,
         regex: bool = False,
         pandas_dtype: PandasDtypeInputTypes = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         """Create column validator object.
 
@@ -69,6 +71,9 @@ def __init__(
 
             .. warning:: This option will be deprecated in 0.8.0
 
+        :param title: A human-readable label for the column.
+        :param description: An arbitrary textual description of the column.
+
         :raises SchemaInitError: if impossible to build schema from parameters
 
         :example:
@@ -97,6 +102,8 @@ def __init__(
             coerce,
             name,
             pandas_dtype,
+            title,
+            description,
         )
         if (
             name is not None
@@ -133,6 +140,8 @@ def properties(self) -> Dict[str, Any]:
             "required": self.required,
             "name": self._name,
             "regex": self._regex,
+            "title": self.title,
+            "description": self.description,
         }
 
     def set_name(self, name: str):

diff --git a/pandera/schemas.py b/pandera/schemas.py
@@ -100,6 +100,8 @@ def __init__(
         ordered: bool = False,
         pandas_dtype: PandasDtypeInputTypes = None,
         unique: Optional[Union[str, List[str]]] = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         """Initialize DataFrameSchema validator.
 
@@ -136,6 +138,8 @@ def __init__(
             .. warning:: This option will be deprecated in 0.8.0
 
         :param unique: a list of columns that should be jointly unique.
+        :param title: A human-readable label for the schema.
+        :param description: An arbitrary textual description of the schema.
 
         :raises SchemaInitError: if impossible to build schema from parameters
         :raises SchemaInitError: if ``dtype`` and ``pandas_dtype`` are both
@@ -207,6 +211,8 @@ def __init__(
         self._coerce = coerce
         self._ordered = ordered
         self._unique = unique
+        self._title = title
+        self._description = description
         self._validate_schema()
         self._set_column_names()
 
@@ -244,6 +250,16 @@ def ordered(self, value: bool) -> None:
         """Set ordered attribute"""
         self._ordered = value
 
+    @property
+    def title(self) -> Optional[str]:
+        """A human-readable label for the schema, or ``None`` if not set."""
+        return self._title
+
+    @property
+    def description(self) -> Optional[str]:
+        """An arbitrary textual description of the schema, if any."""
+        return self._description
+
     # the _is_inferred getter and setter methods are not public
     @property
     def _is_inferred(self) -> bool:
@@ -1622,6 +1638,8 @@ def __init__(
         coerce: bool = False,
         name: Any = None,
         pandas_dtype: PandasDtypeInputTypes = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         """Initialize series schema base object.
 
@@ -1653,6 +1671,8 @@ def __init__(
 
             .. warning:: This option will be deprecated in 0.8.0
 
+        :param title: A human-readable label for the series.
+        :param description: An arbitrary textual description of the series.
         :type nullable: bool
         """
         if checks is None:
@@ -1676,6 +1696,8 @@ def __init__(
         self._checks = checks
         self._name = name
         self._unique = unique
+        self._title = title
+        self._description = description
 
         for check in self.checks:
             if check.groupby is not None and not self._allow_groupby:
@@ -1759,6 +1781,16 @@ def name(self) -> Union[str, None]:
         """Get SeriesSchema name."""
         return self._name
 
+    @property
+    def title(self) -> Optional[str]:
+        """A human-readable label for the series, or ``None`` if not set."""
+        return self._title
+
+    @property
+    def description(self) -> Optional[str]:
+        """An arbitrary textual description of the series, if any."""
+        return self._description
+
     @property
     def dtype(
         self,
@@ -2078,6 +2110,8 @@ def __init__(
         coerce: bool = False,
         name: str = None,
         pandas_dtype: PandasDtypeInputTypes = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
     ) -> None:
         """Initialize series schema base object.
 
@@ -2107,6 +2141,8 @@ def __init__(
             where ``pandas_dtype=None``.
         :param name: series name.
+        :param title: A human-readable label for the series.
+        :param description: An arbitrary textual description of the series.
         :param pandas_dtype: alias of ``dtype`` for backwards compatibility.
 
             .. warning:: This option will be deprecated in 0.8.0
 
@@ -2120,6 +2156,8 @@ def __init__(
             coerce,
             name,
             pandas_dtype,
+            title,
+            description,
         )
         self.index = index
 

diff --git a/pandera/strategies.py b/pandera/strategies.py
@@ -213,8 +213,8 @@ def _wrapper(cls, *args, **kwargs):
 # pylint: disable=line-too-long
 # Values taken from
 # https://hypothesis.readthedocs.io/en/latest/_modules/hypothesis/extra/numpy.html#from_dtype  # noqa
-MIN_DT_VALUE = -(2 ** 63)
-MAX_DT_VALUE = 2 ** 63 - 1
+MIN_DT_VALUE = -(2**63)
+MAX_DT_VALUE = 2**63 - 1
 
 
 def _is_datetime_tz(pandera_dtype: DataType) -> bool:

diff --git a/pandera/typing/config.py b/pandera/typing/config.py
@@ -12,6 +12,8 @@ class BaseConfig:  # pylint:disable=R0903
     """
 
     name: Optional[str] = None  #: name of schema
+    title: Optional[str] = None  #: human-readable label for schema
+    description: Optional[str] = None  #: arbitrary textual description
     coerce: bool = False  #: coerce types of all schema components
 
     #: make sure certain column combinations are unique

diff --git a/tests/core/test_model.py b/tests/core/test_model.py
@@ -673,6 +673,8 @@ class Config:
             name = "Child schema"
             strict = True
             multiindex_strict = False
+            description = "foo"
+            title = "bar"
 
     expected = pa.DataFrameSchema(
         columns={"a": pa.Column(int), "b": pa.Column(int)},
@@ -686,10 +688,23 @@ class Config:
         coerce=True,
         strict=True,
         ordered=True,
+        description="foo",
+        title="bar",
     )
 
     assert expected == Child.to_schema()
 
+
+def test_config_docstrings() -> None:
+    """A model's docstring is used as the schema description by default."""
+
+    class Model(pa.SchemaModel):
+        """foo"""
+
+        a: Series[int]
+
+    assert Model.__doc__ == Model.to_schema().description
+
 
 class Input(pa.SchemaModel):
     a: Series[int]