diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e17e65ed..fc3c020b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ exclude: (^asv_bench|setup.py|requirements-dev.txt) repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v4.1.0 hooks: - id: check-ast description: Simply check whether files parse as valid python @@ -24,18 +24,18 @@ repos: description: Replaces or checks mixed line ending - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.9.3 + rev: v5.10.1 hooks: - id: isort args: ["--line-length=79", "--skip=docs/source/conf.py"] - repo: https://github.com/ikamensh/flynt - rev: '0.69' + rev: '0.76' hooks: - id: flynt - repo: https://github.com/psf/black - rev: 22.1.0 + rev: 22.3.0 hooks: - id: black args: ["--line-length=79"] @@ -48,7 +48,7 @@ repos: exclude: (^docs/|^scripts) - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.921 + rev: v0.942 hooks: - id: mypy entry: mypy pandera tests diff --git a/asv_bench/benchmarks/series_schema.py b/asv_bench/benchmarks/series_schema.py index 7a3680b2..747a2c55 100644 --- a/asv_bench/benchmarks/series_schema.py +++ b/asv_bench/benchmarks/series_schema.py @@ -20,7 +20,7 @@ def setup(self): Check(lambda x: len(x) > 3, element_wise=True) ], nullable=False, - allow_duplicates=True, + unique=False, name="my_series") self.series = pd.Series(["foobar", "foobar", "foobar"], name="my_series") diff --git a/docs/source/reference/dtypes.rst b/docs/source/reference/dtypes.rst index 488bcf75..97c80c99 100644 --- a/docs/source/reference/dtypes.rst +++ b/docs/source/reference/dtypes.rst @@ -119,18 +119,3 @@ Engines pandera.engines.engine.Engine pandera.engines.numpy_engine.Engine pandera.engines.pandas_engine.Engine - - -PandasDtype Enum ----------------- - -.. warning:: - - This class deprecated and will be removed from the pandera API in ``0.9.0`` - -.. autosummary:: - :toctree: generated - :template: class.rst - :nosignatures: - - pandera.engines.pandas_engine.PandasDtype diff --git a/docs/source/schema_models.rst b/docs/source/schema_models.rst index 1c2c805a..280ea52f 100644 --- a/docs/source/schema_models.rst +++ b/docs/source/schema_models.rst @@ -228,9 +228,6 @@ however, a couple of gotchas. Dtype aliases ^^^^^^^^^^^^^ -:mod:`pandera.typing` aliases will be deprecated in a future version, -please use :class:`~pandera.dtypes.DataType` subclasses instead. - .. code-block:: import pandera as pa diff --git a/docs/source/series_schemas.rst b/docs/source/series_schemas.rst index 14b82f63..3c29b4db 100644 --- a/docs/source/series_schemas.rst +++ b/docs/source/series_schemas.rst @@ -27,7 +27,7 @@ The :class:`~pandera.schemas.SeriesSchema` class allows for the validation of pa pa.Check(lambda x: len(x) > 3, element_wise=True) ], nullable=False, - allow_duplicates=True, + unique=False, name="my_series") validated_series = schema.validate( diff --git a/pandera/__init__.py b/pandera/__init__.py index bcaec8c8..e960db02 100644 --- a/pandera/__init__.py +++ b/pandera/__init__.py @@ -42,9 +42,8 @@ UINT16, UINT32, UINT64, + pandas_version, ) -from pandera.engines.pandas_engine import _PandasDtype as PandasDtype -from pandera.engines.pandas_engine import pandas_version from . import errors, pandas_accessor, typing from .checks import Check @@ -128,8 +127,6 @@ "UINT32", "UINT64", # pandera.engines.pandas_engine - "PandasDtype", - # pandera.engines.pandas_engine "pandas_version", # checks "Check", diff --git a/pandera/deprecations.py b/pandera/deprecations.py deleted file mode 100644 index f06b489b..00000000 --- a/pandera/deprecations.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Utility functions for deprecating features.""" - -import inspect -import warnings -from functools import wraps - -from pandera.errors import SchemaInitError - - -def deprecate_pandas_dtype(fn): - """ - __init__ decorator for raising SchemaInitError or warnings based on - the dtype and pandas_dtype input. - """ - - @wraps(fn) - def wrapper(*args, **kwargs): - """__init__ method wrapper for raising deprecation warning.""" - sig = inspect.signature(fn) - bound_args = sig.bind(*args, **kwargs) - dtype = bound_args.arguments.get("dtype", None) - pandas_dtype = bound_args.arguments.get("pandas_dtype", None) - - msg = ( - "`pandas_dtype` is deprecated and will be removed as an " - "option in pandera v0.9.0, use `dtype` instead." - ) - - if dtype is not None and pandas_dtype is not None: - raise SchemaInitError( - f"`dtype` and `pandas_dtype` cannot both be specified. {msg}" - ) - if pandas_dtype is not None: - warnings.warn(msg, DeprecationWarning) - - return fn(*args, **kwargs) - - return wrapper diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index d9ce5213..0fd5cfd4 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -11,7 +11,6 @@ import datetime import inspect import warnings -from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Type, Union import numpy as np @@ -803,107 +802,3 @@ def _coerce_row(row): ), ) return coerced_df.drop(["failure_cases"], axis="columns") - - -class PandasDtype(Enum): - # pylint: disable=line-too-long,invalid-name - """Enumerate all valid pandas data types. - - This class simply enumerates the valid numpy dtypes for pandas arrays. - For convenience ``PandasDtype`` enums can all be accessed in the top-level - ``pandera`` name space via the same enum name. - - .. warning:: - - This class is deprecated and will be removed in pandera v0.9.0. Use - python types, pandas type string aliases, numpy dtypes, or pandas - dtypes instead. See :ref:`dtypes` for details. - - :examples: - - >>> import pandas as pd - >>> import pandera as pa - >>> - >>> - >>> pa.SeriesSchema(pa.PandasDtype.Int).validate(pd.Series([1, 2, 3])) - 0 1 - 1 2 - 2 3 - dtype: int64 - >>> pa.SeriesSchema(pa.PandasDtype.Float).validate(pd.Series([1.1, 2.3, 3.4])) - 0 1.1 - 1 2.3 - 2 3.4 - dtype: float64 - >>> pa.SeriesSchema(pa.PandasDtype.String).validate(pd.Series(["a", "b", "c"])) - 0 a - 1 b - 2 c - dtype: object - - """ - - # numpy data types - Bool = "bool" #: ``"bool"`` numpy dtype - DateTime = "datetime64" #: ``"datetime64[ns]"`` numpy dtype - Timedelta = "timedelta64" #: ``"timedelta64[ns]"`` numpy dtype - Float = "float" #: ``"float"`` numpy dtype - Float16 = "float16" #: ``"float16"`` numpy dtype - Float32 = "float32" #: ``"float32"`` numpy dtype - Float64 = "float64" #: ``"float64"`` numpy dtype - Int = "int" #: ``"int"`` numpy dtype - Int8 = "int8" #: ``"int8"`` numpy dtype - Int16 = "int16" #: ``"int16"`` numpy dtype - Int32 = "int32" #: ``"int32"`` numpy dtype - Int64 = "int64" #: ``"int64"`` numpy dtype - UInt8 = "uint8" #: ``"uint8"`` numpy dtype - UInt16 = "uint16" #: ``"uint16"`` numpy dtype - UInt32 = "uint32" #: ``"uint32"`` numpy dtype - UInt64 = "uint64" #: ``"uint64"`` numpy dtype - Object = "object" #: ``"object"`` numpy dtype - Complex = "complex" #: ``"complex"`` numpy dtype - Complex64 = "complex64" #: ``"complex"`` numpy dtype - Complex128 = "complex128" #: ``"complex"`` numpy dtype - Complex256 = "complex256" #: ``"complex"`` numpy dtype - - # pandas data types - Category = "category" #: pandas ``"categorical"`` datatype - INT8 = "Int8" #: ``"Int8"`` pandas dtype:: pandas 0.24.0+ - INT16 = "Int16" #: ``"Int16"`` pandas dtype: pandas 0.24.0+ - INT32 = "Int32" #: ``"Int32"`` pandas dtype: pandas 0.24.0+ - INT64 = "Int64" #: ``"Int64"`` pandas dtype: pandas 0.24.0+ - FLOAT32 = "Float32" #: ``"Float32"`` pandas dtype: pandas 1.2.0+ - FLOAT64 = "Float64" #: ``"Float64"`` pandas dtype: pandas 1.2.0+ - UINT8 = "UInt8" #: ``"UInt8"`` pandas dtype: pandas 0.24.0+ - UINT16 = "UInt16" #: ``"UInt16"`` pandas dtype: pandas 0.24.0+ - UINT32 = "UInt32" #: ``"UInt32"`` pandas dtype: pandas 0.24.0+ - UINT64 = "UInt64" #: ``"UInt64"`` pandas dtype: pandas 0.24.0+ - String = "str" #: ``"str"`` numpy dtype - - #: ``"string"`` pandas dtypes: pandas 1.0.0+. For <1.0.0, this enum will - #: fall back on the str-as-object-array representation. - STRING = "string" - - -# NOTE: This is a hack to raise a deprecation warning to show for users who -# are still using the PandasDtype enum. -# pylint:disable=invalid-name -class __PandasDtype__: - def __init__(self): - self.pandas_dtypes = PandasDtype - - def __getattr__(self, name): - warnings.warn( - "The PandasDtype class is deprecated and will be removed in " - "pandera v0.9.0. Use python types, pandas type string aliases, " - "numpy dtypes, or pandas dtypes instead.", - DeprecationWarning, - ) - return Engine.dtype(getattr(self.pandas_dtypes, name).value) - - def __iter__(self): - for k in self.pandas_dtypes: - yield k.name - - -_PandasDtype = __PandasDtype__() diff --git a/pandera/io.py b/pandera/io.py index 07ade954..58ac7f86 100644 --- a/pandera/io.py +++ b/pandera/io.py @@ -197,9 +197,6 @@ def _deserialize_component_stats(serialized_component_stats): "name", "nullable", "unique", - # deserialize allow_duplicates property for backwards - # compatibility. Remove this for 0.8.0 release - "allow_duplicates", "coerce", "required", "regex", diff --git a/pandera/model.py b/pandera/model.py index 50c21d52..0ea53170 100644 --- a/pandera/model.py +++ b/pandera/model.py @@ -225,7 +225,7 @@ def to_schema(cls) -> DataFrameSchema: columns, index=index, checks=cls.__dataframe_checks__, # type: ignore - **kwargs, + **kwargs, # type: ignore ) if cls not in MODEL_CACHE: MODEL_CACHE[cls] = cls.__schema__ # type: ignore diff --git a/pandera/model_components.py b/pandera/model_components.py index d1fa1a45..72946579 100644 --- a/pandera/model_components.py +++ b/pandera/model_components.py @@ -49,7 +49,6 @@ class FieldInfo: "checks", "nullable", "unique", - "allow_duplicates", "coerce", "regex", "check_name", @@ -65,7 +64,6 @@ def __init__( checks: Optional[_CheckList] = None, nullable: bool = False, unique: bool = False, - allow_duplicates: Optional[bool] = None, coerce: bool = False, regex: bool = False, alias: Any = None, @@ -77,7 +75,6 @@ def __init__( self.checks = _to_checklist(checks) self.nullable = nullable self.unique = unique - self.allow_duplicates = allow_duplicates self.coerce = coerce self.regex = regex self.alias = alias @@ -124,30 +121,29 @@ def __set__(self, instance: Any, value: Any) -> None: # pragma: no cover def _to_schema_component( self, - pandas_dtype: PandasDtypeInputTypes, + dtype: PandasDtypeInputTypes, component: Type[SchemaComponent], checks: _CheckList = None, **kwargs: Any, ) -> SchemaComponent: if self.dtype_kwargs: - pandas_dtype = pandas_dtype(**self.dtype_kwargs) # type: ignore + dtype = dtype(**self.dtype_kwargs) # type: ignore checks = self.checks + _to_checklist(checks) - return component(pandas_dtype, checks=checks, **kwargs) # type: ignore + return component(dtype, checks=checks, **kwargs) # type: ignore def to_column( self, - pandas_dtype: PandasDtypeInputTypes, + dtype: PandasDtypeInputTypes, checks: _CheckList = None, required: bool = True, name: str = None, ) -> Column: """Create a schema_components.Column from a field.""" return self._to_schema_component( - pandas_dtype, + dtype, Column, nullable=self.nullable, unique=self.unique, - allow_duplicates=self.allow_duplicates, coerce=self.coerce, regex=self.regex, required=required, @@ -159,17 +155,16 @@ def to_column( def to_index( self, - pandas_dtype: PandasDtypeInputTypes, + dtype: PandasDtypeInputTypes, checks: _CheckList = None, name: str = None, ) -> Index: """Create a schema_components.Index from a field.""" return self._to_schema_component( - pandas_dtype, + dtype, Index, nullable=self.nullable, unique=self.unique, - allow_duplicates=self.allow_duplicates, coerce=self.coerce, name=name, checks=checks, @@ -196,7 +191,6 @@ def Field( str_startswith: Optional[str] = None, nullable: bool = False, unique: bool = False, - allow_duplicates: Optional[bool] = None, coerce: bool = False, regex: bool = False, ignore_na: bool = True, @@ -221,14 +215,6 @@ def Field( :param nullable: Whether or not the column/index can contain null values. :param unique: Whether column values should be unique. - :param allow_duplicates: Whether or not column can contain duplicate - values. - - .. warning:: - - This option will be deprecated in 0.8.0. Use the ``unique`` - argument instead. - :param coerce: coerces the data type if ``True``. :param regex: whether or not the field name or alias is a regex pattern. :param ignore_na: whether or not to ignore null values in the checks. @@ -278,7 +264,6 @@ def Field( checks=checks or None, nullable=nullable, unique=unique, - allow_duplicates=allow_duplicates, coerce=coerce, regex=regex, check_name=check_name, diff --git a/pandera/schema_components.py b/pandera/schema_components.py index 6af33b2b..3363ef64 100644 --- a/pandera/schema_components.py +++ b/pandera/schema_components.py @@ -9,7 +9,6 @@ from . import check_utils, errors from . import strategies as st -from .deprecations import deprecate_pandas_dtype from .error_handlers import SchemaErrorHandler from .schemas import ( CheckList, @@ -27,50 +26,35 @@ def _is_valid_multiindex_tuple_str(x: Tuple[Any, ...]) -> bool: class Column(SeriesSchemaBase): """Validate types and properties of DataFrame columns.""" - @deprecate_pandas_dtype def __init__( self, dtype: PandasDtypeInputTypes = None, checks: CheckList = None, nullable: bool = False, unique: bool = False, - allow_duplicates: Optional[bool] = None, coerce: bool = False, required: bool = True, name: Union[str, Tuple[str, ...], None] = None, regex: bool = False, - pandas_dtype: PandasDtypeInputTypes = None, title: Optional[str] = None, description: Optional[str] = None, ) -> None: """Create column validator object. - :param dtype: datatype of the column. A ``PandasDtype`` for - type-checking dataframe. If a string is specified, then assumes + :param dtype: datatype of the column. The datatype for type-checking + a dataframe. If a string is specified, then assumes one of the valid pandas string values: http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes :param checks: checks to verify validity of the column :param nullable: Whether or not column can contain null values. :param unique: whether column values should be unique - :param allow_duplicates: Whether or not column can contain duplicate - values. - - .. warning:: - - This option will be deprecated in 0.8.0. Use the ``unique`` - argument instead. - :param coerce: If True, when schema.validate is called the column will be coerced into the specified dtype. This has no effect on columns - where ``pandas_dtype=None``. + where ``dtype=None``. :param required: Whether or not column is allowed to be missing :param name: column name in dataframe to validate. :param regex: whether the ``name`` attribute should be treated as a regex pattern to apply to multiple columns in a dataframe. - :param pandas_dtype: alias of ``dtype`` for backwards compatibility. - - .. warning:: This option will be deprecated in 0.8.0 - :param title: A human-readable label for the column. :param description: An arbitrary textual description of the column. @@ -98,10 +82,8 @@ def __init__( checks, nullable, unique, - allow_duplicates, coerce, name, - pandas_dtype, title, description, ) diff --git a/pandera/schemas.py b/pandera/schemas.py index 22960616..0292a6b1 100644 --- a/pandera/schemas.py +++ b/pandera/schemas.py @@ -13,10 +13,10 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Dict, List, Optional, + Type, TypeVar, Union, cast, @@ -29,7 +29,6 @@ from . import check_utils, errors from . import strategies as st from .checks import Check -from .deprecations import deprecate_pandas_dtype from .dtypes import DataType from .engines import pandas_engine from .error_formatters import ( @@ -57,6 +56,7 @@ str, type, DataType, + Type, pd.core.dtypes.base.ExtensionDtype, np.dtype, None, @@ -87,19 +87,16 @@ def _wrapper(schema, *args, **kwargs): class DataFrameSchema: # pylint: disable=too-many-public-methods """A light-weight pandas DataFrame validator.""" - @deprecate_pandas_dtype def __init__( self, - columns: Optional[Dict[str, Column]] = None, + columns: Optional[Dict[Any, Column]] = None, checks: CheckList = None, index=None, dtype: PandasDtypeInputTypes = None, - transformer: Callable = None, coerce: bool = False, strict: Union[bool, str] = False, name: Optional[str] = None, ordered: bool = False, - pandas_dtype: PandasDtypeInputTypes = None, unique: Optional[Union[str, List[str]]] = None, unique_column_names: bool = False, title: Optional[str] = None, @@ -117,17 +114,9 @@ def __init__( types specified in any of the columns. If a string is specified, then assumes one of the valid pandas string values: http://pandas.pydata.org/pandas-docs/stable/basics.html#dtypes. - :param transformer: a callable with signature: - pandas.DataFrame -> pandas.DataFrame. If specified, calling - `validate` will verify properties of the columns and return the - transformed dataframe object. - - .. warning:: This feature is deprecated and no longer has an effect - on validated dataframes. - :param coerce: whether or not to coerce all of the columns on validation. This has no effect on columns where - ``pandas_dtype=None`` + ``dtype=None`` :param strict: ensure that all and only the columns defined in the schema are present in the dataframe. If set to 'filter', only the columns in the schema will be passed to the validated @@ -135,18 +124,12 @@ def __init__( are not present in the dataframe, will throw an error. :param name: name of the schema. :param ordered: whether or not to validate the columns order. - :param pandas_dtype: alias of ``dtype`` for backwards compatibility. - - .. warning:: This option will be deprecated in 0.8.0 - :param unique: a list of columns that should be jointly unique. :param unique_column_names: whether or not column names must be unique. :param title: A human-readable label for the schema. :param description: An arbitrary textual description of the schema. :raises SchemaInitError: if impossible to build schema from parameters - :raises SchemaInitError: if ``dtype`` and ``pandas_dtype`` are both - supplied. :examples: @@ -186,16 +169,6 @@ def __init__( self.columns: Dict[Any, Column] = {} if columns is None else columns - if transformer is not None: - warnings.warn( - "The `transformers` argument has been deprecated and will no " - "longer have any effect on validated dataframes. To achieve " - "the same goal, you can apply the function to the validated " - "data with `transformer(schema(df))` or " - "`schema(df).pipe(transformer)`", - DeprecationWarning, - ) - if strict not in ( False, True, @@ -210,7 +183,7 @@ def __init__( self.index = index self.strict: Union[bool, str] = strict self.name: Optional[str] = name - self.dtype: PandasDtypeInputTypes = dtype or pandas_dtype # type: ignore + self.dtype: PandasDtypeInputTypes = dtype # type: ignore self._coerce = coerce self._ordered = ordered self._unique = unique @@ -1165,9 +1138,6 @@ def update_columns( unique_column_names=False )> - .. note:: This is the successor to the ``update_column`` method, which - will be deprecated. - """ new_schema = copy.deepcopy(self) @@ -1692,17 +1662,14 @@ def _pydantic_validate(cls, schema: Any) -> "DataFrameSchema": class SeriesSchemaBase: """Base series validator object.""" - @deprecate_pandas_dtype def __init__( self, dtype: PandasDtypeInputTypes = None, checks: CheckList = None, nullable: bool = False, unique: bool = False, - allow_duplicates: Optional[bool] = None, coerce: bool = False, name: Any = None, - pandas_dtype: PandasDtypeInputTypes = None, title: Optional[str] = None, description: Optional[str] = None, ) -> None: @@ -1720,22 +1687,10 @@ def __init__( :param nullable: Whether or not column can contain null values. :param unique: Whether or not column can contain duplicate values. - :param allow_duplicates: Whether or not column can contain duplicate - values. - - .. warning:: - - This option will be deprecated in 0.8.0. Use the ``unique`` - argument instead. - :param coerce: If True, when schema.validate is called the column will be coerced into the specified dtype. This has no effect on columns where ``dtype=None``. :param name: column name in dataframe to validate. - :param pandas_dtype: alias of ``dtype`` for backwards compatibility. - - .. warning:: This option will be deprecated in 0.8.0 - :param title: A human-readable label for the series. :param description: An arbitrary textual description of the series. :type nullable: bool @@ -1745,17 +1700,7 @@ def __init__( if isinstance(checks, (Check, Hypothesis)): checks = [checks] - if allow_duplicates is not None: - warnings.warn( - "The `allow_duplicates` will be deprecated in " - "favor of the `unique` keyword. The value of " - "`unique` will be set to the opposite of " - "the `allow_duplicates` keyword.", - DeprecationWarning, - ) - unique = not allow_duplicates - - self.dtype = dtype or pandas_dtype # type: ignore + self.dtype = dtype # type: ignore self._nullable = nullable self._coerce = coerce self._checks = checks @@ -1827,16 +1772,6 @@ def unique(self, value: bool) -> None: """Set unique attribute""" self._unique = value - @property - def allow_duplicates(self) -> bool: - """Whether to allow duplicate values.""" - return not self._unique - - @allow_duplicates.setter - def allow_duplicates(self, value: bool) -> None: - """Set allow_duplicates attribute.""" - self._unique = not value - @property def coerce(self) -> bool: """Whether to coerce series to specified type.""" @@ -2169,7 +2104,6 @@ def _pydantic_validate( # type: ignore class SeriesSchema(SeriesSchemaBase): """Series validator.""" - @deprecate_pandas_dtype def __init__( self, dtype: PandasDtypeInputTypes = None, @@ -2177,10 +2111,8 @@ def __init__( index=None, nullable: bool = False, unique: bool = False, - allow_duplicates: Optional[bool] = None, coerce: bool = False, name: str = None, - pandas_dtype: PandasDtypeInputTypes = None, title: Optional[str] = None, description: Optional[str] = None, ) -> None: @@ -2199,34 +2131,21 @@ def __init__( :param nullable: Whether or not column can contain null values. :param unique: Whether or not column can contain duplicate values. - :param allow_duplicates: Whether or not column can contain duplicate - values. - - .. warning:: - - This option will be deprecated in 0.8.0. Use the ``unique`` - argument instead. - :param coerce: If True, when schema.validate is called the column will be coerced into the specified dtype. This has no effect on columns - where ``pandas_dtype=None``. + where ``dtype=None``. :param name: series name. - :param pandas_dtype: alias of ``dtype`` for backwards compatibility. :param title: A human-readable label for the series. :param description: An arbitrary textual description of the series. - .. warning:: This option will be deprecated in 0.8.0 - """ super().__init__( dtype, checks, nullable, unique, - allow_duplicates, coerce, name, - pandas_dtype, title, description, ) diff --git a/tests/core/test_deprecations.py b/tests/core/test_deprecations.py deleted file mode 100644 index 647db201..00000000 --- a/tests/core/test_deprecations.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Unit tests for deprecated features.""" - -import pytest - -import pandera as pa -from pandera.system import FLOAT_128_AVAILABLE - - -@pytest.mark.parametrize( - "schema_cls,as_pos_arg", - [ - [pa.DataFrameSchema, False], - [pa.SeriesSchema, True], - [pa.Column, True], - [pa.Index, True], - ], -) -def test_deprecate_pandas_dtype(schema_cls, as_pos_arg): - """Test that pandas_dtype deprecation warnings/errors are raised.""" - assert schema_cls(dtype=int).dtype.check(pa.Int()) - with pytest.warns(DeprecationWarning): - assert schema_cls(pandas_dtype=int).dtype.check(pa.Int()) - - with pytest.warns(DeprecationWarning): - schema_cls(pandas_dtype=int) - with pytest.raises(pa.errors.SchemaInitError): - schema_cls(dtype=int, pandas_dtype=int) - - if as_pos_arg: - assert schema_cls(int).dtype.check(pa.Int()) - with pytest.raises(pa.errors.SchemaInitError): - schema_cls(int, pandas_dtype=int) - - -@pytest.mark.parametrize( - "schema_cls", - [ - pa.DataFrameSchema, - pa.SeriesSchema, - pa.Column, - pa.Index, - ], -) -def test_deprecate_pandas_dtype_enum(schema_cls): - """Test that using the PandasDtype enum raises a DeprecationWarning.""" - for attr in pa.PandasDtype: - if not FLOAT_128_AVAILABLE and attr in { - "Float128", - "Complex256", - }: - continue - with pytest.warns(DeprecationWarning): - pandas_dtype = getattr(pa.PandasDtype, attr) - schema_cls(dtype=pandas_dtype) diff --git a/tests/core/test_schema_components.py b/tests/core/test_schema_components.py index 4e876853..d54d0129 100644 --- a/tests/core/test_schema_components.py +++ b/tests/core/test_schema_components.py @@ -562,17 +562,17 @@ def test_column_type_can_be_set() -> None: column_a = Column(Int, name="a") changed_type = Float - column_a.dtype = Float + column_a.dtype = Float # type: ignore [assignment] assert column_a.dtype == Engine.dtype(changed_type) for invalid_dtype in ("foobar", "bar"): with pytest.raises(TypeError): - column_a.dtype = invalid_dtype + column_a.dtype = invalid_dtype # type: ignore [assignment] for invalid_dtype in (1, 2.2, ["foo", 1, 1.1], {"b": 1}): with pytest.raises(TypeError): - column_a.dtype = invalid_dtype + column_a.dtype = invalid_dtype # type: ignore [assignment] @pytest.mark.parametrize( diff --git a/tests/core/test_schemas.py b/tests/core/test_schemas.py index 8bffea50..ff74b312 100644 --- a/tests/core/test_schemas.py +++ b/tests/core/test_schemas.py @@ -908,7 +908,7 @@ def _assert_bool_case(old_schema, new_schema): assert getattr(new_schema.columns["col"], bool_kwarg) return [ - Column(int, **{bool_kwarg: False}), + Column(int, **{bool_kwarg: False}), # type: ignore "col", {bool_kwarg: True}, _assert_bool_case, @@ -934,7 +934,6 @@ def _assert_bool_case(old_schema, new_schema): "coerce", "required", "regex", - "allow_duplicates", "unique", ] ], @@ -1527,12 +1526,6 @@ def fail_without_msg(data): assert failure_without_msg == "ValueError()" -def test_schema_transformer_deprecated() -> None: - """Using the transformer argument should raise a deprecation warning.""" - with pytest.warns(DeprecationWarning): - DataFrameSchema(transformer=lambda df: df) - - @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize( "from_dtype,to_dtype", @@ -1704,7 +1697,6 @@ def test_update_columns(schema_simple: DataFrameSchema) -> None: assert test_schema.columns["col1"].coerce is True assert test_schema.columns["col2"].dtype == Engine.dtype(int) assert test_schema.columns["col2"].unique - assert not test_schema.columns["col2"].allow_duplicates # Errors with pytest.raises(errors.SchemaInitError): @@ -1866,27 +1858,3 @@ def test_column_set_unique(): assert not test_schema.columns["a"].unique test_schema = test_schema.update_column("a", unique=True) assert test_schema.columns["a"].unique - - -def test_unique_and_set_duplicates_setters() -> None: - """Test the setting of `unique` and `allow_duplicates` properties""" - test_schema = DataFrameSchema( - columns={ - "a": Column(int, unique=True), - }, - unique=None, - ) - assert not test_schema.columns["a"].allow_duplicates - test_schema.columns["a"].unique = False - assert test_schema.columns["a"].allow_duplicates - test_schema.columns["a"].allow_duplicates = False - assert test_schema.columns["a"].unique - test_schema.columns["a"].allow_duplicates = True - assert not test_schema.columns["a"].unique - - test_schema.unique = "a" - assert test_schema.unique == ["a"] - test_schema.unique = ["a"] - assert test_schema.unique == ["a"] - test_schema.unique = None - assert not test_schema.unique diff --git a/tests/io/test_io.py b/tests/io/test_io.py index 588608c7..3457726e 100644 --- a/tests/io/test_io.py +++ b/tests/io/test_io.py @@ -97,7 +97,7 @@ def _create_schema(index="single"): "optional_props_column": pandera.Column( pandera.String, nullable=True, - allow_duplicates=True, + unique=False, coerce=True, required=False, regex=True, diff --git a/tests/strategies/test_strategies.py b/tests/strategies/test_strategies.py index 8d93dda7..256b6526 100644 --- a/tests/strategies/test_strategies.py +++ b/tests/strategies/test_strategies.py @@ -144,7 +144,7 @@ def test_check_strategy_continuous(data_type, data): def value_ranges(data_type: pa.DataType): - """Strategy to generate value range based on PandasDtype""" + """Strategy to generate value range based on the pandas datatype.""" kwargs = dict( allow_nan=False, allow_infinity=False,