diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index a10bd305e..e05375073 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -86,7 +86,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.7", "3.8", "3.9", "3.10"] pandas-version: ["1.2.0", "1.3.0", "latest"] exclude: - python-version: "3.10" @@ -196,9 +196,9 @@ jobs: uses: codecov/codecov-action@v3 - name: Check Docstrings - if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }} + if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }} run: nox ${{ env.NOX_FLAGS }} --session doctests - name: Check Docs - if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }} + if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }} run: nox ${{ env.NOX_FLAGS }} --session docs diff --git a/docs/source/conf.py b/docs/source/conf.py index f9d526943..bc2b8b1d3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -48,6 +48,7 @@ "sphinx_copybutton", "recommonmark", "sphinx_panels", + "jupyterlite_sphinx", ] doctest_global_setup = """ @@ -192,15 +193,17 @@ def filter(self, record: pylogging.LogRecord) -> bool: # that dataclass name is in the message, so that you don't filter out # other meaningful warnings return not ( - record.getMessage().startswith( - "Cannot resolve forward reference in type annotations of " - '"pandera.typing.DataFrame"' - ) # NOTE: forward reference false positive needs to be handled # correctly - or record.getMessage().startswith( - "Cannot resolve forward reference in type annotations of " - '"pandera.schemas.DataFrameSchema' + record.getMessage().startswith( + ( + "Cannot resolve forward reference in type annotations of " + '"pandera.typing.DataFrame"', + "Cannot resolve forward reference in 
type annotations of " + '"pandera.schemas.DataFrameSchema', + "Cannot resolve forward reference in type annotations of " + '"pandera.typing.DataFrame.style"', + ) ) ) @@ -259,3 +262,8 @@ def linkcode_resolve(domain, info): ) return f"https://github.com/pandera-dev/pandera/blob/{tag}/pandera/{fn}{linespec}" + + +# jupyterlite config +jupyterlite_contents = ["notebooks/try_pandera.ipynb"] +jupyterlite_bind_ipynb_suffix = False diff --git a/docs/source/index.rst b/docs/source/index.rst index e85cc7ce7..776a43e86 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -345,6 +345,7 @@ page or reach out to the maintainers and pandera community on :hidden: self + Try Pandera ▶️ <try_pandera> .. toctree:: :maxdepth: 6 diff --git a/docs/source/jupyterlite_config.json b/docs/source/jupyterlite_config.json new file mode 100644 index 000000000..a4199afdd --- /dev/null +++ b/docs/source/jupyterlite_config.json @@ -0,0 +1,11 @@ +{ + "LiteBuildConfig": { + "federated_extensions": [ + "https://conda.anaconda.org/conda-forge/noarch/pandera-0.12.0-hd8ed1ab_0.tar.bz2" + ], + "ignore_sys_prefix": true, + "piplite_urls": [ + "https://files.pythonhosted.org/packages/95/cc/e058935b0b34d50214596297f0a9edb0781fc5201bf2c6eb8cf1a026d710/pandera-0.12.0-py3-none-any.whl" + ] + } +} diff --git a/docs/source/notebooks/try_pandera.ipynb b/docs/source/notebooks/try_pandera.ipynb new file mode 100644 index 000000000..19d6ccf1c --- /dev/null +++ b/docs/source/notebooks/try_pandera.ipynb @@ -0,0 +1,80 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "ac4294bb", + "metadata": {}, + "outputs": [], + "source": [ + "import piplite\n", + "\n", + "\n", + "for package in [\n", + " \"wrapt\",\n", + " \"typing_extensions\",\n", + " \"mypy_extensions\",\n", + " \"typing_inspect\",\n", + " \"pydantic\",\n", + " \"pandera\",\n", + "]:\n", + " await piplite.install(package, deps=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9a4eef5", + "metadata": 
{}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pandera as pa\n", + "from pandera.typing import DataFrame, Series\n", + "\n", + "\n", + "class Schema(pa.SchemaModel):\n", + " item: Series[str] = pa.Field(isin=[\"apple\", \"orange\"], coerce=True)\n", + " price: Series[float] = pa.Field(gt=0)\n", + "\n", + "\n", + "@pa.check_types(lazy=True)\n", + "def transform_data(data: DataFrame[Schema]):\n", + " ...\n", + "\n", + "\n", + "data = pd.DataFrame.from_records([\n", + " {\"item\": \"applee\", \"price\": 0.5},\n", + " {\"item\": \"orange\", \"price\": -1000}\n", + "])\n", + "\n", + "\n", + "try:\n", + " transform_data(data)\n", + "except pa.errors.SchemaErrors as exc:\n", + " display(exc.failure_cases)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/try_pandera.rst b/docs/source/try_pandera.rst new file mode 100644 index 000000000..d5786072b --- /dev/null +++ b/docs/source/try_pandera.rst @@ -0,0 +1,7 @@ +Try Pandera +=============== + +In the notebook below, you can get a sense of how to use pandera right in the +browser without having to install anything locally! + +.. 
retrolite:: notebooks/try_pandera.ipynb diff --git a/environment.yml b/environment.yml index ce038fe8d..72db62ca2 100644 --- a/environment.yml +++ b/environment.yml @@ -63,6 +63,7 @@ dependencies: - python-multipart # documentation + - jupyterlite_sphinx - sphinx - sphinx-panels - sphinx-autodoc-typehints <= 1.14.1 diff --git a/pandera/dtypes.py b/pandera/dtypes.py index fd37381ea..e85e21b34 100644 --- a/pandera/dtypes.py +++ b/pandera/dtypes.py @@ -9,7 +9,6 @@ Any, Callable, Iterable, - Literal, Optional, Tuple, Type, @@ -17,6 +16,11 @@ Union, ) +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal # type: ignore[misc] + class DataType(ABC): """Base class of all Pandera data types.""" diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index 5f0e4db82..804c8481e 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -25,6 +25,13 @@ from . import engine, numpy_engine, utils from .type_aliases import PandasDataType, PandasExtensionType, PandasObject +try: + import pyarrow # pylint:disable=unused-import + + PYARROW_INSTALLED = True +except ImportError: + PYARROW_INSTALLED = False + def pandas_version(): """Return the pandas version.""" @@ -605,6 +612,13 @@ class STRING(DataType, dtypes.String): storage: Optional[Literal["python", "pyarrow"]] = "python" def __post_init__(self): + if self.storage == "pyarrow" and not PYARROW_INSTALLED: + raise ModuleNotFoundError( + "pyarrow needs to be installed when using the " + "string[pyarrow] pandas data type. Please " + "`pip install pyarrow` or " + "`conda install -c conda-forge pyarrow` before proceeding." 
+ ) type_ = pd.StringDtype(self.storage) object.__setattr__(self, "type", type_) diff --git a/pandera/schemas.py b/pandera/schemas.py index 2f70d52f6..0b054109d 100644 --- a/pandera/schemas.py +++ b/pandera/schemas.py @@ -15,7 +15,6 @@ Any, Dict, List, - Literal, Optional, Type, TypeVar, @@ -41,6 +40,12 @@ from .error_handlers import SchemaErrorHandler from .hypotheses import Hypothesis +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal # type: ignore[misc] + + if TYPE_CHECKING: from pandera.schema_components import Column diff --git a/requirements-dev.txt b/requirements-dev.txt index 3ae459b8b..ab1b45b8c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -38,6 +38,7 @@ nox importlib_metadata uvicorn python-multipart +jupyterlite_sphinx sphinx sphinx-panels sphinx-autodoc-typehints <= 1.14.1 diff --git a/setup.py b/setup.py index 2510fedf0..931ade0c4 100644 --- a/setup.py +++ b/setup.py @@ -52,10 +52,9 @@ "typing_extensions >= 3.7.4.3 ; python_version<'3.8'", "typing_inspect >= 0.6.0", "wrapt", - "pyarrow", ], extras_require=extras_require, - python_requires=">=3.8", + python_requires=">=3.7", platforms="any", classifiers=[ "Development Status :: 5 - Production/Stable", @@ -64,6 +63,7 @@ "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/tests/core/test_dtypes.py b/tests/core/test_dtypes.py index 92c033e3c..485297ab3 100644 --- a/tests/core/test_dtypes.py +++ b/tests/core/test_dtypes.py @@ -113,7 +113,7 @@ } nullable_string_dtypes = {pd.StringDtype: "string"} -if pa.PANDAS_1_3_0_PLUS: +if pa.PANDAS_1_3_0_PLUS and pandas_engine.PYARROW_INSTALLED: nullable_string_dtypes.update( {pd.StringDtype(storage="pyarrow"): "string[pyarrow]"} ) diff --git 
a/tests/core/test_from_to_format_conversions.py b/tests/core/test_from_to_format_conversions.py index 658701a50..f22fe7f9d 100644 --- a/tests/core/test_from_to_format_conversions.py +++ b/tests/core/test_from_to_format_conversions.py @@ -7,6 +7,7 @@ import pytest import pandera as pa +from pandera.engines import pandas_engine class InSchema(pa.SchemaModel): @@ -92,6 +93,19 @@ def invalid_input_dataframe() -> pd.DataFrame: return pd.DataFrame({"str_col": ["a"]}) +def _needs_pyarrow(schema) -> bool: + return ( + schema + in { + InSchemaParquet, + InSchemaFeather, + OutSchemaParquet, + OutSchemaFeather, + } + and not pandas_engine.PYARROW_INSTALLED + ) + + @pytest.mark.parametrize( "schema,to_fn,buf_cls", [ @@ -122,24 +136,30 @@ def fn(df: pa.typing.DataFrame[schema]): (mock_dataframe(), False), (invalid_input_dataframe(), True), ]: + buf = None if buf_cls is None else buf_cls() - arg = to_fn(df, *([buf] if buf else [])) - if buf: - if buf.closed: - pytest.skip( - "skip test for older pandas versions where to_pickle " - "closes user-provided buffers: " - "https://github.com/pandas-dev/pandas/issues/35679" - ) - buf.seek(0) - arg = buf - if invalid: - with pytest.raises(pa.errors.SchemaError): - fn(arg) - return - - out = fn(arg) - assert df.equals(out) + + if _needs_pyarrow(schema): + with pytest.raises(ImportError): + to_fn(df, *([buf] if buf else [])) + else: + arg = to_fn(df, *([buf] if buf else [])) + if buf: + if buf.closed: + pytest.skip( + "skip test for older pandas versions where to_pickle " + "closes user-provided buffers: " + "https://github.com/pandas-dev/pandas/issues/35679" + ) + buf.seek(0) + arg = buf + if invalid: + with pytest.raises(pa.errors.SchemaError): + fn(arg) + return + + out = fn(arg) + assert df.equals(out) @pytest.mark.parametrize( @@ -170,6 +190,12 @@ def invalid_fn( return df df = mock_dataframe() + + if _needs_pyarrow(schema): + with pytest.raises((ImportError)): + fn(df) + return + try: out = fn(df) except IOError: diff --git 
a/tests/core/test_schema_components.py b/tests/core/test_schema_components.py index 22073ff40..ad405626b 100644 --- a/tests/core/test_schema_components.py +++ b/tests/core/test_schema_components.py @@ -19,7 +19,7 @@ String, errors, ) -from pandera.engines.pandas_engine import Engine +from pandera.engines.pandas_engine import Engine, pandas_version def test_column() -> None: @@ -251,6 +251,10 @@ def tests_multi_index_subindex_coerce() -> None: schema(data, lazy=True) +@pytest.mark.skipif( + pandas_version().release <= (1, 3, 5), + reason="MultiIndex dtypes are buggy prior to pandas 1.4.*", +) @pytest.mark.parametrize("coerce", [True, False]) def tests_multi_index_subindex_coerce_with_empty_subindex(coerce) -> None: """MultIndex component should override each sub indexes dtype, @@ -263,7 +267,6 @@ def tests_multi_index_subindex_coerce_with_empty_subindex(coerce) -> None: ] data = pd.DataFrame(index=pd.MultiIndex.from_arrays([[]] * len(indexes))) - schema_override = DataFrameSchema(index=MultiIndex(indexes)) if coerce: @@ -275,7 +278,8 @@ def tests_multi_index_subindex_coerce_with_empty_subindex(coerce) -> None: ) else: with pytest.raises( - errors.SchemaErrors, match="A total of 2 schema errors were found" + errors.SchemaErrors, + match=r"A total of \d+ schema errors were found", ): schema_override(data, lazy=True) diff --git a/tests/fastapi/test_app.py b/tests/fastapi/test_app.py index 08f0c7ed5..054ab2688 100644 --- a/tests/fastapi/test_app.py +++ b/tests/fastapi/test_app.py @@ -41,6 +41,8 @@ def test_items_endpoint(app): data = {"name": "Book", "value": 10, "description": "Hello"} for _ in range(10): response = requests.post("http://127.0.0.1:8000/items/", json=data) + if response.status_code != 200: + time.sleep(3.0) assert response.json() == data diff --git a/tests/mypy/test_static_type_checking.py b/tests/mypy/test_static_type_checking.py index e8b48f3e5..feac37bf9 100644 --- a/tests/mypy/test_static_type_checking.py +++ 
b/tests/mypy/test_static_type_checking.py @@ -53,38 +53,24 @@ def test_mypy_pandas_dataframe(capfd) -> None: ) errors = _get_mypy_errors(capfd.readouterr().out) # assert error messages on particular lines of code - assert errors[35] == { - "msg": ( - 'Argument 1 to "pipe" of "DataFrame" has incompatible type ' - '"Type[pandera.typing.pandas.DataFrame[Any]]"; expected ' - '"Union[Callable[..., pandera.typing.pandas.DataFrame[SchemaOut]], ' - 'Tuple[Callable[..., pandera.typing.pandas.DataFrame[SchemaOut]], str]]"' - ), - "errcode": "arg-type", - } - assert errors[41] == { - "msg": ( - "Incompatible return value type (got " - '"pandas.core.frame.DataFrame", expected ' - '"pandera.typing.pandas.DataFrame[SchemaOut]")' - ), - "errcode": "return-value", - } - assert errors[54] == { - "msg": ( - 'Argument 1 to "fn" has incompatible type ' - '"pandas.core.frame.DataFrame"; expected ' - '"pandera.typing.pandas.DataFrame[Schema]"' - ), - "errcode": "arg-type", - } - assert errors[58] == { - "msg": ( - 'Argument 1 to "fn" has incompatible type ' - '"DataFrame[AnotherSchema]"; expected "DataFrame[Schema]"' - ), - "errcode": "arg-type", - } + assert errors[35]["errcode"] == "arg-type" + assert re.match( + 'Argument 1 to "pipe" of "[A-Za-z]+" has incompatible type', + errors[35]["msg"], + ) + + assert errors[41]["errcode"] == "return-value" + assert re.match("^Incompatible return value type", errors[41]["msg"]) + + assert errors[54]["errcode"] == "arg-type" + assert re.match( + '^Argument 1 to "fn" has incompatible type', errors[54]["msg"] + ) + + assert errors[58]["errcode"] == "arg-type" + assert re.match( + '^Argument 1 to "fn" has incompatible type', errors[58]["msg"] + ) @pytest.mark.parametrize(