Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

try pandera: add jupyterlite notebooks, add support for py3.7 #951

Merged
merged 4 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
pandas-version: ["1.2.0", "1.3.0", "latest"]
exclude:
- python-version: "3.10"
Expand Down Expand Up @@ -196,9 +196,9 @@ jobs:
uses: codecov/codecov-action@v3

- name: Check Docstrings
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }}
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }}
run: nox ${{ env.NOX_FLAGS }} --session doctests

- name: Check Docs
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' }}
if: ${{ matrix.os != 'windows-latest' && matrix.python-version != '3.10' && matrix.python-version != '3.7' }}
run: nox ${{ env.NOX_FLAGS }} --session docs
22 changes: 15 additions & 7 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"sphinx_copybutton",
"recommonmark",
"sphinx_panels",
"jupyterlite_sphinx",
]

doctest_global_setup = """
Expand Down Expand Up @@ -192,15 +193,17 @@ def filter(self, record: pylogging.LogRecord) -> bool:
# that dataclass name is in the message, so that you don't filter out
# other meaningful warnings
return not (
record.getMessage().startswith(
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame"'
)
# NOTE: forward reference false positive needs to be handled
# correctly
or record.getMessage().startswith(
"Cannot resolve forward reference in type annotations of "
'"pandera.schemas.DataFrameSchema'
record.getMessage().startswith(
(
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame"',
"Cannot resolve forward reference in type annotations of "
'"pandera.schemas.DataFrameSchema',
"Cannot resolve forward reference in type annotations of "
'"pandera.typing.DataFrame.style"',
)
)
)

Expand Down Expand Up @@ -259,3 +262,8 @@ def linkcode_resolve(domain, info):
)

return f"https://github.com/pandera-dev/pandera/blob/{tag}/pandera/{fn}{linespec}"


# jupyterlite config
# Notebook(s) handed to the jupyterlite_sphinx extension; the same path is
# referenced by the ``retrolite`` directive in try_pandera.rst.
jupyterlite_contents = ["notebooks/try_pandera.ipynb"]
# NOTE(review): presumably stops jupyterlite_sphinx from registering itself as
# the Sphinx source handler for ``.ipynb`` files -- confirm against the
# jupyterlite-sphinx configuration docs.
jupyterlite_bind_ipynb_suffix = False
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ page or reach out to the maintainers and pandera community on
:hidden:

self
Try Pandera ▶️ <try_pandera>

.. toctree::
:maxdepth: 6
Expand Down
11 changes: 11 additions & 0 deletions docs/source/jupyterlite_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
    "LiteBuildConfig": {
        "federated_extensions": [
            "https://conda.anaconda.org/conda-forge/noarch/pandera-0.12.0-hd8ed1ab_0.tar.bz2"
        ],
        "ignore_sys_prefix": true,
        "piplite_urls": [
            "https://files.pythonhosted.org/packages/95/cc/e058935b0b34d50214596297f0a9edb0781fc5201bf2c6eb8cf1a026d710/pandera-0.12.0-py3-none-any.whl"
        ]
    }
}
80 changes: 80 additions & 0 deletions docs/source/notebooks/try_pandera.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "ac4294bb",
"metadata": {},
"outputs": [],
"source": [
"import piplite\n",
"\n",
"\n",
"for package in [\n",
" \"wrapt\",\n",
" \"typing_extensions\",\n",
" \"mypy_extensions\",\n",
" \"typing_inspect\",\n",
" \"pydantic\",\n",
" \"pandera\",\n",
"]:\n",
" await piplite.install(package, deps=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9a4eef5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import pandera as pa\n",
"from pandera.typing import DataFrame, Series\n",
"\n",
"\n",
"class Schema(pa.SchemaModel):\n",
" item: Series[str] = pa.Field(isin=[\"apple\", \"orange\"], coerce=True)\n",
" price: Series[float] = pa.Field(gt=0)\n",
"\n",
"\n",
"@pa.check_types(lazy=True)\n",
"def transform_data(data: DataFrame[Schema]):\n",
" ...\n",
"\n",
"\n",
"data = pd.DataFrame.from_records([\n",
" {\"item\": \"applee\", \"price\": 0.5},\n",
" {\"item\": \"orange\", \"price\": -1000}\n",
"])\n",
"\n",
"\n",
"try:\n",
" transform_data(data)\n",
"except pa.errors.SchemaErrors as exc:\n",
" display(exc.failure_cases)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
7 changes: 7 additions & 0 deletions docs/source/try_pandera.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Try Pandera
===============

In the notebook below, you can get a sense of how to use pandera right in the
browser without having to install anything locally!

.. retrolite:: notebooks/try_pandera.ipynb
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ dependencies:
- python-multipart

# documentation
- jupyterlite_sphinx
- sphinx
- sphinx-panels
- sphinx-autodoc-typehints <= 1.14.1
Expand Down
6 changes: 5 additions & 1 deletion pandera/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@
Any,
Callable,
Iterable,
Literal,
Optional,
Tuple,
Type,
TypeVar,
Union,
)

try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore[misc]


class DataType(ABC):
"""Base class of all Pandera data types."""
Expand Down
14 changes: 14 additions & 0 deletions pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
from . import engine, numpy_engine, utils
from .type_aliases import PandasDataType, PandasExtensionType, PandasObject

try:
import pyarrow # pylint:disable=unused-import

PYARROW_INSTALLED = True
except ImportError:
PYARROW_INSTALLED = False


def pandas_version():
"""Return the pandas version."""
Expand Down Expand Up @@ -605,6 +612,13 @@ class STRING(DataType, dtypes.String):
storage: Optional[Literal["python", "pyarrow"]] = "python"

def __post_init__(self):
    """Build the pandas ``StringDtype`` for the configured storage backend.

    :raises ModuleNotFoundError: if ``storage`` is ``"pyarrow"`` while the
        module-level ``PYARROW_INSTALLED`` flag is false.
    """
    if self.storage == "pyarrow":
        if not PYARROW_INSTALLED:
            # Fail early with installation instructions instead of letting
            # pandas raise when the dtype is constructed below.
            raise ModuleNotFoundError(
                "pyarrow needs to be installed when using the "
                "string[pyarrow] pandas data type. Please "
                "`pip install pyarrow` or "
                "`conda install -c conda-forge pyarrow` before proceeding."
            )
    # NOTE(review): object.__setattr__ is presumably used because the
    # dataclass is frozen -- confirm against the class decorator.
    object.__setattr__(self, "type", pd.StringDtype(self.storage))

Expand Down
7 changes: 6 additions & 1 deletion pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Any,
Dict,
List,
Literal,
Optional,
Type,
TypeVar,
Expand All @@ -41,6 +40,12 @@
from .error_handlers import SchemaErrorHandler
from .hypotheses import Hypothesis

try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore[misc]


if TYPE_CHECKING:
from pandera.schema_components import Column

Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ nox
importlib_metadata
uvicorn
python-multipart
jupyterlite_sphinx
sphinx
sphinx-panels
sphinx-autodoc-typehints <= 1.14.1
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@
"typing_extensions >= 3.7.4.3 ; python_version<'3.8'",
"typing_inspect >= 0.6.0",
"wrapt",
"pyarrow",
],
extras_require=extras_require,
python_requires=">=3.8",
python_requires=">=3.7",
platforms="any",
classifiers=[
"Development Status :: 5 - Production/Stable",
Expand All @@ -64,6 +63,7 @@
"Intended Audience :: Science/Research",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand Down
2 changes: 1 addition & 1 deletion tests/core/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
}

nullable_string_dtypes = {pd.StringDtype: "string"}
if pa.PANDAS_1_3_0_PLUS:
if pa.PANDAS_1_3_0_PLUS and pandas_engine.PYARROW_INSTALLED:
nullable_string_dtypes.update(
{pd.StringDtype(storage="pyarrow"): "string[pyarrow]"}
)
Expand Down
60 changes: 43 additions & 17 deletions tests/core/test_from_to_format_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest

import pandera as pa
from pandera.engines import pandas_engine


class InSchema(pa.SchemaModel):
Expand Down Expand Up @@ -92,6 +93,19 @@ def invalid_input_dataframe() -> pd.DataFrame:
return pd.DataFrame({"str_col": ["a"]})


def _needs_pyarrow(schema) -> bool:
    """Tell whether ``schema`` is a Parquet/Feather schema and pyarrow is absent.

    :param schema: schema model class used to parametrize the tests.
    :returns: ``True`` only when ``schema`` is one of the Parquet or Feather
        schema models and ``pandas_engine.PYARROW_INSTALLED`` is false.
    """
    pyarrow_backed_schemas = {
        InSchemaParquet,
        InSchemaFeather,
        OutSchemaParquet,
        OutSchemaFeather,
    }
    if schema not in pyarrow_backed_schemas:
        return False
    return not pandas_engine.PYARROW_INSTALLED


@pytest.mark.parametrize(
"schema,to_fn,buf_cls",
[
Expand Down Expand Up @@ -122,24 +136,30 @@ def fn(df: pa.typing.DataFrame[schema]):
(mock_dataframe(), False),
(invalid_input_dataframe(), True),
]:

buf = None if buf_cls is None else buf_cls()
arg = to_fn(df, *([buf] if buf else []))
if buf:
if buf.closed:
pytest.skip(
"skip test for older pandas versions where to_pickle "
"closes user-provided buffers: "
"https://github.com/pandas-dev/pandas/issues/35679"
)
buf.seek(0)
arg = buf
if invalid:
with pytest.raises(pa.errors.SchemaError):
fn(arg)
return

out = fn(arg)
assert df.equals(out)

if _needs_pyarrow(schema):
with pytest.raises(ImportError):
to_fn(df, *([buf] if buf else []))
else:
arg = to_fn(df, *([buf] if buf else []))
if buf:
if buf.closed:
pytest.skip(
"skip test for older pandas versions where to_pickle "
"closes user-provided buffers: "
"https://github.com/pandas-dev/pandas/issues/35679"
)
buf.seek(0)
arg = buf
if invalid:
with pytest.raises(pa.errors.SchemaError):
fn(arg)
return

out = fn(arg)
assert df.equals(out)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -170,6 +190,12 @@ def invalid_fn(
return df

df = mock_dataframe()

if _needs_pyarrow(schema):
with pytest.raises((ImportError)):
fn(df)
return

try:
out = fn(df)
except IOError:
Expand Down
Loading