Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix mypy extra unit tests, pin pandas-stubs for dev env #1056

Merged
merged 8 commits on Dec 15, 2022 (source and target branch names not captured in this page scrape)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ env:
# Increase this value to reset cache if environment.yml has not changed
CACHE_VERSION: 6

concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
cancel-in-progress: true

jobs:

lint:
Expand Down Expand Up @@ -93,7 +97,7 @@ jobs:
PYTEST_FLAGS: --cov=pandera --cov-report=term-missing --cov-report=xml --cov-append
HYPOTHESIS_FLAGS: -n=auto -q --hypothesis-profile=ci
strategy:
fail-fast: true
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.7", "3.8", "3.9", "3.10"]
Expand Down
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ repos:
entry: mypy
language: python
types: [python]
files: (^pandera/|^tests/|^scripts/)
pass_filenames: false
exclude: (^docs/|^tests/mypy/modules/)
require_serial: true
args: ["pandera", "tests", "scripts"]
verbose: true
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- pydantic

# mypy extra
- pandas-stubs
- pandas-stubs <= 1.5.2.221213

# pyspark extra
- pyspark >= 3.2.0
Expand Down
8 changes: 8 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[mypy]
ignore_missing_imports = True
follow_imports = skip
allow_redefinition = True
warn_return_any = False
warn_unused_configs = True
show_error_codes = True
exclude = tests/mypy/modules
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def requirements(session: Session) -> None: # pylint:disable=unused-argument
print(f"{REQUIREMENT_PATH} has been re-generated ✨ 🍰 ✨")
raise err

ignored_pkgs = {"black", "pandas"}
ignored_pkgs = {"black", "pandas", "pandas-stubs"}
mismatched = []
# only compare package versions, not python version markers.
str_dev_reqs = [str(x) for x in DEV_REQUIREMENTS]
Expand Down
2 changes: 1 addition & 1 deletion pandera/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def _prepare_series_input(
if check_utils.is_field(df_or_series):
return df_or_series # type: ignore[return-value]
elif self.groupby is None:
return df_or_series[column] # type: ignore[index]
return df_or_series[column] # type: ignore
elif isinstance(self.groupby, list):
return self._format_groupby_input( # type: ignore[return-value]
df_or_series.groupby(self.groupby)[column], # type: ignore[index]
Expand Down
14 changes: 7 additions & 7 deletions pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def coerce_value(self, value: Any) -> decimal.Decimal:
return dec.quantize(self._exp, context=self._ctx)

def coerce(self, data_container: PandasObject) -> PandasObject:
return data_container.apply(self.coerce_value)
return data_container.apply(self.coerce_value) # type: ignore

def check( # type: ignore
self,
Expand Down Expand Up @@ -577,7 +577,7 @@ def __init__( # pylint:disable=super-init-not-called
object.__setattr__(
self,
"type",
pd.CategoricalDtype(self.categories, self.ordered),
pd.CategoricalDtype(self.categories, self.ordered), # type: ignore
)

def coerce(self, data_container: PandasObject) -> PandasObject:
Expand Down Expand Up @@ -639,13 +639,13 @@ def __str__(self) -> str:
else:

@Engine.register_dtype(
equivalents=["string", pd.StringDtype, pd.StringDtype()]
) # type: ignore
equivalents=["string", pd.StringDtype, pd.StringDtype()] # type: ignore
)
@immutable
class STRING(DataType, dtypes.String): # type: ignore
"""Semantic representation of a :class:`pandas.StringDtype`."""

type = pd.StringDtype()
type = pd.StringDtype() # type: ignore


@Engine.register_dtype(
Expand Down Expand Up @@ -984,8 +984,8 @@ def __post_init__(self):
def from_parametrized_dtype(cls, pd_dtype: pd.SparseDtype):
"""Convert a :class:`pandas.SparseDtype` to
a Pandera :class:`pandera.engines.pandas_engine.Sparse`."""
return cls( # type: ignore
dtype=pd_dtype.subtype, fill_value=pd_dtype.fill_value
return cls(
dtype=pd_dtype.subtype, fill_value=pd_dtype.fill_value # type: ignore
)


Expand Down
6 changes: 3 additions & 3 deletions pandera/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def get_dtypes(self, dataframe: pd.DataFrame) -> Dict[str, DataType]:
)
return {
**{n: c.dtype for n, c in self.columns.items() if not c.regex},
**regex_dtype,
**regex_dtype, # type: ignore
}

@property
Expand Down Expand Up @@ -595,7 +595,7 @@ def _validate(
is_schema_col = column in expanded_column_names
if (self.strict is True) and not is_schema_col:
msg = (
f"column '{column}' not in {self.__class__.__name__}"
f"column {column!r} not in {self.__class__.__name__}"
f" {self.columns}"
)
error_handler.collect_error(
Expand All @@ -621,7 +621,7 @@ def _validate(
errors.SchemaError(
self,
check_obj,
message=f"column '{column}' out-of-order",
message=f"column {column!r} out-of-order",
failure_cases=scalar_failure_case(column),
check="column_ordered",
),
Expand Down
2 changes: 1 addition & 1 deletion pandera/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _mask(
) -> Union[pd.Series, pd.Index]:
if pd.api.types.is_timedelta64_dtype(val): # type: ignore [arg-type]
return val.mask(null_mask, pd.NaT) # type: ignore [union-attr,arg-type]
elif val.dtype == pd.StringDtype():
elif val.dtype == pd.StringDtype(): # type: ignore [call-arg]
return val.mask(null_mask, pd.NA) # type: ignore [union-attr,arg-type]
return val.mask(null_mask) # type: ignore [union-attr]

Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ typing_extensions >= 3.7.4.3
frictionless
pyarrow
pydantic
pandas-stubs
pandas-stubs <= 1.5.2.221213
pyspark >= 3.2.0
modin
protobuf <= 3.20.3
Expand Down
8 changes: 0 additions & 8 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
[isort]
float_to_top = true
profile = black

[mypy]
ignore_missing_imports = True
allow_redefinition = True
warn_return_any = False
warn_unused_configs = True
show_error_codes = True
exclude = tests/mypy/modules
14 changes: 7 additions & 7 deletions tests/core/test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,13 +900,13 @@ def validate_union(
) -> typing.Union[DataFrame[OnlyZeroesSchema], DataFrame[OnlyOnesSchema]]:
return df

validate_union(pd.DataFrame({"a": [0, 0]}))
validate_union(pd.DataFrame({"a": [1, 1]}))
validate_union(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]
validate_union(pd.DataFrame({"a": [1, 1]})) # type: ignore [arg-type]

with pytest.raises(errors.SchemaErrors):
validate_union(pd.DataFrame({"a": [0, 1]}))
validate_union(pd.DataFrame({"a": [0, 1]})) # type: ignore [arg-type]
with pytest.raises(errors.SchemaErrors):
validate_union(pd.DataFrame({"a": [2, 2]}))
validate_union(pd.DataFrame({"a": [2, 2]})) # type: ignore [arg-type]

@check_types
def validate_union_wrong_outputs(
Expand All @@ -916,10 +916,10 @@ def validate_union_wrong_outputs(
) -> typing.Union[DataFrame[OnlyZeroesSchema], DataFrame[OnlyOnesSchema]]:
new_df = df.copy()
new_df["a"] = [0, 1]
return new_df
return new_df # type: ignore [return-value]

with pytest.raises(errors.SchemaErrors):
validate_union_wrong_outputs(pd.DataFrame({"a": [0, 0]}))
validate_union_wrong_outputs(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]


def test_check_types_non_dataframes() -> None:
Expand Down Expand Up @@ -947,7 +947,7 @@ def union_df_int_types_pydantic_check(
) -> typing.Union[DataFrame[OnlyZeroesSchema], int]:
return val

union_df_int_types_pydantic_check(pd.DataFrame({"a": [0, 0]}))
union_df_int_types_pydantic_check(pd.DataFrame({"a": [0, 0]})) # type: ignore [arg-type]
int_val_pydantic = union_df_int_types_pydantic_check(5)
str_val_pydantic = union_df_int_types_pydantic_check("5") # type: ignore[arg-type]
assert isinstance(int_val_pydantic, int)
Expand Down
13 changes: 7 additions & 6 deletions tests/core/test_logical_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ def test_logical_datatype_check(
"data, expected_datatype, failure_cases",
[
(
[Decimal("1.2"), Decimal("12.3")],
[Decimal("1.2"), Decimal("12.3")] * 100,
pandas_engine.Decimal(2, 1),
[Decimal("12.3")],
[Decimal("12.3")] * 100,
),
(
[Decimal("1.2"), None, pd.NA, np.nan],
[Decimal("1.2"), None, pd.NA, np.nan] * 100,
pandas_engine.Decimal(19, 5),
[],
),
Expand All @@ -129,14 +129,15 @@ def test_logical_datatype_check(
pd.NA,
np.nan,
pd.NaT,
],
]
* 100,
pandas_engine.Date(),
[],
),
(
["2022-01-01", "01/01/2022"],
["2022-01-01", "01/01/2022"] * 100,
pandas_engine.Date(to_datetime_kwargs={"format": "%Y-%m-%d"}),
["01/01/2022"],
["01/01/2022"] * 100,
),
],
)
Expand Down
12 changes: 6 additions & 6 deletions tests/modin/test_schemas_on_modin.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,16 +335,16 @@ class Schema(pa.SchemaModel):

valid_df = mpd.DataFrame(
{
"int_field": [1, 2, 3],
"float_field": [-1.1, -2.1, -3.1],
"str_field": ["a", "b", "c"],
"int_field": [1, 2, 3] * 10,
"float_field": [-1.1, -2.1, -3.1] * 10,
"str_field": ["a", "b", "c"] * 10,
}
)
invalid_df = mpd.DataFrame(
{
"int_field": [-1],
"field_field": [1],
"str_field": ["d"],
"int_field": [-1] * 100,
"field_field": [1] * 100,
"str_field": ["d"] * 100,
}
)

Expand Down
2 changes: 1 addition & 1 deletion tests/mypy/modules/pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def fn_mutate_inplace(df: DataFrame[Schema]) -> DataFrame[SchemaOut]:

@pa.check_types
def fn_assign_and_get_index(df: DataFrame[Schema]) -> DataFrame[SchemaOut]:
return df.assign(foo=30).iloc[:3]
return df.assign(foo=30).iloc[:3] # mypy error
# error: Incompatible return value type (got "pandas.core.frame.DataFrame",
# expected "pandera.typing.pandas.DataFrame[SchemaOut]") [return-value]

Expand Down
10 changes: 5 additions & 5 deletions tests/mypy/modules/pandas_time.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# pylint: skip-file
import pandas as pd

pd.Timestamp.now() + pd.tseries.offsets.YearEnd(1) # false positive
pd.Timestamp.now() + pd.tseries.offsets.YearEnd(1)

pd.Timedelta(minutes=2) # false positive
pd.Timedelta(2, unit="minutes") # false positive
pd.Timedelta(minutes=2)
pd.Timedelta(2, unit="minutes")

pd.Timedelta(minutes=2, seconds=30) # false positive
pd.Timedelta(2.5, unit="minutes") # false positive
pd.Timedelta(minutes=2, seconds=30)
pd.Timedelta(2.5, unit="minutes") # mypy error
pd.Timedelta(2, unit="minutes") + pd.Timedelta(30, unit="seconds")
36 changes: 26 additions & 10 deletions tests/mypy/test_static_type_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@
test_module_dir = Path(os.path.dirname(__file__))


def _get_mypy_errors(stdout) -> typing.List[typing.Dict[str, str]]:
def _get_mypy_errors(
module_name: str,
stdout,
) -> typing.List[typing.Dict[str, str]]:
"""Parse line number and error message."""
errors: typing.List[typing.Dict[str, str]] = []
# last line is summary of errors
for error in [x for x in stdout.split("\n") if x != ""][:-1]:
matches = re.match(
r".+\.py:(?P<lineno>\d+): error: (?P<msg>.+) \[(?P<errcode>.+)\]",
error,
regex = (
r".+{}:".format(module_name.replace(".", r"\."))
+ r"(?P<lineno>\d+): error: (?P<msg>.+) \[(?P<errcode>.+)\]"
)
matches = re.match(regex, error)
if matches is not None:
match_dict = matches.groupdict()
errors.append(
Expand All @@ -53,16 +57,21 @@ def _get_mypy_errors(stdout) -> typing.List[typing.Dict[str, str]]:
def test_mypy_pandas_dataframe(capfd) -> None:
"""Test that mypy raises expected errors on pandera-decorated functions."""
# pylint: disable=subprocess-run-check
cache_dir = str(test_module_dir / ".mypy_cache" / "test-mypy-default")
subprocess.run(
[
sys.executable,
"-m",
"mypy",
str(test_module_dir / "modules" / "pandas_dataframe.py"),
"--cache-dir",
cache_dir,
"--config-file",
str(test_module_dir / "config" / "no_plugin.ini"),
],
text=True,
)
errors = _get_mypy_errors(capfd.readouterr().out)
errors = _get_mypy_errors("pandas_dataframe.py", capfd.readouterr().out)
assert len(PANDAS_DATAFRAME_ERRORS) == len(errors)
for expected, error in zip(PANDAS_DATAFRAME_ERRORS, errors):
assert error["errcode"] == expected["errcode"]
Expand Down Expand Up @@ -97,6 +106,13 @@ def test_pandera_runtime_errors(fn) -> None:
{"msg": 'Argument 1 to "fn" has incompatible type', "errcode": "arg-type"},
] * 2

PANDAS_TIME_ERRORS = [
{
"msg": 'Argument 1 to "Timedelta" has incompatible type "float"',
"errcode": "arg-type",
},
]

PYTHON_SLICE_ERRORS = [
{"msg": "Slice index must be an integer or None", "errcode": "misc"},
]
Expand Down Expand Up @@ -129,12 +145,12 @@ def test_pandera_runtime_errors(fn) -> None:
["pandera_types.py", "plugin_mypy.ini", PANDERA_TYPES_ERRORS],
["pandas_concat.py", "no_plugin.ini", []],
["pandas_concat.py", "plugin_mypy.ini", []],
["pandas_time.py", "no_plugin.ini", []],
["pandas_time.py", "plugin_mypy.ini", []],
["pandas_time.py", "no_plugin.ini", PANDAS_TIME_ERRORS],
["pandas_time.py", "plugin_mypy.ini", PANDAS_TIME_ERRORS],
["python_slice.py", "no_plugin.ini", PYTHON_SLICE_ERRORS],
["python_slice.py", "plugin_mypy.ini", PYTHON_SLICE_ERRORS],
["pandas_index.py", "no_plugin.ini", PANDAS_INDEX_ERRORS],
["pandas_index.py", "plugin_mypy.ini", PANDAS_INDEX_ERRORS],
["pandas_index.py", "no_plugin.ini", []],
["pandas_index.py", "plugin_mypy.ini", []],
["pandas_series.py", "no_plugin.ini", PANDAS_SERIES_ERRORS],
["pandas_series.py", "plugin_mypy.ini", PANDAS_SERIES_ERRORS],
],
Expand All @@ -160,7 +176,7 @@ def test_pandas_stubs_false_positives(
]
# pylint: disable=subprocess-run-check
subprocess.run(commands, text=True)
resulting_errors = _get_mypy_errors(capfd.readouterr().out)
resulting_errors = _get_mypy_errors(module, capfd.readouterr().out)
assert len(expected_errors) == len(resulting_errors)
for expected, error in zip(expected_errors, resulting_errors):
assert error["errcode"] == expected["errcode"]
Expand Down