-
-
Notifications
You must be signed in to change notification settings - Fork 284
/
__init__.py
167 lines (133 loc) · 4.88 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""Base classes for parsing, validation, and error Reporting Backends.
These classes implement a common interface of operations needed for
data validation. These operations are exposed as methods that are composed
together to implement the pandera schema specification.
"""
from abc import ABC
from typing import Any, Dict, List, NamedTuple, Optional, Union
# from pandera.api.base.checks import BaseCheck
from pandera.errors import SchemaError, SchemaErrorReason
class CoreCheckResult(NamedTuple):
    """Tuple-based record that holds the result of a core check.

    Only ``passed`` and ``check`` must be supplied; every remaining field
    defaults to ``None``.
    """

    # Required fields.
    passed: bool
    # "BaseCheck" is written as a string forward reference.
    check: Union[str, "BaseCheck"]  # type: ignore

    # Optional details about the check outcome; all default to None.
    check_index: Optional[int] = None
    check_output: Optional[Any] = None
    reason_code: Optional[SchemaErrorReason] = None
    message: Optional[str] = None
    failure_cases: Optional[Any] = None

    # Optional error objects attached to the result; both default to None.
    schema_error: Optional[SchemaError] = None
    original_exc: Optional[Exception] = None
class CoreParserResult(NamedTuple):
    """Tuple-based record for core parser results.

    No fields are declared, so an instance is an empty tuple.
    """
class BaseSchemaBackend(ABC):
    """Base class that defines the interface of a schema backend.

    None of the methods is marked abstract; each one raises
    ``NotImplementedError`` until a subclass overrides it.
    """

    def preprocess(self, check_obj, inplace: bool = False):
        """Preprocess the check object ahead of applying check functions.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def subsample(
        self,
        check_obj,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
    ):
        """Subsample the check object ahead of applying check functions.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def validate(
        self,
        check_obj,
        schema,
        *,
        head: Optional[int] = None,
        tail: Optional[int] = None,
        sample: Optional[int] = None,
        random_state: Optional[int] = None,
        lazy: bool = False,
        inplace: bool = False,
    ):
        """Parse and validate a check object.

        Implementations return the type-coerced and validated object.
        Every argument after ``schema`` is keyword-only.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def coerce_dtype(self, check_obj, schema=None):
        """Coerce the check object's data type.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def run_check(self, check_obj, schema, check, check_index: int, *args):
        """Run one check against the check object.

        Additional positional arguments are accepted through ``*args``.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def run_checks(
        self,
        check_obj,
        schema,
    ):
        """Run a list of checks against the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def run_schema_component_checks(self, check_obj, schema_components, lazy):
        """Run the checks of all schema components.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def check_name(
        self,
        check_obj,
        schema,
    ):
        """Core check for the name of the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def check_nullable(
        self,
        check_obj,
        schema,
    ):
        """Core check for the nullability of the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def check_unique(
        self,
        check_obj,
        schema,
    ):
        """Core check for the uniqueness of values in the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def check_dtype(
        self,
        check_obj,
        schema,
    ):
        """Core check for the data type of the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def failure_cases_metadata(
        self,
        schema_name: str,
        schema_errors: List[SchemaError],
    ):
        """Get failure cases metadata for lazy validation.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class BaseCheckBackend(ABC):
    """Base class that defines the interface of a check backend.

    None of the methods is marked abstract; apart from ``__init__``, each
    one raises ``NotImplementedError`` until a subclass overrides it.
    """

    def __init__(self, check):  # pylint: disable=unused-argument
        """Initialize a check backend object; ``check`` is not used here."""

    def __call__(
        self,
        check_obj,
        key=None,
    ):
        """Invoke the backend on a check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def query(
        self,
        check_obj,
    ):
        """Querying behavior that produces a subset of the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def groupby(
        self,
        check_obj,
    ):
        """Groupby behavior for the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def aggregate(
        self,
        check_obj,
    ):
        """Aggregation behavior for the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def preprocess(
        self,
        check_obj,
        key,
    ):
        """Preprocess the check object ahead of applying the check function.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def postprocess(
        self,
        check_obj,
        check_output,
    ):
        """Postprocess the result of applying the check function.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def apply(
        self,
        check_obj,
    ):
        """Apply the check function to the check object.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def statistics(self):
        """Check statistics.

        Defined as a plain method here, not a ``property``.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def strategy(self):
        """Return a data generation strategy.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()