From d22e63c26f8b1dfe414cdb87c39076eadafec110 Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Thu, 11 Nov 2021 22:11:36 +0100 Subject: [PATCH 1/5] Remove arbitrary exception conversion to ValidationError --- apischema/conversions/converters.py | 9 +++- apischema/deserialization/methods.py | 27 ++-------- apischema/validation/errors.py | 24 ++++++++- apischema/validation/validators.py | 4 -- docs/conversions.md | 80 +++++++++++++++------------- docs/validation.md | 29 +++++++++- examples/field_validator.py | 2 +- examples/validator.py | 2 +- examples/validator_function.py | 2 +- examples/validator_inheritance.py | 2 +- tests/validation/test_validator.py | 6 +-- 11 files changed, 113 insertions(+), 74 deletions(-) diff --git a/apischema/conversions/converters.py b/apischema/conversions/converters.py index 94d7dfda..e49ed3a8 100644 --- a/apischema/conversions/converters.py +++ b/apischema/conversions/converters.py @@ -29,6 +29,7 @@ from apischema.types import AnyType from apischema.typing import is_type_var from apischema.utils import get_args2, get_origin_or_type, stop_signature_abuse +from apischema.validation.errors import ValidationError if TYPE_CHECKING: pass @@ -181,7 +182,13 @@ def reset_serializer(cls: Type): def as_str(cls: Cls) -> Cls: - deserializer(Conversion(cls, source=str)) + def wrapper(data): + try: + return cls(data) + except ValueError as err: + raise ValidationError([str(err)]) + + deserializer(Conversion(wrapper, source=str, target=cls)) serializer(Conversion(str, source=cls)) return cls diff --git a/apischema/deserialization/methods.py b/apischema/deserialization/methods.py index 633f8afe..52181807 100644 --- a/apischema/deserialization/methods.py +++ b/apischema/deserialization/methods.py @@ -21,7 +21,6 @@ from apischema.validation.errors import ValidationError, merge_errors from apischema.validation.mock import ValidatorMock from apischema.validation.validators import Validator, validate -from apischema.visitor import Unsupported if TYPE_CHECKING: pass @@ -480,17 +479,7 @@ def deserialize(self, data: Any) -> Any: raise error elif field_errors or errors: raise ValidationError(errors, field_errors) - try: - res = self.cls(**values) - except (AssertionError, ValidationError): - raise - except TypeError as err: - if str(err).startswith("__init__() got"): - raise Unsupported(self.cls) - else: - raise ValidationError([str(err)]) - except Exception as err: - raise ValidationError([str(err)]) + res = self.cls(**values) if self.validators: validate(res, validators2, init, aliaser=self.aliaser) return res @@ -651,12 +640,7 @@ class ConversionMethod(DeserializationMethod): method: DeserializationMethod def deserialize(self, data: Any) -> Any: - try: - return self.converter(self.method.deserialize(data)) - except (ValidationError, AssertionError): - raise - except Exception as err: - raise ValidationError([str(err)]) + return self.converter(self.method.deserialize(data)) @dataclass @@ -678,11 +662,6 @@ def deserialize(self, data: Any) -> Any: except ValidationError as err: error = merge_errors(error, err) else: - try: - return alternative.converter(value) - except (ValidationError, AssertionError): - raise - except Exception as err: - raise ValidationError([str(err)]) + return alternative.converter(value) assert error is not None raise error diff --git a/apischema/validation/errors.py b/apischema/validation/errors.py index b1606326..d34bcc7f 100644 --- a/apischema/validation/errors.py +++ b/apischema/validation/errors.py @@ -46,11 +46,33 @@ class LocalizedError(TypedDict): LocalizedError = Mapping[str, Any] # type: ignore -@dataclass +@dataclass(frozen=True) class ValidationError(Exception): messages: Sequence[ErrorMsg] = field(default_factory=list) children: Mapping[ErrorKey, "ValidationError"] = field(default_factory=dict) + @overload + def __init__(self, __message: str): + ... + + @overload + def __init__( + self, + messages: Sequence[ErrorMsg] = None, + children: Mapping[ErrorKey, "ValidationError"] = None, + ): + ... + + def __init__( + self, + messages: Union[ErrorMsg, Sequence[ErrorMsg]] = None, + children: Mapping[ErrorKey, "ValidationError"] = None, + ): + if isinstance(messages, str): + messages = [messages] + super().__setattr__("messages", messages or []) + super().__setattr__("children", children or {}) + def __str__(self): return repr(self) diff --git a/apischema/validation/validators.py b/apischema/validation/validators.py index e418e9fc..89a1c3f8 100644 --- a/apischema/validation/validators.py +++ b/apischema/validation/validators.py @@ -135,10 +135,6 @@ def validate( except NonTrivialDependency as exc: exc.validator = validator raise - except AssertionError: - raise - except Exception as e: - err = ValidationError([str(e)]) else: continue if validator.field is not None: diff --git a/docs/conversions.md b/docs/conversions.md index 0874e125..c591cebb 100644 --- a/docs/conversions.md +++ b/docs/conversions.md @@ -160,90 +160,98 @@ Serialized methods can also have dedicated conversions for their return {!serialized_conversions.py!} ``` -## String conversions -A common pattern of conversion concerns classes that have a string constructor and a `__str__` method; standard types `uuid.UUID`, `pathlib.Path`, `ipaddress.IPv4Address` are concerned. Using `apischema.conversions.as_str` will register a string-deserializer from the constructor and a string-serializer from the `__str__` method. +## Default conversions + +As with almost every default behavior in *apischema*, default conversions can be configured using `apischema.settings.deserialization.default_conversion`/`apischema.settings.serialization.default_conversion`. The initial value of these settings are the function which retrieved conversions registered with `deserializer`/`serializer`. + +You can for example [support *attrs*](examples/attrs_support.md) classes with this feature: ```python -{!as_str.py!} +{!examples/attrs_support.py!} ``` -!!! note - Previously mentioned standard types are handled by *apischema* using `as_str`. +*apischema* functions (`deserialize`/`serialize`/`deserialization_schema`/`serialization_schema`/`definitions_schema`) also have a `default_conversion` parameter to dynamically modify default conversions. See [FAQ](#whats-the-difference-between-conversion-and-default_conversion-parameters) for the difference between `conversion` and `default_conversion` parameters. -## Use `Enum` names +## Sub-conversions -`Enum` subclasses are (de)serialized using values. However, you may want to use enumeration names instead, that's why *apischema* provides `apischema.conversion.as_names` to decorate `Enum` subclasses. +Sub-conversions are [dynamic conversions](#dynamic-conversions--select-conversions-at-runtime) applied on the result of a conversion. ```python -{!as_names.py!} +{!sub_conversions.py!} ``` -## Object deserialization — transform function into a dataclass deserializer +Sub-conversions can also be used to [bypass registered conversions](#bypass-registered-conversion) or to define [recursive conversions](#lazyrecursive-conversions). -`apischema.objects.object_deserialization` can convert a function into a new function taking a unique parameter, a dataclass whose fields are mapped from the original function parameters. +## Lazy/recursive conversions -It can be used for example to build a deserialization conversion from an alternative constructor. +Conversions can be defined lazily, i.e. using a function returning `Conversion` (single, or a tuple of it); this function must be wrapped into a `apischema.conversions.LazyConversion` instance. + +It allows creating recursive conversions or using a conversion object which can be modified after its definition (for example a conversion for a base class modified by `__init_subclass__`) +It is used by *apischema* itself for the generated JSON schema. It is indeed a recursive data, and the [different versions](json_schema.md#json-schema--openapi-version) are handled by a conversion with a lazy recursive sub-conversion. ```python -{!object_deserialization.py!} +{!recursive_conversions.py!} ``` -!!! note - Parameters metadata can be specified using `typing.Annotated`, or be passed with `parameters_metadata` parameter, which is a mapping of parameter names as key and mapped metadata as value. - -## Object serialization — select only a subset of fields +### Lazy registered conversions -`apischema.objects.object_serialization` can be used to serialize only a subset of an object fields and methods. +Lazy conversions can also be registered, but the deserialization target/serialization source has to be passed too. ```python -{!object_serialization.py!} +{!lazy_registered_conversion.py!} ``` -## Default conversions +## Conversion helpers -As with almost every default behavior in *apischema*, default conversions can be configured using `apischema.settings.deserialization.default_conversion`/`apischema.settings.serialization.default_conversion`. The initial value of these settings are the function which retrieved conversions registered with `deserializer`/`serializer`. -You can for example [support *attrs*](examples/attrs_support.md) classes with this feature: + +### String conversions + +A common pattern of conversion concerns classes that have a string constructor and a `__str__` method; standard types `uuid.UUID`, `pathlib.Path`, `ipaddress.IPv4Address` are concerned. Using `apischema.conversions.as_str` will register a string-deserializer from the constructor and a string-serializer from the `__str__` method. `ValueError` raised by the constructor is caught and converted to `ValidationError`. ```python -{!examples/attrs_support.py!} +{!as_str.py!} ``` -*apischema* functions (`deserialize`/`serialize`/`deserialization_schema`/`serialization_schema`/`definitions_schema`) also have a `default_conversion` parameter to dynamically modify default conversions. See [FAQ](#whats-the-difference-between-conversion-and-default_conversion-parameters) for the difference between `conversion` and `default_conversion` parameters. +!!! note + Previously mentioned standard types are handled by *apischema* using `as_str`. -## Sub-conversions +### Use `Enum` names -Sub-conversions are [dynamic conversions](#dynamic-conversions--select-conversions-at-runtime) applied on the result of a conversion. +`Enum` subclasses are (de)serialized using values. However, you may want to use enumeration names instead, that's why *apischema* provides `apischema.conversion.as_names` to decorate `Enum` subclasses. ```python -{!sub_conversions.py!} +{!as_names.py!} ``` -Sub-conversions can also be used to [bypass registered conversions](#bypass-registered-conversion) or to define [recursive conversions](#lazyrecursive-conversions). +### Class as union of its subclasses -## Lazy/recursive conversions -Conversions can be defined lazily, i.e. using a function returning `Conversion` (single, or a tuple of it); this function must be wrapped into a `apischema.conversions.LazyConversion` instance. -It allows creating recursive conversions or using a conversion object which can be modified after its definition (for example a conversion for a base class modified by `__init_subclass__`) -It is used by *apischema* itself for the generated JSON schema. It is indeed a recursive data, and the [different versions](json_schema.md#json-schema--openapi-version) are handled by a conversion with a lazy recursive sub-conversion. +### Object deserialization — transform function into a dataclass deserializer + +`apischema.objects.object_deserialization` can convert a function into a new function taking a unique parameter, a dataclass whose fields are mapped from the original function parameters. + +It can be used for example to build a deserialization conversion from an alternative constructor. ```python -{!recursive_conversions.py!} +{!object_deserialization.py!} ``` -### Lazy registered conversions +!!! note +Parameters metadata can be specified using `typing.Annotated`, or be passed with `parameters_metadata` parameter, which is a mapping of parameter names as key and mapped metadata as value. -Lazy conversions can also be registered, but the deserialization target/serialization source has to be passed too. +### Object serialization — select only a subset of fields + +`apischema.objects.object_serialization` can be used to serialize only a subset of an object fields and methods. ```python -{!lazy_registered_conversion.py!} +{!object_serialization.py!} ``` - ## FAQ #### What's the difference between `conversion` and `default_conversion` parameters? diff --git a/docs/validation.md b/docs/validation.md index 68778471..b967dccd 100644 --- a/docs/validation.md +++ b/docs/validation.md @@ -34,6 +34,9 @@ Messages can be customized by setting the corresponding attribute of `apischema. Dataclass validation can be completed by custom validators. These are simple decorated methods which will be executed during validation, after all fields having been deserialized. +!!! note + Previously to v0.17, validators could raise arbitrary exceptions (except AssertionError of course); see [FAQ](#why-validators-cannot-raise-arbitrary-exception) for the reason of this change. + ```python {!validator.py!} ``` @@ -158,4 +161,28 @@ Last but not least, validators can be embedded directly into `Annotated` argumen *apischema* uses type annotations, so every objects used can already be statically type-checked (with *Mypy*/*Pycharm*/etc.) at instantiation but also at modification. #### Why use validators for dataclasses instead of doing validation in `__post_init__`? -Actually, validation can completely be done in `__post_init__`, there is no problem with that. However, validators offers one thing that cannot be achieved with `__post_init__`: they are run before `__init__`, so they can validate incomplete data. Moreover, they are only run during deserialization, so they don't add overhead to normal class instantiation. \ No newline at end of file +Actually, validation can completely be done in `__post_init__`, there is no problem with that. However, validators offers one thing that cannot be achieved with `__post_init__`: they are run before `__init__`, so they can validate incomplete data. Moreover, they are only run during deserialization, so they don't add overhead to normal class instantiation. + +#### Why validators cannot raise arbitrary exception? + +Allowing arbitrary exception is in fact a security issue, because unwanted exception could be raised, and their message displayed in validation error. It could either contain sensitive data, or give information about the implementation which could be used to hack it. + +By the way, it's possible to define a decorator to convert precise exceptions to `ValidationError`: +```python +from collections.abc import Callable +from functools import wraps +from typing import TypeVar +from apischema import ValidationError + +Func = TypeVar("Func", bound=Callable) +def catch(*exceptions) -> Callable[[Func], Func]: + def decorator(func: Func) -> Func: + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as err: + raise ValidationError(str(err)) if isinstance(err, exceptions) else err + return wrapper + return decorator +``` \ No newline at end of file diff --git a/examples/field_validator.py b/examples/field_validator.py index 23d3950f..5b88a81b 100644 --- a/examples/field_validator.py +++ b/examples/field_validator.py @@ -8,7 +8,7 @@ def check_no_duplicate_digits(n: int): if len(str(n)) != len(set(str(n))): - raise ValueError("number has duplicate digits") + raise ValidationError("number has duplicate digits") @dataclass diff --git a/examples/validator.py b/examples/validator.py index 5e17a5e5..c9726db8 100644 --- a/examples/validator.py +++ b/examples/validator.py @@ -14,7 +14,7 @@ class PasswordForm: def password_match(self): # DO NOT use assert if self.password != self.confirmation: - raise ValueError("password doesn't match its confirmation") + raise ValidationError("password doesn't match its confirmation") with raises(ValidationError) as err: diff --git a/examples/validator_function.py b/examples/validator_function.py index 723029dc..e779d8c1 100644 --- a/examples/validator_function.py +++ b/examples/validator_function.py @@ -12,7 +12,7 @@ def check_palindrome(s: Palindrome): for i in range(len(s) // 2): if s[i] != s[-1 - i]: - raise ValueError("Not a palindrome") + raise ValidationError("Not a palindrome") assert deserialize(Palindrome, "tacocat") == "tacocat" diff --git a/examples/validator_inheritance.py b/examples/validator_inheritance.py index 502e1f55..7f0844ce 100644 --- a/examples/validator_inheritance.py +++ b/examples/validator_inheritance.py @@ -13,7 +13,7 @@ class PasswordForm: @validator def password_match(self): if self.password != self.confirmation: - raise ValueError("password doesn't match its confirmation") + raise ValidationError("password doesn't match its confirmation") @dataclass diff --git a/tests/validation/test_validator.py b/tests/validation/test_validator.py index c8874f13..832fa05e 100644 --- a/tests/validation/test_validator.py +++ b/tests/validation/test_validator.py @@ -22,7 +22,7 @@ def a_gt_10(self): @validator def a_lt_100(self): if self.a >= 100: - raise ValueError("error2") + raise ValidationError("error2") @validator def non_trivial(self): @@ -49,9 +49,9 @@ def test_validator_descriptor(): validator = get_validators_by_method(Data, Data.a_gt_10) assert validator.dependencies == {"a"} # Can be called from class and instance - with raises(ValueError): + with raises(ValidationError): assert Data(200, 0).a_lt_100() - with raises(ValueError): + with raises(ValidationError): assert Data.a_lt_100(Data(200, 0)) From 6adf66054fbec095678df2f27f23d3005cdffae3 Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Thu, 11 Nov 2021 22:24:56 +0100 Subject: [PATCH 2/5] Make ValidationError a normal class --- apischema/validation/errors.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/apischema/validation/errors.py b/apischema/validation/errors.py index d34bcc7f..fa5b8e70 100644 --- a/apischema/validation/errors.py +++ b/apischema/validation/errors.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass, field, replace +from dataclasses import replace from functools import reduce from typing import ( Any, @@ -46,11 +46,7 @@ class LocalizedError(TypedDict): LocalizedError = Mapping[str, Any] # type: ignore -@dataclass(frozen=True) class ValidationError(Exception): - messages: Sequence[ErrorMsg] = field(default_factory=list) - children: Mapping[ErrorKey, "ValidationError"] = field(default_factory=dict) - @overload def __init__(self, __message: str): ... @@ -70,11 +66,11 @@ def __init__( ): if isinstance(messages, str): messages = [messages] - super().__setattr__("messages", messages or []) - super().__setattr__("children", children or {}) + self.messages: Sequence[str] = messages or [] + self.children: Mapping[ErrorKey, "ValidationError"] = children or {} def __str__(self): - return repr(self) + return f"{ValidationError.__name__}: {self.errors}" def _errors(self) -> Iterator[Tuple[List[ErrorKey], ErrorMsg]]: for msg in self.messages: From 137acaa5c67b5bf43e3911ecfcd2c27c60217305 Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Thu, 11 Nov 2021 22:33:31 +0100 Subject: [PATCH 3/5] Fix tests --- apischema/validation/errors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apischema/validation/errors.py b/apischema/validation/errors.py index fa5b8e70..a57a7615 100644 --- a/apischema/validation/errors.py +++ b/apischema/validation/errors.py @@ -1,4 +1,3 @@ -from dataclasses import replace from functools import reduce from typing import ( Any, @@ -139,7 +138,7 @@ def apply_aliaser(error: ValidationError, aliaser: Aliaser) -> ValidationError: child2 = apply_aliaser(child, aliaser) aliased |= child2 is not child aliased_children[key] = child2 - return replace(error, children=aliased_children) if aliased else error + return ValidationError(error.messages, aliased_children) if aliased else error def _rec_build_error(path: Sequence[ErrorKey], msg: ErrorMsg) -> ValidationError: From 6ac9fe1a1207e8143c8a1cfd4a860ba1004b6742 Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Thu, 11 Nov 2021 22:38:32 +0100 Subject: [PATCH 4/5] Fix test --- tests/validation/test_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/validation/test_validator.py b/tests/validation/test_validator.py index 832fa05e..47d4ae9a 100644 --- a/tests/validation/test_validator.py +++ b/tests/validation/test_validator.py @@ -59,10 +59,10 @@ def test_validate(): validate(Data(42, 0)) with raises(ValidationError) as err: validate(Data(0, 0)) - assert err.value == ValidationError(["error"]) + assert err.value.errors == [{"loc": [], "err": "error"}] with raises(ValidationError) as err: validate(Data(200, 0)) - assert err.value == ValidationError(["error2"]) + assert err.value.errors == [{"loc": [], "err": "error2"}] def test_non_trivial(): From 20d27df049e891b5f10cee47e43762499e55b112 Mon Sep 17 00:00:00 2001 From: Joseph Perez Date: Thu, 11 Nov 2021 22:51:39 +0100 Subject: [PATCH 5/5] Fix test --- tests/validation/test_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/validation/test_validator.py b/tests/validation/test_validator.py index 47d4ae9a..02ae89b0 100644 --- a/tests/validation/test_validator.py +++ b/tests/validation/test_validator.py @@ -59,10 +59,10 @@ def test_validate(): validate(Data(42, 0)) with raises(ValidationError) as err: validate(Data(0, 0)) - assert err.value.errors == [{"loc": [], "err": "error"}] + assert err.value.errors == [{"loc": [], "msg": "error"}] with raises(ValidationError) as err: validate(Data(200, 0)) - assert err.value.errors == [{"loc": [], "err": "error2"}] + assert err.value.errors == [{"loc": [], "msg": "error2"}] def test_non_trivial():