Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove arbitrary exception conversion to ValidationError #244

Merged
merged 5 commits into from
Nov 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion apischema/conversions/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from apischema.types import AnyType
from apischema.typing import is_type_var
from apischema.utils import get_args2, get_origin_or_type, stop_signature_abuse
from apischema.validation.errors import ValidationError

if TYPE_CHECKING:
pass
Expand Down Expand Up @@ -181,7 +182,13 @@ def reset_serializer(cls: Type):


def as_str(cls: Cls) -> Cls:
deserializer(Conversion(cls, source=str))
def wrapper(data):
try:
return cls(data)
except ValueError as err:
raise ValidationError([str(err)])

deserializer(Conversion(wrapper, source=str, target=cls))
serializer(Conversion(str, source=cls))
return cls

Expand Down
27 changes: 3 additions & 24 deletions apischema/deserialization/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from apischema.validation.errors import ValidationError, merge_errors
from apischema.validation.mock import ValidatorMock
from apischema.validation.validators import Validator, validate
from apischema.visitor import Unsupported

if TYPE_CHECKING:
pass
Expand Down Expand Up @@ -480,17 +479,7 @@ def deserialize(self, data: Any) -> Any:
raise error
elif field_errors or errors:
raise ValidationError(errors, field_errors)
try:
res = self.cls(**values)
except (AssertionError, ValidationError):
raise
except TypeError as err:
if str(err).startswith("__init__() got"):
raise Unsupported(self.cls)
else:
raise ValidationError([str(err)])
except Exception as err:
raise ValidationError([str(err)])
res = self.cls(**values)
if self.validators:
validate(res, validators2, init, aliaser=self.aliaser)
return res
Expand Down Expand Up @@ -651,12 +640,7 @@ class ConversionMethod(DeserializationMethod):
method: DeserializationMethod

def deserialize(self, data: Any) -> Any:
try:
return self.converter(self.method.deserialize(data))
except (ValidationError, AssertionError):
raise
except Exception as err:
raise ValidationError([str(err)])
return self.converter(self.method.deserialize(data))


@dataclass
Expand All @@ -678,11 +662,6 @@ def deserialize(self, data: Any) -> Any:
except ValidationError as err:
error = merge_errors(error, err)
else:
try:
return alternative.converter(value)
except (ValidationError, AssertionError):
raise
except Exception as err:
raise ValidationError([str(err)])
return alternative.converter(value)
assert error is not None
raise error
29 changes: 23 additions & 6 deletions apischema/validation/errors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from dataclasses import dataclass, field, replace
from functools import reduce
from typing import (
Any,
Expand Down Expand Up @@ -46,13 +45,31 @@ class LocalizedError(TypedDict):
LocalizedError = Mapping[str, Any] # type: ignore


@dataclass
class ValidationError(Exception):
messages: Sequence[ErrorMsg] = field(default_factory=list)
children: Mapping[ErrorKey, "ValidationError"] = field(default_factory=dict)
@overload
def __init__(self, __message: str):
...

@overload
def __init__(
self,
messages: Sequence[ErrorMsg] = None,
children: Mapping[ErrorKey, "ValidationError"] = None,
):
...

def __init__(
self,
messages: Union[ErrorMsg, Sequence[ErrorMsg]] = None,
children: Mapping[ErrorKey, "ValidationError"] = None,
):
if isinstance(messages, str):
messages = [messages]
self.messages: Sequence[str] = messages or []
self.children: Mapping[ErrorKey, "ValidationError"] = children or {}

def __str__(self):
return repr(self)
return f"{ValidationError.__name__}: {self.errors}"

def _errors(self) -> Iterator[Tuple[List[ErrorKey], ErrorMsg]]:
for msg in self.messages:
Expand Down Expand Up @@ -121,7 +138,7 @@ def apply_aliaser(error: ValidationError, aliaser: Aliaser) -> ValidationError:
child2 = apply_aliaser(child, aliaser)
aliased |= child2 is not child
aliased_children[key] = child2
return replace(error, children=aliased_children) if aliased else error
return ValidationError(error.messages, aliased_children) if aliased else error


def _rec_build_error(path: Sequence[ErrorKey], msg: ErrorMsg) -> ValidationError:
Expand Down
4 changes: 0 additions & 4 deletions apischema/validation/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,6 @@ def validate(
except NonTrivialDependency as exc:
exc.validator = validator
raise
except AssertionError:
raise
except Exception as e:
err = ValidationError([str(e)])
else:
continue
if validator.field is not None:
Expand Down
80 changes: 44 additions & 36 deletions docs/conversions.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,90 +160,98 @@ Serialized methods can also have dedicated conversions for their return
{!serialized_conversions.py!}
```

## String conversions

A common pattern of conversion concerns classes that have a string constructor and a `__str__` method; standard types `uuid.UUID`, `pathlib.Path`, `ipaddress.IPv4Address` are concerned. Using `apischema.conversions.as_str` will register a string-deserializer from the constructor and a string-serializer from the `__str__` method.
## Default conversions

As with almost every default behavior in *apischema*, default conversions can be configured using `apischema.settings.deserialization.default_conversion`/`apischema.settings.serialization.default_conversion`. The initial value of these settings are the function which retrieved conversions registered with `deserializer`/`serializer`.

You can for example [support *attrs*](examples/attrs_support.md) classes with this feature:

```python
{!as_str.py!}
{!examples/attrs_support.py!}
```

!!! note
Previously mentioned standard types are handled by *apischema* using `as_str`.
*apischema* functions (`deserialize`/`serialize`/`deserialization_schema`/`serialization_schema`/`definitions_schema`) also have a `default_conversion` parameter to dynamically modify default conversions. See [FAQ](#whats-the-difference-between-conversion-and-default_conversion-parameters) for the difference between `conversion` and `default_conversion` parameters.

## Use `Enum` names
## Sub-conversions

`Enum` subclasses are (de)serialized using values. However, you may want to use enumeration names instead, that's why *apischema* provides `apischema.conversion.as_names` to decorate `Enum` subclasses.
Sub-conversions are [dynamic conversions](#dynamic-conversions--select-conversions-at-runtime) applied on the result of a conversion.

```python
{!as_names.py!}
{!sub_conversions.py!}
```

## Object deserialization — transform function into a dataclass deserializer
Sub-conversions can also be used to [bypass registered conversions](#bypass-registered-conversion) or to define [recursive conversions](#lazyrecursive-conversions).

`apischema.objects.object_deserialization` can convert a function into a new function taking a unique parameter, a dataclass whose fields are mapped from the original function parameters.
## Lazy/recursive conversions

It can be used for example to build a deserialization conversion from an alternative constructor.
Conversions can be defined lazily, i.e. using a function returning `Conversion` (single, or a tuple of it); this function must be wrapped into a `apischema.conversions.LazyConversion` instance.

It allows creating recursive conversions or using a conversion object which can be modified after its definition (for example a conversion for a base class modified by `__init_subclass__`)

It is used by *apischema* itself for the generated JSON schema. It is indeed a recursive data, and the [different versions](json_schema.md#json-schema--openapi-version) are handled by a conversion with a lazy recursive sub-conversion.

```python
{!object_deserialization.py!}
{!recursive_conversions.py!}
```

!!! note
Parameters metadata can be specified using `typing.Annotated`, or be passed with `parameters_metadata` parameter, which is a mapping of parameter names as key and mapped metadata as value.

## Object serialization — select only a subset of fields
### Lazy registered conversions

`apischema.objects.object_serialization` can be used to serialize only a subset of an object fields and methods.
Lazy conversions can also be registered, but the deserialization target/serialization source has to be passed too.

```python
{!object_serialization.py!}
{!lazy_registered_conversion.py!}
```

## Default conversions
## Conversion helpers

As with almost every default behavior in *apischema*, default conversions can be configured using `apischema.settings.deserialization.default_conversion`/`apischema.settings.serialization.default_conversion`. The initial value of these settings are the function which retrieved conversions registered with `deserializer`/`serializer`.

You can for example [support *attrs*](examples/attrs_support.md) classes with this feature:

### String conversions

A common pattern of conversion concerns classes that have a string constructor and a `__str__` method; standard types `uuid.UUID`, `pathlib.Path`, `ipaddress.IPv4Address` are concerned. Using `apischema.conversions.as_str` will register a string-deserializer from the constructor and a string-serializer from the `__str__` method. `ValueError` raised by the constructor is caught and converted to `ValidationError`.

```python
{!examples/attrs_support.py!}
{!as_str.py!}
```

*apischema* functions (`deserialize`/`serialize`/`deserialization_schema`/`serialization_schema`/`definitions_schema`) also have a `default_conversion` parameter to dynamically modify default conversions. See [FAQ](#whats-the-difference-between-conversion-and-default_conversion-parameters) for the difference between `conversion` and `default_conversion` parameters.
!!! note
Previously mentioned standard types are handled by *apischema* using `as_str`.

## Sub-conversions
### Use `Enum` names

Sub-conversions are [dynamic conversions](#dynamic-conversions--select-conversions-at-runtime) applied on the result of a conversion.
`Enum` subclasses are (de)serialized using values. However, you may want to use enumeration names instead, that's why *apischema* provides `apischema.conversion.as_names` to decorate `Enum` subclasses.

```python
{!sub_conversions.py!}
{!as_names.py!}
```

Sub-conversions can also be used to [bypass registered conversions](#bypass-registered-conversion) or to define [recursive conversions](#lazyrecursive-conversions).
### Class as union of its subclasses

## Lazy/recursive conversions

Conversions can be defined lazily, i.e. using a function returning `Conversion` (single, or a tuple of it); this function must be wrapped into a `apischema.conversions.LazyConversion` instance.

It allows creating recursive conversions or using a conversion object which can be modified after its definition (for example a conversion for a base class modified by `__init_subclass__`)

It is used by *apischema* itself for the generated JSON schema. It is indeed a recursive data, and the [different versions](json_schema.md#json-schema--openapi-version) are handled by a conversion with a lazy recursive sub-conversion.
### Object deserialization — transform function into a dataclass deserializer

`apischema.objects.object_deserialization` can convert a function into a new function taking a unique parameter, a dataclass whose fields are mapped from the original function parameters.

It can be used for example to build a deserialization conversion from an alternative constructor.

```python
{!recursive_conversions.py!}
{!object_deserialization.py!}
```

### Lazy registered conversions
!!! note
Parameters metadata can be specified using `typing.Annotated`, or be passed with `parameters_metadata` parameter, which is a mapping of parameter names as key and mapped metadata as value.

Lazy conversions can also be registered, but the deserialization target/serialization source has to be passed too.
### Object serialization — select only a subset of fields

`apischema.objects.object_serialization` can be used to serialize only a subset of an object fields and methods.

```python
{!lazy_registered_conversion.py!}
{!object_serialization.py!}
```


## FAQ

#### What's the difference between `conversion` and `default_conversion` parameters?
Expand Down
29 changes: 28 additions & 1 deletion docs/validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ Messages can be customized by setting the corresponding attribute of `apischema.

Dataclass validation can be completed by custom validators. These are simple decorated methods which will be executed during validation, after all fields having been deserialized.

!!! note
Previously to v0.17, validators could raise arbitrary exceptions (except AssertionError of course); see [FAQ](#why-validators-cannot-raise-arbitrary-exception) for the reason of this change.

```python
{!validator.py!}
```
Expand Down Expand Up @@ -158,4 +161,28 @@ Last but not least, validators can be embedded directly into `Annotated` argumen
*apischema* uses type annotations, so every objects used can already be statically type-checked (with *Mypy*/*Pycharm*/etc.) at instantiation but also at modification.

#### Why use validators for dataclasses instead of doing validation in `__post_init__`?
Actually, validation can completely be done in `__post_init__`, there is no problem with that. However, validators offers one thing that cannot be achieved with `__post_init__`: they are run before `__init__`, so they can validate incomplete data. Moreover, they are only run during deserialization, so they don't add overhead to normal class instantiation.
Actually, validation can completely be done in `__post_init__`, there is no problem with that. However, validators offers one thing that cannot be achieved with `__post_init__`: they are run before `__init__`, so they can validate incomplete data. Moreover, they are only run during deserialization, so they don't add overhead to normal class instantiation.

#### Why validators cannot raise arbitrary exception?

Allowing arbitrary exception is in fact a security issue, because unwanted exception could be raised, and their message displayed in validation error. It could either contain sensitive data, or give information about the implementation which could be used to hack it.

By the way, it's possible to define a decorator to convert precise exceptions to `ValidationError`:
```python
from collections.abc import Callable
from functools import wraps
from typing import TypeVar
from apischema import ValidationError

Func = TypeVar("Func", bound=Callable)
def catch(*exceptions) -> Callable[[Func], Func]:
def decorator(func: Func) -> Func:
@wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as err:
raise ValidationError(str(err)) if isinstance(err, exceptions) else err
return wrapper
return decorator
```
2 changes: 1 addition & 1 deletion examples/field_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

def check_no_duplicate_digits(n: int):
if len(str(n)) != len(set(str(n))):
raise ValueError("number has duplicate digits")
raise ValidationError("number has duplicate digits")


@dataclass
Expand Down
2 changes: 1 addition & 1 deletion examples/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PasswordForm:
def password_match(self):
# DO NOT use assert
if self.password != self.confirmation:
raise ValueError("password doesn't match its confirmation")
raise ValidationError("password doesn't match its confirmation")


with raises(ValidationError) as err:
Expand Down
2 changes: 1 addition & 1 deletion examples/validator_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def check_palindrome(s: Palindrome):
for i in range(len(s) // 2):
if s[i] != s[-1 - i]:
raise ValueError("Not a palindrome")
raise ValidationError("Not a palindrome")


assert deserialize(Palindrome, "tacocat") == "tacocat"
Expand Down
2 changes: 1 addition & 1 deletion examples/validator_inheritance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class PasswordForm:
@validator
def password_match(self):
if self.password != self.confirmation:
raise ValueError("password doesn't match its confirmation")
raise ValidationError("password doesn't match its confirmation")


@dataclass
Expand Down
10 changes: 5 additions & 5 deletions tests/validation/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def a_gt_10(self):
@validator
def a_lt_100(self):
if self.a >= 100:
raise ValueError("error2")
raise ValidationError("error2")

@validator
def non_trivial(self):
Expand All @@ -49,20 +49,20 @@ def test_validator_descriptor():
validator = get_validators_by_method(Data, Data.a_gt_10)
assert validator.dependencies == {"a"}
# Can be called from class and instance
with raises(ValueError):
with raises(ValidationError):
assert Data(200, 0).a_lt_100()
with raises(ValueError):
with raises(ValidationError):
assert Data.a_lt_100(Data(200, 0))


def test_validate():
validate(Data(42, 0))
with raises(ValidationError) as err:
validate(Data(0, 0))
assert err.value == ValidationError(["error"])
assert err.value.errors == [{"loc": [], "msg": "error"}]
with raises(ValidationError) as err:
validate(Data(200, 0))
assert err.value == ValidationError(["error2"])
assert err.value.errors == [{"loc": [], "msg": "error2"}]


def test_non_trivial():
Expand Down