From 8ccb60776ba2e2a8ff1be5d6ab3fce66067f0611 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Fri, 15 Jan 2021 02:10:40 +0000 Subject: [PATCH] Make struct codec encode default values --- docs/metadata.rst | 5 ++-- python/CHANGELOG.rst | 2 ++ python/tests/test_metadata.py | 48 +++++++++++++++++++++++++++++++++++ python/tskit/metadata.py | 47 +++++++++++++++++++++++++++++----- 4 files changed, 93 insertions(+), 9 deletions(-) diff --git a/docs/metadata.rst b/docs/metadata.rst index 967c56ddab..68f8912705 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -82,8 +82,9 @@ This codec places extra restrictions on the schema: This sets the binary encoding used for the property. #. All metadata objects must have fixed properties. - This means that they can have no missing properties and have no additional - properties not listed in the schema. + This means that they can no additional properties not listed in the schema. Any + property that does not have a `default` specified in the schema must be present. + Default values will be encoded. #. Arrays must be lists of homogeneous objects. For example, this is not valid:: diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index d9c09cc7e1..bc0831ad9f 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -15,6 +15,8 @@ ``metadata={}`` is no longer needed as an argument when a schema is present (:user:`benjeffery`, :issue:`1084`). +- ``default`` in metadata schemas is used to fill in missing values when encoding for + the struct codec. (:user:`benjeffery`, :issue:`1073`, :pr:`1116`). -------------------- [0.3.4] - 2020-12-02 diff --git a/python/tests/test_metadata.py b/python/tests/test_metadata.py index 23f0b34535..b2b95ef369 100644 --- a/python/tests/test_metadata.py +++ b/python/tests/test_metadata.py @@ -1006,6 +1006,23 @@ def test_null_union_top_level(self): assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data assert ms.decode_row(ms.validate_and_encode_row(None)) is None + def test_default_values(self): + schema = { + "codec": "struct", + "type": "object", + "properties": { + "int": {"type": "number", "binaryFormat": "b", "default": 42}, + "float": {"type": "number", "binaryFormat": "d"}, + }, + } + ms = metadata.MetadataSchema(schema) + row_data = {"float": 5.5} + assert ms.validate_and_encode_row(row_data) == b"\x00\x00\x00\x00\x00\x00\x16@*" + assert ms.decode_row(ms.validate_and_encode_row(row_data)) == { + "float": 5.5, + "int": 42, + } + class TestStructCodecRoundTrip: def round_trip(self, schema, row_data): @@ -1494,6 +1511,37 @@ def test_additional_properties(self): ): metadata.MetadataSchema(schema) + def test_unrequired_property_needs_default(self): + schema = { + "codec": "struct", + "type": "object", + "properties": { + "int": {"type": "number", "binaryFormat": "i"}, + "float": {"type": "number", "binaryFormat": "d"}, + }, + "required": ["float"], + } + with pytest.raises( + exceptions.MetadataSchemaValidationError, + match="Optional property 'int' must have a default value", + ): + metadata.MetadataSchema(schema) + + def test_no_default_implies_required(self): + schema = { + "codec": "struct", + "type": "object", + "properties": { + "int": {"type": "number", "binaryFormat": "i", "default": 5}, + "float": {"type": "number", "binaryFormat": "d"}, + }, + } + self.encode(schema, {"float": 5.5}) + with pytest.raises( + exceptions.MetadataValidationError, match="'float' is a required property" + ): + self.encode(schema, {}) + class TestSLiMDecoding: """ diff --git a/python/tskit/metadata.py b/python/tskit/metadata.py index 5dd9db261f..46bc297b83 100644 --- a/python/tskit/metadata.py +++ b/python/tskit/metadata.py @@ -163,8 +163,21 @@ def binary_format_validator(validator, types, instance, schema): ) +def required_validator(validator, required, instance, schema): + # Do the normal validation + yield from jsonschema._validators.required(validator, required, instance, schema) + + # For struct codec if a property is not required, then it must have a default + for prop, sub_schema in instance["properties"].items(): + if prop not in instance["required"] and "default" not in sub_schema: + yield jsonschema.ValidationError( + f"Optional property '{prop}' must have" f" a default value" + ) + + StructCodecSchemaValidator = jsonschema.validators.extend( - TSKITMetadataSchemaValidator, {"type": binary_format_validator} + TSKITMetadataSchemaValidator, + {"type": binary_format_validator, "required": required_validator}, ) META_SCHEMA: Mapping[str, Any] = copy.deepcopy(StructCodecSchemaValidator.META_SCHEMA) # No union types @@ -410,9 +423,22 @@ def make_object_encode(cls, sub_schema): key: StructCodec.make_encode(prop) for key, prop in sub_schema["properties"].items() } - return lambda obj: b"".join( - sub_encoder(obj[key]) for key, sub_encoder in sub_encoders.items() - ) + defaults = { + key: prop["default"] + for key, prop in sub_schema["properties"].items() + if "default" in prop + } + + def object_encode(obj): + values = [] + for key, sub_encoder in sub_encoders.items(): + try: + values.append(sub_encoder(obj[key])) + except KeyError: + values.append(sub_encoder(defaults[key])) + return b"".join(values) + + return object_encode @classmethod def make_object_or_null_encode(cls, sub_schema): @@ -445,7 +471,7 @@ def make_numeric_encode(cls, sub_schema): @classmethod def modify_schema(cls, schema: Mapping) -> Mapping: - # This codec requires that all properties are required and additional ones + # This codec requires that additional properties are # not allowed. Rather than get schema authors to repeat that everywhere # we add it here, sadly we can't do this in the metaschema as "default" isn't # used by the validator. @@ -454,12 +480,19 @@ def enforce_fixed_properties(obj): return [enforce_fixed_properties(j) for j in obj] elif type(obj) == dict: ret = {k: enforce_fixed_properties(v) for k, v in obj.items()} - if ret.get("type") == "object": + if "object" in ret.get("type", []): if ret.get("additional_properties"): raise ValueError( "Struct codec does not support additional_properties" ) - ret["required"] = list(ret.get("properties", {}).keys()) + # To prevent authors having to list required properties the default + # is that all without a default are required. + if "required" not in ret: + ret["required"] = [ + prop + for prop, sub_schema in ret.get("properties", {}).items() + if "default" not in sub_schema + ] ret["additionalProperties"] = False return ret else: