From 275fa9bbee20d5a522212949b50a4f55d2337235 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Fri, 21 Jan 2022 13:55:37 +0000 Subject: [PATCH] Make null strings decode to empty object in JSON codec --- docs/metadata.md | 4 +++- python/CHANGELOG.rst | 6 ++++++ python/lwt_interface/dict_encoding_testlib.py | 7 ++----- python/tests/test_metadata.py | 5 +++++ python/tskit/metadata.py | 6 +++++- 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/docs/metadata.md b/docs/metadata.md index 974582bbac..ee2538040e 100644 --- a/docs/metadata.md +++ b/docs/metadata.md @@ -122,7 +122,9 @@ the human readable [JSON](https://www.json.org/json-en.html) format. As this for is human readable and encodes numbers as text it uses more bytes than the `struct` format. However it is simpler to configure as it doesn't require any format specifier for each type in the schema. Default values for properties can be specified for only -the shallowest level of the metadata object. +the shallowest level of the metadata object. Tskit deviates from standard JSON in that +empty metadata is interpreted as an empty object. This is to allow setting of a schema +to a table without the need to modify all existing empty rows. ### struct diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 99b31454f7..f6540591bc 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -7,6 +7,12 @@ - ``VcfWriter.write`` now prints the site ID of variants in the ID field of the output VCF files. (:user:`roohy`, :issue:`2103`, :pr:`2107`) +**Breaking Changes** + +- The JSON metadata codec now interprets the empty string as an empty object. This means + that applying a schema to an existing table will no longer necessitate modifying the + existing rows. 
(:user:`benjeffery`, :issue:`2064`, :pr:`2104`) + ---------------------- [0.4.1] - 2022-01-11 diff --git a/python/lwt_interface/dict_encoding_testlib.py b/python/lwt_interface/dict_encoding_testlib.py index 3e5ec60ad3..823eef9948 100644 --- a/python/lwt_interface/dict_encoding_testlib.py +++ b/python/lwt_interface/dict_encoding_testlib.py @@ -27,7 +27,6 @@ See the test_example_c_module file for an example. """ import copy -import json import kastore import msprime @@ -222,8 +221,7 @@ def test_missing_metadata(self, tables): lwt.fromdict(d) tables = tskit.TableCollection.fromdict(lwt.asdict()) # Empty byte field still gets interpreted by schema - with pytest.raises(json.decoder.JSONDecodeError): - tables.metadata + assert tables.metadata == {} def test_missing_metadata_schema(self, tables): assert repr(tables.metadata_schema) != "" @@ -713,8 +711,7 @@ def test_top_level_metadata(self, tables): out = lwt.asdict() assert "metadata" not in out tables = tskit.TableCollection.fromdict(out) - with pytest.raises(json.decoder.JSONDecodeError): - tables.metadata + assert tables.metadata == {} # Missing is tested in TestMissingData above def test_top_level_metadata_schema(self, tables): diff --git a/python/tests/test_metadata.py b/python/tests/test_metadata.py index 9dfe6c715c..5114162341 100644 --- a/python/tests/test_metadata.py +++ b/python/tests/test_metadata.py @@ -503,6 +503,7 @@ def test_simple_default(self): "properties": {"number": {"type": "number", "default": 5}}, } ms = tskit.MetadataSchema(schema) + assert ms.decode_row(b"") == {"number": 5} assert ms.decode_row(ms.validate_and_encode_row({})) == {"number": 5} assert ms.decode_row(ms.validate_and_encode_row({"number": 42})) == { "number": 42 @@ -572,6 +573,10 @@ def test_dont_skip_validation_other_codecs(self): ms.validate_and_encode_row({"int": 1}) assert mocked_validate.call_count == 1 + def test_zero_length(self): + ms = tskit.MetadataSchema({"codec": "json"}) + assert ms.decode_row(b"") == {} + class 
TestStructCodec: def encode_decode(self, method_name, sub_schema, obj, buffer): diff --git a/python/tskit/metadata.py b/python/tskit/metadata.py index aba8eb2cda..66b2e90dd9 100644 --- a/python/tskit/metadata.py +++ b/python/tskit/metadata.py @@ -163,7 +163,11 @@ def encode(self, obj: Any) -> bytes: ) def decode(self, encoded: bytes) -> Any: - result = json.loads(encoded.decode()) + if len(encoded) == 0: + result = {} + else: + result = json.loads(encoded.decode()) + # Assign default values if isinstance(result, dict): return dict(self.defaults, **result)