diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 3f50b817df..f539e0744e 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -5074,6 +5074,8 @@ test_table_collection_union(void) tsk_table_collection_t tables_empty; tsk_table_collection_t tables_copy; tsk_id_t node_mapping[3]; + char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋"; + tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata); memset(node_mapping, 0xff, sizeof(node_mapping)); @@ -5087,8 +5089,14 @@ test_table_collection_union(void) CU_ASSERT_EQUAL_FATAL(ret, 0); // does not error on empty tables - ret = tsk_table_collection_union( - &tables, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED); + ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + // does not error on empty tables but that differ on top level metadata + ret = tsk_table_collection_set_metadata( + &tables, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL(ret, 0); + ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); // three nodes, two pop, three ind, two edge, two site, two mut diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 074386ae91..7d3fef08ba 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -9256,14 +9256,6 @@ tsk_check_subset_equality(tsk_table_collection_t *self, if (ret != 0) { goto out; } - ret = tsk_provenance_table_clear(&other_copy.provenances); - if (ret != 0) { - goto out; - } - ret = tsk_provenance_table_clear(&self_copy.provenances); - if (ret != 0) { - goto out; - } ret = tsk_table_collection_subset(&self_copy, self_nodes, num_shared_nodes); if (ret != 0) { goto out; @@ -9272,7 +9264,8 @@ tsk_check_subset_equality(tsk_table_collection_t *self, if (ret != 0) { goto out; } - if (!tsk_table_collection_equals(&self_copy, &other_copy, 0)) { + if (!tsk_table_collection_equals(&self_copy, &other_copy, + TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE)) { ret = TSK_ERR_UNION_DIFF_HISTORIES; goto out; } diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index e790953b67..099d718513 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -3216,6 +3216,12 @@ def split_example(self, ts, T): i if i < len(shared_nodes) else tskit.NULL for i in range(tables2.nodes.num_rows) ] + # adding some metadata to one of the tables + # union should disregard differences in metadata + tables1.metadata_schema = tskit.MetadataSchema( + {"codec": "json", "type": "object"} + ) + tables1.metadata = {"hello": "world"} return tables1, tables2, node_mapping def verify_union_equality(self, tables, other, node_mapping, add_populations=True): @@ -3225,7 +3231,7 @@ def verify_union_equality(self, tables, other, node_mapping, add_populations=Tru uni1.union( other, node_mapping, - record_provenance=False, + record_provenance=True, add_populations=add_populations, ) tsutil.py_union( @@ -3235,15 +3241,13 @@ def verify_union_equality(self, tables, other, node_mapping, add_populations=Tru record_provenance=False, add_populations=add_populations, ) - assert uni1 == uni2 + assert uni1.equals(uni2, ignore_ts_metadata=True, ignore_provenance=True) # verifying that subsetting to original nodes return the same table orig_nodes = [j for i, j in enumerate(node_mapping) if j != tskit.NULL] uni1.subset(orig_nodes) # subsetting tables just to make sure order is the same tables.subset(orig_nodes) - uni1.provenances.clear() - tables.provenances.clear() - assert uni1 == tables + assert uni1.equals(tables, ignore_ts_metadata=True, ignore_provenance=True) def test_noshared_example(self): ts1 = self.get_msprime_example(sample_size=3, T=2, seed=9328)