From 5cbf581444113ffd82f00dd046f111b7666a7e00 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Tue, 20 Oct 2020 15:28:54 +0100 Subject: [PATCH] More flexible table equality comparisons. Closes #913 --- c/CHANGELOG.rst | 13 +- c/tests/test_file_format.c | 24 +-- c/tests/test_tables.c | 391 ++++++++++++++++++++++++++-------- c/tests/test_trees.c | 32 +-- c/tskit/tables.c | 369 ++++++++++++++++---------------- c/tskit/tables.h | 216 +++++++++++++++---- python/CHANGELOG.rst | 8 +- python/_tskitmodule.c | 185 +++++++++++----- python/tests/conftest.py | 36 ++-- python/tests/test_lowlevel.py | 37 +++- python/tests/test_tables.py | 99 ++++++++- python/tskit/tables.py | 120 ++++++++--- 12 files changed, 1075 insertions(+), 455 deletions(-) diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index 220971c248..98a64633d2 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -2,11 +2,14 @@ [0.99.8] - 2020-XX-XX --------------------- -**New features** +**Breaking changes** -- Added ``tsk_table_collection_equals_with_options`` which allows for more flexible equality - criteria (e.g., ignore top-level metadata and schema or provenance tables). - (:user:`mufernando`, :issue:`896`, :pr:`897`) +- Added an ``options`` argument to ``tsk_table_collection_equals`` + and table equality methods to allow for more flexible equality criteria + (e.g., ignore top-level metadata and schema or provenance tables). + Existing code should add an extra final parameter ``0`` to retain the + current behaviour. (:user:`mufernando`, :user:`jeromekelleher`, + :issue:`896`, :pr:`897`, :issue:`913`, :pr:`917`). 
--------------------- [0.99.7] - 2020-09-29 @@ -35,7 +38,7 @@ **Breaking changes** - The macro ``TSK_IMPUTE_MISSING_DATA`` is renamed to ``TSK_ISOLATED_NOT_MISSING`` - (:user:`benjeffery`, :issue:`716`, :pr:`794`) + (:user:`benjeffery`, :issue:`716`, :pr:`794`) **New features** diff --git a/c/tests/test_file_format.c b/c/tests/test_file_format.c index 5216cb1c50..255b7a3b5f 100644 --- a/c/tests/test_file_format.c +++ b/c/tests/test_file_format.c @@ -301,7 +301,7 @@ test_missing_optional_column_pairs(void) copy_store_drop_columns(ts, 2, drop_cols, _tmp_file_name); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); } @@ -484,7 +484,7 @@ test_missing_indexes(void) copy_store_drop_columns(ts, 2, cols, _tmp_file_name); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); CU_ASSERT_FALSE(tsk_table_collection_has_index(&t2, 0)); tsk_table_collection_free(&t2); @@ -633,7 +633,7 @@ test_metadata_schemas_optional(void) ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); /* metadata schemas are included in data comparisons */ - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); } @@ -933,12 +933,12 @@ test_example_round_trip(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); /* Reading multiple times into the same tables with TSK_NO_INIT is supported. 
*/ ret = tsk_table_collection_load(&t2, _tmp_file_name, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); /* Do the same thing with treeseq API */ @@ -947,7 +947,7 @@ test_example_round_trip(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0)); tsk_treeseq_free(&ts2); /* Use loadf form */ @@ -957,7 +957,7 @@ test_example_round_trip(void) fseek(f, 0, SEEK_SET); ret = tsk_table_collection_loadf(&t2, f, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); fclose(f); @@ -968,7 +968,7 @@ test_example_round_trip(void) fseek(f, 0, SEEK_SET); ret = tsk_treeseq_loadf(&ts2, f, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0)); tsk_treeseq_free(&ts2); fclose(f); @@ -1004,7 +1004,7 @@ test_multiple_round_trip(void) for (j = 0; j < num_examples; j++) { ret = tsk_table_collection_loadf(&out_tables, f, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0)); tsk_table_collection_free(&out_tables); } @@ -1015,7 +1015,7 @@ test_multiple_round_trip(void) for (j = 0; j < num_examples; j++) { ret = tsk_table_collection_loadf(&out_tables, f, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0)); } tsk_table_collection_free(&out_tables); @@ 
-1030,7 +1030,7 @@ test_multiple_round_trip(void) break; } CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0)); j++; } tsk_table_collection_free(&out_tables); @@ -1055,7 +1055,7 @@ test_copy_store_drop_columns(void) copy_store_drop_columns(ts, 0, NULL, _tmp_file_name); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e026ee26f9..3f50b817df 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -106,7 +106,7 @@ insert_edge_metadata(tsk_table_collection_t *tables) } static void -test_table_collection_equals_with_options(void) +test_table_collection_equals_options(void) { int ret; tsk_table_collection_t tc1, tc2; @@ -123,7 +123,7 @@ test_table_collection_equals_with_options(void) CU_ASSERT_EQUAL(ret, 0); ret = tsk_table_collection_init(&tc2, 0); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_TRUE(ret); // Adding some meat to the tables @@ -144,7 +144,7 @@ test_table_collection_equals_with_options(void) CU_ASSERT(ret >= 0); // Equality of empty vs non-empty - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_FALSE(ret); ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); CU_ASSERT_EQUAL(ret, 0); @@ -153,61 +153,86 @@ test_table_collection_equals_with_options(void) ret = tsk_table_collection_set_metadata( &tc1, example_metadata, example_metadata_length); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options( - &tc1, &tc2, 
TSK_IGNORE_TOP_LEVEL_METADATA); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA); CU_ASSERT_TRUE(ret); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + /* TSK_CMP_IGNORE_METADATA implies TSK_CMP_IGNORE_TS_METADATA */ + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA); + CU_ASSERT_TRUE(ret); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_FALSE(ret); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, TSK_IGNORE_PROVENANCE); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE); CU_ASSERT_FALSE(ret); ret = tsk_table_collection_set_metadata( &tc2, example_metadata, example_metadata_length); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_TRUE(ret); ret = tsk_table_collection_set_metadata_schema( &tc1, example_metadata_schema, example_metadata_schema_length); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options( - &tc1, &tc2, TSK_IGNORE_TOP_LEVEL_METADATA); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA); CU_ASSERT_TRUE(ret); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_FALSE(ret); ret = tsk_table_collection_set_metadata_schema( &tc2, example_metadata_schema, example_metadata_schema_length); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_TRUE(ret); // Ignore provenance ret = tsk_provenance_table_add_row(&tc1.provenances, "time", 4, "record", 6); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, TSK_IGNORE_PROVENANCE); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE); CU_ASSERT_TRUE(ret); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = 
tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_FALSE(ret); - ret = tsk_table_collection_equals_with_options( - &tc1, &tc2, TSK_IGNORE_TOP_LEVEL_METADATA); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA); CU_ASSERT_FALSE(ret); ret = tsk_provenance_table_add_row(&tc2.provenances, "time", 4, "record", 6); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, TSK_IGNORE_PROVENANCE); + ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE); CU_ASSERT_TRUE(ret); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_TRUE(ret); - // Both + // Ignore provenance timestamp + ret = tsk_provenance_table_add_row(&tc1.provenances, "time", 4, "record", 6); + CU_ASSERT_FATAL(ret >= 0); + ret = tsk_provenance_table_add_row(&tc2.provenances, "other", 5, "record", 6); + CU_ASSERT_FATAL(ret >= 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TIMESTAMPS)); + + // Ignore provenance and top-level metadata. ret = tsk_provenance_table_clear(&tc1.provenances); CU_ASSERT_EQUAL(ret, 0); example_metadata[0] = 'J'; ret = tsk_table_collection_set_metadata( &tc1, example_metadata, example_metadata_length); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_table_collection_equals_with_options(&tc1, &tc2, 0); + ret = tsk_table_collection_equals(&tc1, &tc2, 0); CU_ASSERT_FALSE(ret); - ret = tsk_table_collection_equals_with_options( - &tc1, &tc2, TSK_IGNORE_TOP_LEVEL_METADATA | TSK_IGNORE_PROVENANCE); + ret = tsk_table_collection_equals( + &tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE); CU_ASSERT_TRUE(ret); + + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); + + // Check what happens when one of the tables just differs by metadata. 
+ ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_init(&tc2, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_population_table_add_row(&tc1.populations, "metadata", 8); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_population_table_add_row(&tc2.populations, "", 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA)); + tsk_table_collection_free(&tc1); tsk_table_collection_free(&tc2); } @@ -264,23 +289,23 @@ test_table_collection_metadata(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_init(&tc2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_table_collection_set_metadata( &tc1, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_table_collection_set_metadata( &tc2, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_table_collection_set_metadata_schema( &tc1, example_metadata_schema, example_metadata_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_table_collection_set_metadata_schema( &tc2, example_metadata_schema, example_metadata_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); // Test copy tsk_table_collection_free(&tc1); @@ -292,7 +317,7 @@ test_table_collection_metadata(void) 
CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_copy(&tc1, &tc2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_table_collection_set_metadata_schema( &tc1, example_metadata_schema, example_metadata_schema_length); @@ -300,7 +325,7 @@ test_table_collection_metadata(void) tsk_table_collection_free(&tc2); ret = tsk_table_collection_copy(&tc1, &tc2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); // Test dump and load with empty metadata and schema tsk_table_collection_free(&tc1); @@ -312,7 +337,7 @@ test_table_collection_metadata(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); // Test dump and load with set metadata and schema tsk_table_collection_free(&tc1); @@ -330,7 +355,7 @@ test_table_collection_metadata(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); tsk_table_collection_free(&tc1); tsk_table_collection_free(&tc2); } @@ -388,6 +413,27 @@ test_node_table(void) CU_ASSERT_EQUAL(node.metadata_length, test_metadata_length); CU_ASSERT_NSTRING_EQUAL(node.metadata, test_metadata, test_metadata_length); } + + /* Test equality with and without metadata */ + tsk_node_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + CU_ASSERT_FALSE(tsk_node_table_equals(&table, 
&table2, 0)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_node_table_free(&table2); + CU_ASSERT_EQUAL(tsk_node_table_get_row(&table, (tsk_id_t) num_rows, &node), TSK_ERR_NODE_OUT_OF_BOUNDS); tsk_node_table_print_state(&table, _devnull); @@ -568,9 +614,10 @@ test_node_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_node_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0)); tsk_node_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); tsk_node_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -773,6 +820,28 @@ test_edge_table_with_options(tsk_flags_t options) ret = tsk_edge_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); + /* Test equality with and without metadata */ + tsk_edge_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + if (!(options & TSK_NO_METADATA)) { + /* Change the metadata values */ + table2.metadata[0] = 0; + 
CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + } + tsk_edge_table_free(&table2); + /* Inputs cannot be NULL */ ret = tsk_edge_table_set_columns( &table, num_rows, NULL, right, parent, child, metadata, metadata_offset); @@ -864,10 +933,11 @@ test_edge_table_with_options(tsk_flags_t options) memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); ret = tsk_edge_table_set_metadata_schema(&table2, example, example_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0)); ret = tsk_edge_table_set_metadata_schema(&table2, example2, example2_length); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); ret = tsk_edge_table_clear(&table); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -910,7 +980,7 @@ test_edge_table_copy_semantics(void) /* t1 now has metadata. 
We should be able to copy to another table with metadata */ ret = tsk_table_collection_copy(&t1, &t2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); /* We should not be able to copy into a table with no metadata */ @@ -926,18 +996,18 @@ test_edge_table_copy_semantics(void) */ ret = tsk_table_collection_copy(&t1, &t2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); ret = tsk_table_collection_copy(&t1, &t2, TSK_NO_EDGE_METADATA); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); /* Try copying into a table directly */ ret = tsk_edge_table_copy(&t1.edges, &edges, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_edge_table_equals(&t1.edges, &edges)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&t1.edges, &edges, 0)); tsk_edge_table_free(&edges); tsk_table_collection_free(&t1); @@ -1278,6 +1348,26 @@ test_site_table(void) ret = tsk_site_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); + /* Test equality with and without metadata */ + tsk_site_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0)); + 
CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_site_table_free(&table2); + /* Inputs cannot be NULL */ ret = tsk_site_table_set_columns(&table, num_rows, NULL, ancestral_state, ancestral_state_offset, metadata, metadata_offset); @@ -1350,9 +1440,10 @@ test_site_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_site_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0)); tsk_site_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); ret = tsk_site_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -1521,6 +1612,26 @@ test_mutation_table(void) CU_ASSERT_EQUAL(table.derived_state_length, num_rows); CU_ASSERT_EQUAL(table.metadata_length, num_rows); + /* Test equality with and without metadata */ + tsk_mutation_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, 
&table2, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_mutation_table_free(&table2); + ret = tsk_mutation_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); @@ -1640,9 +1751,10 @@ test_mutation_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_mutation_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0)); tsk_mutation_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); tsk_mutation_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -1809,6 +1921,26 @@ test_migration_table(void) CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.metadata_length, num_rows); + /* Test equality with and without metadata */ + tsk_migration_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + 
CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_migration_table_free(&table2); + ret = tsk_migration_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); @@ -1902,9 +2034,10 @@ test_migration_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_migration_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0)); tsk_migration_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); tsk_migration_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -1931,7 +2064,6 @@ test_individual_table(void) { int ret = 0; tsk_individual_table_t table, table2; - /* tsk_table_collection_t tables, tables2; */ tsk_size_t num_rows = 100; tsk_id_t j; tsk_size_t k; @@ -1988,6 +2120,31 @@ test_individual_table(void) CU_ASSERT_NSTRING_EQUAL( individual.metadata, test_metadata, test_metadata_length); } + + /* Test equality with and without metadata */ + tsk_individual_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + 
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_individual_table_free(&table2); + ret = tsk_individual_table_get_row(&table, (tsk_id_t) num_rows, &individual); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); tsk_individual_table_print_state(&table, _devnull); @@ -2175,9 +2332,11 @@ test_individual_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_individual_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0)); tsk_individual_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); tsk_individual_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -2249,6 +2408,31 @@ test_population_table(void) CU_ASSERT_EQUAL(population.metadata_length, k); CU_ASSERT_NSTRING_EQUAL(population.metadata, c, k); } + + /* Test equality with and without metadata */ + tsk_population_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0)); + 
CU_ASSERT_TRUE( + tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the metadata values */ + table2.metadata[0] = 0; + CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Change the last metadata entry */ + table2.metadata_offset[table2.num_rows] + = table2.metadata_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + /* Delete all metadata */ + memset(table2.metadata_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.metadata_offset)); + CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); + tsk_population_table_free(&table2); + ret = tsk_population_table_get_row(&table, (tsk_id_t) num_rows, &population); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS); tsk_population_table_print_state(&table, _devnull); @@ -2327,9 +2511,11 @@ test_population_table(void) CU_ASSERT_EQUAL( memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0); tsk_population_table_set_metadata_schema(&table2, example, example_length); - CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2)); + CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0)); tsk_population_table_set_metadata_schema(&table2, example2, example2_length); - CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2)); + CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA)); tsk_population_table_clear(&table); CU_ASSERT_EQUAL(ret, 0); @@ -2349,7 +2535,7 @@ static void test_provenance_table(void) { int ret; - tsk_provenance_table_t table; + tsk_provenance_table_t table, table2; tsk_size_t num_rows = 
100; tsk_size_t j; char *timestamp; @@ -2471,6 +2657,36 @@ test_provenance_table(void) CU_ASSERT_EQUAL(table.record_length, num_rows); tsk_provenance_table_print_state(&table, _devnull); + /* Test equality with and without timestamp */ + tsk_provenance_table_copy(&table, &table2, 0); + CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS)); + /* Change the timestamp values */ + table2.timestamp[0] = 0; + CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS)); + /* Change the last timestamp entry */ + table2.timestamp_offset[table2.num_rows] + = table2.timestamp_offset[table2.num_rows - 1]; + CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS)); + /* Delete all timestamps */ + memset(table2.timestamp_offset, 0, + (table2.num_rows + 1) * sizeof(*table2.timestamp_offset)); + CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0)); + CU_ASSERT_TRUE( + tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS)); + tsk_provenance_table_free(&table2); + + /* Test equality with and without timestamp */ + tsk_provenance_table_copy(&table, &table2, 0); + table2.record_length = 0; + CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0)); + tsk_provenance_table_free(&table2); + ret = tsk_provenance_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); @@ -3446,7 +3662,7 @@ test_copy_table_collection(void) CU_ASSERT_EQUAL_FATAL(ret, 1); tsk_table_collection_copy(&tables, &tables_copy, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &tables_copy, 0)); tsk_table_collection_free(&tables); 
tsk_table_collection_free(&tables_copy); @@ -3480,7 +3696,7 @@ test_sort_tables_offsets(void) bookmark.edges = tables.edges.num_rows; ret = tsk_table_collection_sort(&tables, &bookmark, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0)); ret = tsk_table_collection_sort(&tables, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -3501,7 +3717,7 @@ test_sort_tables_offsets(void) bookmark.mutations = tables.mutations.num_rows; ret = tsk_table_collection_sort(&tables, &bookmark, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0)); /* Anything other than len(table) leads to an error for sites * and mutations, and we can't specify one without the other. */ @@ -3569,12 +3785,12 @@ test_sort_tables_edge_metadata(void) insert_edge_metadata(&t1); ret = tsk_table_collection_copy(&t1, &t2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); reverse_edges(&t1); - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0)); ret = tsk_table_collection_sort(&t1, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); @@ -3596,24 +3812,24 @@ test_sort_tables_no_edge_metadata(void) ret = tsk_table_collection_copy(&t1, &t2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_FALSE(t2.edges.options & TSK_NO_EDGE_METADATA); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); reverse_edges(&t1); - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0));
ret = tsk_table_collection_sort(&t1, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); ret = tsk_table_collection_copy(&t1, &t2, TSK_NO_EDGE_METADATA); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(t1.edges.options & TSK_NO_EDGE_METADATA); CU_ASSERT_TRUE(t2.edges.options & TSK_NO_EDGE_METADATA); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); reverse_edges(&t1); - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0)); ret = tsk_table_collection_sort(&t1, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); tsk_table_collection_free(&t1); @@ -3746,12 +3962,12 @@ test_sort_tables_mutation_times(void) ret = tsk_table_collection_copy(&t1, &t2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); reverse_mutations(&t1); - CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, &t2, 0)); ret = tsk_table_collection_sort(&t1, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t2); tsk_table_collection_free(&t1); @@ -3772,54 +3988,54 @@ test_sorter_interface(void) ret = tsk_treeseq_copy_tables(&ts, &tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0)); /* Nominal case */ reverse_edges(&tables); - CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables)); + 
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0)); ret = tsk_table_sorter_init(&sorter, &tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_sorter_run(&sorter, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0)); CU_ASSERT_EQUAL(sorter.user_data, NULL); tsk_table_sorter_free(&sorter); /* If we set the sort_edges function to NULL then we should leave the * node table as is. */ reverse_edges(&tables); - CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges)); + CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0)); ret = tsk_table_sorter_init(&sorter, &tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); sorter.sort_edges = NULL; ret = tsk_table_sorter_run(&sorter, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges)); + CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0)); tsk_table_sorter_free(&sorter); /* Reversing again should make them equal */ reverse_edges(&tables); - CU_ASSERT_TRUE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges, 0)); /* Do not check integrity before sorting */ reverse_edges(&tables); - CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables)); + CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0)); ret = tsk_table_sorter_init(&sorter, &tables, TSK_NO_CHECK_INTEGRITY); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_sorter_run(&sorter, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0)); tsk_table_sorter_free(&sorter); /* The user_data shouldn't be touched */ reverse_edges(&tables); - CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables)); + 
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables, 0)); ret = tsk_table_sorter_init(&sorter, &tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); sorter.user_data = (void *) &ts; ret = tsk_table_sorter_run(&sorter, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables, 0)); CU_ASSERT_EQUAL_FATAL(sorter.user_data, &ts); tsk_table_sorter_free(&sorter); @@ -3849,8 +4065,8 @@ test_dump_unindexed_with_options(tsk_flags_t tc_options) ret = tsk_table_collection_load(&loaded, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_has_index(&loaded, 0)); - CU_ASSERT_TRUE(tsk_node_table_equals(&tables.nodes, &loaded.nodes)); - CU_ASSERT_TRUE(tsk_edge_table_equals(&tables.edges, &loaded.edges)); + CU_ASSERT_TRUE(tsk_node_table_equals(&tables.nodes, &loaded.nodes, 0)); + CU_ASSERT_TRUE(tsk_edge_table_equals(&tables.edges, &loaded.edges, 0)); tsk_table_collection_free(&loaded); tsk_table_collection_free(&tables); @@ -3876,7 +4092,7 @@ test_dump_load_empty_with_options(tsk_flags_t tc_options) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); @@ -3943,7 +4159,7 @@ test_dump_load_unsorted_with_options(tsk_flags_t tc_options) CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0)); ret = tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0)); CU_ASSERT_FALSE(tsk_table_collection_has_index(&t2, 0)); @@ -3987,7 +4203,7 @@ test_dump_load_metadata_schema(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = 
tsk_table_collection_load(&t2, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); @@ -4752,7 +4968,7 @@ test_table_collection_subset_with_options(tsk_flags_t options) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_subset(&tables_copy, nodes, 4); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0)); // reverse twice should get back to the start for (k = 0; k < 4; k++) { @@ -4764,7 +4980,7 @@ test_table_collection_subset_with_options(tsk_flags_t options) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_subset(&tables_copy, nodes, 4); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0)); tsk_table_collection_free(&tables_copy); tsk_table_collection_free(&tables); @@ -4916,14 +5132,14 @@ test_table_collection_union(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_union( &tables_copy, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0)); // self is empty ret = tsk_table_collection_clear(&tables_copy); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_union( &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0)); // union all shared nodes + subset original nodes = original table ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT); @@ -4936,7 +5152,7 @@ 
test_table_collection_union(void) node_mapping[2] = 2; ret = tsk_table_collection_subset(&tables_copy, node_mapping, 3); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy)); + CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0)); // union with one shared node ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT); @@ -5087,8 +5303,7 @@ main(int argc, char **argv) { "test_population_table", test_population_table }, { "test_provenance_table", test_provenance_table }, { "test_table_size_increments", test_table_size_increments }, - { "test_table_collection_equals_with_options", - test_table_collection_equals_with_options }, + { "test_table_collection_equals_options", test_table_collection_equals_options }, { "test_table_collection_simplify_errors", test_table_collection_simplify_errors }, { "test_table_collection_metadata", test_table_collection_metadata }, diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index e705613e1c..da2d5c3ace 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -937,12 +937,12 @@ test_simplest_records(void) ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -970,12 +970,12 @@ test_simplest_nonbinary_records(void) ret = tsk_treeseq_simplify(&ts, sample_ids, 4, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + 
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); ret = tsk_treeseq_simplify(&ts, sample_ids, 4, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -1016,7 +1016,7 @@ test_simplest_unary_records(void) ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -1138,7 +1138,7 @@ test_simplest_degenerate_multiple_root_records(void) ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -1464,12 +1464,12 @@ test_simplest_holey_tree_sequence(void) ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -1556,12 +1556,12 @@ test_simplest_initial_gap_tree_sequence(void) ret = tsk_treeseq_simplify(&ts, 
sample_ids, 2, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables)); + CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); @@ -3847,7 +3847,7 @@ test_single_tree_simplify_no_sample_nodes(void) ret = tsk_table_collection_simplify(&t1, samples, 4, 0, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); @@ -3870,7 +3870,7 @@ test_single_tree_simplify_null_samples(void) ret = tsk_table_collection_simplify(&t1, NULL, 0, 0, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2)); + CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0)); tsk_table_collection_free(&t1); tsk_table_collection_free(&t2); @@ -5646,8 +5646,8 @@ test_deduplicate_sites(void) ret = tsk_table_collection_deduplicate_sites(&messy, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites)); - CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations)); + CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations, 0)); tsk_site_table_clear(&messy.sites); tsk_mutation_table_clear(&messy.mutations); @@ -5658,8 +5658,8 @@ test_deduplicate_sites(void) ret = tsk_table_collection_deduplicate_sites(&messy, 0); CU_ASSERT_EQUAL(ret, 0); - CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites)); - 
CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations)); + CU_ASSERT_TRUE(tsk_site_table_equals(&tidy.sites, &messy.sites, 0)); + CU_ASSERT_TRUE(tsk_mutation_table_equals(&tidy.mutations, &messy.mutations, 0)); tsk_table_collection_free(&tidy); tsk_table_collection_free(&messy); diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 8b423be137..9208694d3e 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -690,21 +690,21 @@ tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out) } bool -tsk_individual_table_equals( - const tsk_individual_table_t *self, const tsk_individual_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t)) - == 0 - && memcmp(self->location_offset, other->location_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 - && memcmp(self->location, other->location, - self->location_length * sizeof(double)) - == 0 +tsk_individual_table_equals(const tsk_individual_table_t *self, + const tsk_individual_table_t *other, tsk_flags_t options) +{ + bool ret + = self->num_rows == other->num_rows + && memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t)) == 0 + && memcmp(self->location_offset, other->location_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp( + self->location, other->location, self->location_length * sizeof(double)) + == 0; + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_offset, other->metadata_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 @@ -1153,21 +1153,22 @@ tsk_node_table_dump_text(const tsk_node_table_t *self, FILE *out) } bool -tsk_node_table_equals(const 
tsk_node_table_t *self, const tsk_node_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0 - && memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t)) - == 0 - && memcmp(self->population, other->population, - self->num_rows * sizeof(tsk_id_t)) - == 0 - && memcmp(self->individual, other->individual, - self->num_rows * sizeof(tsk_id_t)) - == 0 +tsk_node_table_equals( + const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options) +{ + bool ret + = self->num_rows == other->num_rows + && memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0 + && memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t)) == 0 + && memcmp( + self->population, other->population, self->num_rows * sizeof(tsk_id_t)) + == 0 + && memcmp( + self->individual, other->individual, self->num_rows * sizeof(tsk_id_t)) + == 0; + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_offset, other->metadata_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 @@ -1684,39 +1685,41 @@ tsk_edge_table_dump_text(const tsk_edge_table_t *self, FILE *out) } bool -tsk_edge_table_equals(const tsk_edge_table_t *self, const tsk_edge_table_t *other) +tsk_edge_table_equals( + const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options) { - bool ret = false; bool metadata_equal; - - if (self->num_rows == other->num_rows - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - if (tsk_edge_table_has_metadata(self) && tsk_edge_table_has_metadata(other)) { - metadata_equal = 
memcmp(self->metadata_offset, other->metadata_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 - && memcmp(self->metadata, other->metadata, - self->metadata_length * sizeof(char)) - == 0; - - } else { - /* The only way that the metadata lengths can be equal (which - * we've already tests) if either one or the other of the tables - * hasn't got metadata is if they are both zero. */ - tsk_bug_assert(self->metadata_length == 0); - metadata_equal = true; - } - ret = memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0 - && memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0 - && memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t)) - == 0 - && memcmp(self->child, other->child, self->num_rows * sizeof(tsk_id_t)) - == 0 - && metadata_equal + bool ret + = self->num_rows == other->num_rows + && memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0 + && memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0 + && memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->child, other->child, self->num_rows * sizeof(tsk_id_t)) == 0; + + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_schema, other->metadata_schema, self->metadata_schema_length * sizeof(char)) == 0; + metadata_equal = false; + if (self->metadata_length == other->metadata_length) { + if (tsk_edge_table_has_metadata(self) + && tsk_edge_table_has_metadata(other)) { + metadata_equal = memcmp(self->metadata_offset, other->metadata_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp(self->metadata, other->metadata, + self->metadata_length * sizeof(char)) + == 0; + } else { + /* The only way that the metadata lengths can be equal (which + * we've already tested) and either one or the other of the tables + * hasn't got metadata is if they are both zero length. 
*/ + tsk_bug_assert(self->metadata_length == 0); + metadata_equal = true; + } + } + ret = ret && metadata_equal; } return ret; } @@ -2163,21 +2166,23 @@ tsk_site_table_set_columns(tsk_site_table_t *self, tsk_size_t num_rows, } bool -tsk_site_table_equals(const tsk_site_table_t *self, const tsk_site_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows - && self->ancestral_state_length == other->ancestral_state_length - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = memcmp(self->position, other->position, self->num_rows * sizeof(double)) - == 0 - && memcmp(self->ancestral_state_offset, other->ancestral_state_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 - && memcmp(self->ancestral_state, other->ancestral_state, - self->ancestral_state_length * sizeof(char)) - == 0 +tsk_site_table_equals( + const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options) +{ + bool ret + = self->num_rows == other->num_rows + && self->ancestral_state_length == other->ancestral_state_length + && memcmp(self->position, other->position, self->num_rows * sizeof(double)) + == 0 + && memcmp(self->ancestral_state_offset, other->ancestral_state_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp(self->ancestral_state, other->ancestral_state, + self->ancestral_state_length * sizeof(char)) + == 0; + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_offset, other->metadata_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 @@ -2740,25 +2745,25 @@ tsk_mutation_table_set_columns(tsk_mutation_table_t *self, tsk_size_t num_rows, } bool -tsk_mutation_table_equals( - const tsk_mutation_table_t *self, const tsk_mutation_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows 
- && self->derived_state_length == other->derived_state_length - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = memcmp(self->site, other->site, self->num_rows * sizeof(tsk_id_t)) == 0 - && memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0 - && memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t)) - == 0 - && memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0 - && memcmp(self->derived_state_offset, other->derived_state_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 - && memcmp(self->derived_state, other->derived_state, - self->derived_state_length * sizeof(char)) - == 0 +tsk_mutation_table_equals(const tsk_mutation_table_t *self, + const tsk_mutation_table_t *other, tsk_flags_t options) +{ + bool ret + = self->num_rows == other->num_rows + && self->derived_state_length == other->derived_state_length + && memcmp(self->site, other->site, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0 + && memcmp(self->derived_state_offset, other->derived_state_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp(self->derived_state, other->derived_state, + self->derived_state_length * sizeof(char)) + == 0; + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_offset, other->metadata_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 @@ -3379,20 +3384,21 @@ tsk_migration_table_dump_text(const tsk_migration_table_t *self, FILE *out) } bool -tsk_migration_table_equals( - const tsk_migration_table_t *self, const tsk_migration_table_t *other) -{ - 
bool ret = false; - if (self->num_rows == other->num_rows - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0 - && memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0 - && memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0 - && memcmp(self->source, other->source, self->num_rows * sizeof(tsk_id_t)) - == 0 - && memcmp(self->dest, other->dest, self->num_rows * sizeof(tsk_id_t)) == 0 - && memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0 +tsk_migration_table_equals(const tsk_migration_table_t *self, + const tsk_migration_table_t *other, tsk_flags_t options) +{ + bool ret + = self->num_rows == other->num_rows + && memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0 + && memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0 + && memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->source, other->source, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->dest, other->dest, self->num_rows * sizeof(tsk_id_t)) == 0 + && memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0; + + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length && memcmp(self->metadata_offset, other->metadata_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 @@ -3818,16 +3824,19 @@ tsk_population_table_dump_text(const tsk_population_table_t *self, FILE *out) } bool -tsk_population_table_equals( - const tsk_population_table_t *self, const tsk_population_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows - && self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length) { - ret = 
memcmp(self->metadata_offset, other->metadata_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 +tsk_population_table_equals(const tsk_population_table_t *self, + const tsk_population_table_t *other, tsk_flags_t options) +{ + /* Since we only have the metadata column in the table currently, equality + * reduces to comparing the number of rows if we disable metadata comparison. + */ + bool ret = self->num_rows == other->num_rows; + if (!(options & TSK_CMP_IGNORE_METADATA)) { + ret = ret && self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length + && memcmp(self->metadata_offset, other->metadata_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 && memcmp(self->metadata, other->metadata, self->metadata_length * sizeof(char)) == 0 @@ -4298,22 +4307,23 @@ tsk_provenance_table_dump_text(const tsk_provenance_table_t *self, FILE *out) } bool -tsk_provenance_table_equals( - const tsk_provenance_table_t *self, const tsk_provenance_table_t *other) -{ - bool ret = false; - if (self->num_rows == other->num_rows - && self->timestamp_length == other->timestamp_length) { - ret = memcmp(self->timestamp_offset, other->timestamp_offset, - (self->num_rows + 1) * sizeof(tsk_size_t)) - == 0 - && memcmp(self->timestamp, other->timestamp, - self->timestamp_length * sizeof(char)) - == 0 - && memcmp(self->record_offset, other->record_offset, +tsk_provenance_table_equals(const tsk_provenance_table_t *self, + const tsk_provenance_table_t *other, tsk_flags_t options) +{ + bool ret = self->num_rows == other->num_rows + && self->record_length == other->record_length + && memcmp(self->record_offset, other->record_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp(self->record, other->record, self->record_length * sizeof(char)) + == 0; + if (!(options & TSK_CMP_IGNORE_TIMESTAMPS)) { + ret = ret && self->timestamp_length == other->timestamp_length + && memcmp(self->timestamp_offset, 
other->timestamp_offset, (self->num_rows + 1) * sizeof(tsk_size_t)) == 0 - && memcmp(self->record, other->record, self->record_length * sizeof(char)) + && memcmp(self->timestamp, other->timestamp, + self->timestamp_length * sizeof(char)) == 0; } return ret; @@ -4402,11 +4412,12 @@ cmp_site(const void *a, const void *b) /* Compare sites by position */ int ret = (ia->position > ib->position) - (ia->position < ib->position); if (ret == 0) { - /* Within a particular position sort by ID. This ensures that relative ordering - * of multiple sites at the same position is maintained; the redundant sites - * will get compacted down by clean_tables(), but in the meantime if the order - * of the redundant sites changes it will cause the sort order of mutations to - * be corrupted, as the mutations will follow their sites. */ + /* Within a particular position sort by ID. This ensures that relative + * ordering of multiple sites at the same position is maintained; the + * redundant sites will get compacted down by clean_tables(), but in the + * meantime if the order of the redundant sites changes it will cause the + * sort order of mutations to be corrupted, as the mutations will follow + * their sites. 
*/ ret = (ia->id > ib->id) - (ia->id < ib->id); } return ret; @@ -4532,7 +4543,8 @@ tsk_table_sorter_sort_sites(tsk_table_sorter_t *self) /* Sort the sites by position */ qsort(sorted_sites, num_sites, sizeof(*sorted_sites), cmp_site); - /* Build the mapping from old site IDs to new site IDs and copy back into the table + /* Build the mapping from old site IDs to new site IDs and copy back into the + * table */ tsk_site_table_clear(sites); for (j = 0; j < num_sites; j++) { @@ -5156,7 +5168,8 @@ ancestor_mapper_init(ancestor_mapper_t *self, tsk_id_t *samples, size_t num_samp goto out; } - /* Allocate the heaps used for small objects-> Assuming 8K is a good chunk size */ + /* Allocate the heaps used for small objects-> Assuming 8K is a good chunk size + */ ret = tsk_blkalloc_init(&self->segment_heap, 8192); if (ret != 0) { goto out; @@ -6501,7 +6514,8 @@ simplifier_init(simplifier_t *self, const tsk_id_t *samples, size_t num_samples, } memcpy(self->samples, samples, num_samples * sizeof(tsk_id_t)); - /* Allocate the heaps used for small objects-> Assuming 8K is a good chunk size */ + /* Allocate the heaps used for small objects-> Assuming 8K is a good chunk size + */ ret = tsk_blkalloc_init(&self->segment_heap, 8192); if (ret != 0) { goto out; @@ -6929,8 +6943,8 @@ simplifier_finalise_references(simplifier_t *self) goto out; } - /* TODO Migrations fit reasonably neatly into the pattern that we have here. We can - * consider references to populations from migration objects in the same way + /* TODO Migrations fit reasonably neatly into the pattern that we have here. We + * can consider references to populations from migration objects in the same way * as from nodes, so that we only remove a population if its referenced by * neither. Mapping the population IDs in migrations is then easy. 
In principle * nodes are similar, but the semantics are slightly different because we've @@ -7513,8 +7527,8 @@ tsk_table_collection_check_mutation_integrity( ret = TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION; goto out; } - /* Check time ordering, we do this after the time checks above, so that - more specific errors trigger first */ + /* Check time ordering, we do this after the time checks above, so + that more specific errors trigger first */ if (mutation_time > last_known_time) { ret = TSK_ERR_UNSORTED_MUTATIONS; goto out; @@ -7848,47 +7862,45 @@ tsk_table_collection_free(tsk_table_collection_t *self) return 0; } -/* Returns true if all the tables and collection metadata are equal. Note - * this does *not* consider the indexes, since these are derived from the - * tables. We do not consider the file_uuids either, since this is a property of - * the file that set of tables is stored in. */ bool -tsk_table_collection_equals_with_options(const tsk_table_collection_t *self, +tsk_table_collection_equals(const tsk_table_collection_t *self, const tsk_table_collection_t *other, tsk_flags_t options) { - bool ignore_metadata = !!(options & TSK_IGNORE_TOP_LEVEL_METADATA); - bool ignore_provenance = !!(options & TSK_IGNORE_PROVENANCE); bool ret = self->sequence_length == other->sequence_length - && (ignore_metadata - || (self->metadata_length == other->metadata_length - && self->metadata_schema_length == other->metadata_schema_length - && memcmp(self->metadata, other->metadata, - self->metadata_length * sizeof(char)) - == 0 - && memcmp(self->metadata_schema, other->metadata_schema, - self->metadata_schema_length * sizeof(char)) - == 0)) - && tsk_individual_table_equals(&self->individuals, &other->individuals) - && tsk_node_table_equals(&self->nodes, &other->nodes) - && tsk_edge_table_equals(&self->edges, &other->edges) - && tsk_migration_table_equals(&self->migrations, &other->migrations) - && tsk_site_table_equals(&self->sites, &other->sites) - && 
tsk_mutation_table_equals(&self->mutations, &other->mutations) - && tsk_population_table_equals(&self->populations, &other->populations) - && (ignore_provenance - || tsk_provenance_table_equals( - &self->provenances, &other->provenances)); + && tsk_individual_table_equals( + &self->individuals, &other->individuals, options) + && tsk_node_table_equals(&self->nodes, &other->nodes, options) + && tsk_edge_table_equals(&self->edges, &other->edges, options) + && tsk_migration_table_equals(&self->migrations, &other->migrations, options) + && tsk_site_table_equals(&self->sites, &other->sites, options) + && tsk_mutation_table_equals(&self->mutations, &other->mutations, options) + && tsk_population_table_equals( + &self->populations, &other->populations, options); + + /* TSK_CMP_IGNORE_TS_METADATA is implied by TSK_CMP_IGNORE_METADATA */ + if (options & TSK_CMP_IGNORE_METADATA) { + options |= TSK_CMP_IGNORE_TS_METADATA; + } + if (!(options & TSK_CMP_IGNORE_TS_METADATA)) { + ret = ret + && (self->metadata_length == other->metadata_length + && self->metadata_schema_length == other->metadata_schema_length + && memcmp(self->metadata, other->metadata, + self->metadata_length * sizeof(char)) + == 0 + && memcmp(self->metadata_schema, other->metadata_schema, + self->metadata_schema_length * sizeof(char)) + == 0); + } + if (!(options & TSK_CMP_IGNORE_PROVENANCE)) { + ret = ret + && tsk_provenance_table_equals( + &self->provenances, &other->provenances, options); + } return ret; } -bool -tsk_table_collection_equals( - const tsk_table_collection_t *self, const tsk_table_collection_t *other) -{ - return tsk_table_collection_equals_with_options(self, other, 0); -} - int tsk_table_collection_set_metadata( tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length) @@ -8898,8 +8910,9 @@ tsk_table_collection_compute_mutation_times( denominator[mutations.node[mutation]]++; mutation++; } - /* Go over the mutations again assigning times. 
As the sorting requirements - guarantee that parents are before children, we assign oldest first */ + /* Go over the mutations again assigning times. As the sorting + requirements guarantee that parents are before children, we assign + oldest first */ for (j = first_mutation; j < mutation; j++) { u = mutations.node[j]; numerator[u]++; @@ -9259,7 +9272,7 @@ tsk_check_subset_equality(tsk_table_collection_t *self, if (ret != 0) { goto out; } - if (!tsk_table_collection_equals(&self_copy, &other_copy)) { + if (!tsk_table_collection_equals(&self_copy, &other_copy, 0)) { ret = TSK_ERR_UNION_DIFF_HISTORIES; goto out; } diff --git a/c/tskit/tables.h b/c/tskit/tables.h index b1e6d8ea04..ea5ca102fb 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -722,8 +722,10 @@ typedef struct { #define TSK_UNION_NO_ADD_POP (1 << 1) /* Flags for table collection equals */ -#define TSK_IGNORE_TOP_LEVEL_METADATA (1 << 0) -#define TSK_IGNORE_PROVENANCE (1 << 1) +#define TSK_CMP_IGNORE_TS_METADATA (1 << 0) +#define TSK_CMP_IGNORE_PROVENANCE (1 << 1) +#define TSK_CMP_IGNORE_METADATA (1 << 2) +#define TSK_CMP_IGNORE_TIMESTAMPS (1 << 3) /****************************************************************************/ /* Function signatures */ @@ -813,12 +815,27 @@ int tsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_r @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. + +@endrst + @param self A pointer to a tsk_individual_table_t object. @param other A pointer to a tsk_individual_table_t object. +@param options Bitwise comparison options. 
@return Return true if the specified table is equal to this table. */ -bool tsk_individual_table_equals( - const tsk_individual_table_t *self, const tsk_individual_table_t *other); +bool tsk_individual_table_equals(const tsk_individual_table_t *self, + const tsk_individual_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -987,11 +1004,27 @@ int tsk_node_table_truncate(tsk_node_table_t *self, tsk_size_t num_rows); @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. + +@endrst + @param self A pointer to a tsk_node_table_t object. @param other A pointer to a tsk_node_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_node_table_equals(const tsk_node_table_t *self, const tsk_node_table_t *other); +bool tsk_node_table_equals( + const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -1098,8 +1131,7 @@ TSK_NO_METADATA @endrst @param self A pointer to an uninitialised tsk_edge_table_t object. -@param options Allocation time options. Currently unused; should be - set to zero to ensure compatibility with later versions of tskit. +@param options Allocation time options. @return Return 0 on success or a negative value on failure. 
*/ int tsk_edge_table_init(tsk_edge_table_t *self, tsk_flags_t options); @@ -1163,11 +1195,27 @@ int tsk_edge_table_truncate(tsk_edge_table_t *self, tsk_size_t num_rows); @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. + +@endrst + @param self A pointer to a tsk_edge_table_t object. @param other A pointer to a tsk_edge_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_edge_table_equals(const tsk_edge_table_t *self, const tsk_edge_table_t *other); +bool tsk_edge_table_equals( + const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -1334,12 +1382,27 @@ int tsk_migration_table_truncate(tsk_migration_table_t *self, tsk_size_t num_row @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. + +@endrst + @param self A pointer to a tsk_migration_table_t object. @param other A pointer to a tsk_migration_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. 
*/ -bool tsk_migration_table_equals( - const tsk_migration_table_t *self, const tsk_migration_table_t *other); +bool tsk_migration_table_equals(const tsk_migration_table_t *self, + const tsk_migration_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -1502,11 +1565,27 @@ int tsk_site_table_truncate(tsk_site_table_t *self, tsk_size_t num_rows); @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. + +@endrst + @param self A pointer to a tsk_site_table_t object. @param other A pointer to a tsk_site_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_site_table_equals(const tsk_site_table_t *self, const tsk_site_table_t *other); +bool tsk_site_table_equals( + const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -1674,12 +1753,27 @@ int tsk_mutation_table_truncate(tsk_mutation_table_t *self, tsk_size_t num_rows) @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata or metadata schemas in the comparison. 
+ +@endrst + @param self A pointer to a tsk_mutation_table_t object. @param other A pointer to a tsk_mutation_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_mutation_table_equals( - const tsk_mutation_table_t *self, const tsk_mutation_table_t *other); +bool tsk_mutation_table_equals(const tsk_mutation_table_t *self, + const tsk_mutation_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -1841,12 +1935,29 @@ int tsk_population_table_truncate(tsk_population_table_t *self, tsk_size_t num_r @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns, +and their metadata schemas are byte-wise identical. + +TSK_CMP_IGNORE_METADATA + Do not include metadata in the comparison. Note that as metadata is the + only column in the population table, two population tables are considered + equal if they have the same number of rows if this flag is specified. + +@endrst + @param self A pointer to a tsk_population_table_t object. @param other A pointer to a tsk_population_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_population_table_equals( - const tsk_population_table_t *self, const tsk_population_table_t *other); +bool tsk_population_table_equals(const tsk_population_table_t *self, + const tsk_population_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. 
@@ -2004,12 +2115,27 @@ int tsk_provenance_table_truncate(tsk_provenance_table_t *self, tsk_size_t num_r @brief Returns true if the data in the specified table is identical to the data in this table. +@rst + +**Options** + +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) tables are +considered equal if they are byte-wise identical in all columns. + +TSK_CMP_IGNORE_TIMESTAMPS + Do not include the timestamp column when comparing provenance + tables. + +@endrst + @param self A pointer to a tsk_provenance_table_t object. @param other A pointer to a tsk_provenance_table_t object. +@param options Bitwise comparison options. @return Return true if the specified table is equal to this table. */ -bool tsk_provenance_table_equals( - const tsk_provenance_table_t *self, const tsk_provenance_table_t *other); +bool tsk_provenance_table_equals(const tsk_provenance_table_t *self, + const tsk_provenance_table_t *other, tsk_flags_t options); /** @brief Copies the state of this table into the specified destination. @@ -2138,44 +2264,46 @@ No memory is freed as a result of this operation; please use int tsk_table_collection_clear(tsk_table_collection_t *self); /** -@brief Returns true if the data in the specified table collection is identical to the -data in this table, with options to allow partial equality. +@brief Returns true if the data in the specified table collection is equal + to the data in this table collection. @rst -Returns true if the data in all of the table columns are byte-by-byte equal -and the sequence lengths of the two table collections are equal. Indexes are -not considered when determining equality, since they are derived from the -basic data. You may disable the comparison of top-level metadata and -metadata schema (`TSK_IGNORE_TOP_LEVEL_METADATA`) or the provenance table -(`TSK_IGNORE_PROVENANCE`). Note that table row-level metadata and table schemas -are always checked. 
-@endrst +Returns true if the two table collections are equal. The indexes are +not considered as these are derived from the tables. We also do not +consider the ``file_uuid``, since it is a property of the file that the set +of tables is stored in. -@param self A pointer to a tsk_table_collection_t object. -@param other A pointer to a tsk_table_collection_t object. -@param options Bitwise options. See above for details. -@return Return true if the specified table collection is equal to this table. -*/ -bool tsk_table_collection_equals_with_options(const tsk_table_collection_t *self, - const tsk_table_collection_t *other, tsk_flags_t options); - -/** -@brief Returns true if the data in the specified table collection is identical to the -data in this table. +**Options** -@rst -Returns true if the data in all of the table columns are byte-by-byte equal -and the sequence lengths of the two table collections are equal. Indexes are not -considered when determining equality, since they are derived from the basic data. +Options to control the comparison can be specified by providing one or +more of the following bitwise flags. By default (options=0) two table +collections are considered equal if all of the tables are byte-wise +identical, and the sequence lengths, metadata and metadata schemas +of the two table collections are identical. + +TSK_CMP_IGNORE_PROVENANCE + Do not include the provenance table in comparison. +TSK_CMP_IGNORE_METADATA + Do not include metadata when comparing the table collections. + This includes both the top-level tree sequence metadata as well as the + metadata for each of the tables (i.e., TSK_CMP_IGNORE_TS_METADATA is implied). + All metadata schemas are also ignored. +TSK_CMP_IGNORE_TS_METADATA + Do not include the top-level tree sequence metadata and metadata schemas + in the comparison. +TSK_CMP_IGNORE_TIMESTAMPS + Do not include the timestamp information when comparing the provenance + tables.
This has no effect if TSK_CMP_IGNORE_PROVENANCE is specified. @endrst @param self A pointer to a tsk_table_collection_t object. @param other A pointer to a tsk_table_collection_t object. +@param options Bitwise comparison options. @return Return true if the specified table collection is equal to this table. */ -bool tsk_table_collection_equals( - const tsk_table_collection_t *self, const tsk_table_collection_t *other); +bool tsk_table_collection_equals(const tsk_table_collection_t *self, + const tsk_table_collection_t *other, tsk_flags_t options); /** @brief Copies the state of this table collection into the specified destination. diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index ef59ce0c61..7da66a663c 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -4,9 +4,11 @@ **Features** -- Added ``TableCollection.equals``, a method that compares two table - collections with the options to ignore top-level metadata/schema or - provenance tables. (:user:`mufernando`, :issue:`896`, :pr:`897`). +- Added ``equals`` method to TableCollection and each of the tables which + provides more flexible equality comparisons, for example, allowing + users to ignore metadata or provenance in the comparison. + (:user:`mufernando`, :user:`jeromekelleher`, :issue:`896`, :pr:`897`, + :issue:`913`, :pr:`917`). - ``ts.dump`` and ``tskit.load`` now support reading and writing file objects such as FIFOs and sockets. 
(:user:`benjeffery`, :issue:`657`, :pr:`909`) diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 6db6924d9c..a99cab7898 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -859,18 +859,27 @@ IndividualTable_add_row(IndividualTable *self, PyObject *args, PyObject *kwds) static PyTypeObject IndividualTableType; static PyObject * -IndividualTable_equals(IndividualTable *self, PyObject *args) +IndividualTable_equals(IndividualTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; IndividualTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (IndividualTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|i", kwlist, &IndividualTableType, + &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &IndividualTableType, &other)) { + if (IndividualTable_check_state(self) != 0 + || IndividualTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_individual_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue( + "i", tsk_individual_table_equals(self->table, other->table, options)); out: return ret; } @@ -1168,7 +1177,7 @@ static PyMethodDef IndividualTable_methods[] = { .ml_doc = "Returns the kth row in this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) IndividualTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns true if the specified individual table is equal." 
}, { .ml_name = "append_columns", .ml_meth = (PyCFunction) IndividualTable_append_columns, @@ -1315,18 +1324,25 @@ NodeTable_add_row(NodeTable *self, PyObject *args, PyObject *kwds) static PyTypeObject NodeTableType; static PyObject * -NodeTable_equals(NodeTable *self, PyObject *args) +NodeTable_equals(NodeTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; NodeTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (NodeTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!|i", kwlist, &NodeTableType, &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &NodeTableType, &other)) { + if (NodeTable_check_state(self) != 0 || NodeTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_node_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue("i", tsk_node_table_equals(self->table, other->table, options)); out: return ret; } @@ -1633,7 +1649,7 @@ static PyMethodDef NodeTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) NodeTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified NodeTable is equal to this one." 
}, { .ml_name = "get_row", .ml_meth = (PyCFunction) NodeTable_get_row, @@ -1781,18 +1797,25 @@ EdgeTable_add_row(EdgeTable *self, PyObject *args, PyObject *kwds) static PyTypeObject EdgeTableType; static PyObject * -EdgeTable_equals(EdgeTable *self, PyObject *args) +EdgeTable_equals(EdgeTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; EdgeTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (EdgeTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!|i", kwlist, &EdgeTableType, &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &EdgeTableType, &other)) { + if (EdgeTable_check_state(self) != 0 || EdgeTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_edge_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue("i", tsk_edge_table_equals(self->table, other->table, options)); out: return ret; } @@ -2116,7 +2139,7 @@ static PyMethodDef EdgeTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) EdgeTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified EdgeTable is equal to this one." 
}, { .ml_name = "get_row", .ml_meth = (PyCFunction) EdgeTable_get_row, @@ -2267,18 +2290,27 @@ MigrationTable_add_row(MigrationTable *self, PyObject *args, PyObject *kwds) static PyTypeObject MigrationTableType; static PyObject * -MigrationTable_equals(MigrationTable *self, PyObject *args) +MigrationTable_equals(MigrationTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; MigrationTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (MigrationTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!|i", kwlist, &MigrationTableType, &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &MigrationTableType, &other)) { + if (MigrationTable_check_state(self) != 0 + || MigrationTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_migration_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue( + "i", tsk_migration_table_equals(self->table, other->table, options)); out: return ret; } @@ -2615,7 +2647,7 @@ static PyMethodDef MigrationTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) MigrationTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified MigrationTable is equal to this one." 
}, { .ml_name = "get_row", .ml_meth = (PyCFunction) MigrationTable_get_row, @@ -2762,18 +2794,25 @@ SiteTable_add_row(SiteTable *self, PyObject *args, PyObject *kwds) static PyTypeObject SiteTableType; static PyObject * -SiteTable_equals(SiteTable *self, PyObject *args) +SiteTable_equals(SiteTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; SiteTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (SiteTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!|i", kwlist, &SiteTableType, &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &SiteTableType, &other)) { + if (SiteTable_check_state(self) != 0 || SiteTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_site_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue("i", tsk_site_table_equals(self->table, other->table, options)); out: return ret; } @@ -3067,7 +3106,7 @@ static PyMethodDef SiteTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) SiteTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified SiteTable is equal to this one." 
}, { .ml_name = "get_row", .ml_meth = (PyCFunction) SiteTable_get_row, @@ -3219,18 +3258,26 @@ MutationTable_add_row(MutationTable *self, PyObject *args, PyObject *kwds) static PyTypeObject MutationTableType; static PyObject * -MutationTable_equals(MutationTable *self, PyObject *args) +MutationTable_equals(MutationTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; MutationTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (MutationTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!|i", kwlist, &MutationTableType, &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &MutationTableType, &other)) { + if (MutationTable_check_state(self) != 0 || MutationTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_mutation_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue( + "i", tsk_mutation_table_equals(self->table, other->table, options)); out: return ret; } @@ -3569,7 +3616,7 @@ static PyMethodDef MutationTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) MutationTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified MutationTable is equal to this one." 
}, { .ml_name = "get_row", .ml_meth = (PyCFunction) MutationTable_get_row, @@ -3713,18 +3760,27 @@ PopulationTable_add_row(PopulationTable *self, PyObject *args, PyObject *kwds) static PyTypeObject PopulationTableType; static PyObject * -PopulationTable_equals(PopulationTable *self, PyObject *args) +PopulationTable_equals(PopulationTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; PopulationTable *other = NULL; + tsk_flags_t options = 0; + int ignore_metadata = false; + static char *kwlist[] = { "other", "ignore_metadata", NULL }; - if (PopulationTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|i", kwlist, &PopulationTableType, + &other, &ignore_metadata)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &PopulationTableType, &other)) { + if (PopulationTable_check_state(self) != 0 + || PopulationTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_population_table_equals(self->table, other->table)); + if (ignore_metadata) { + options |= TSK_CMP_IGNORE_METADATA; + } + ret = Py_BuildValue( + "i", tsk_population_table_equals(self->table, other->table, options)); out: return ret; } @@ -3967,7 +4023,7 @@ static PyMethodDef PopulationTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) PopulationTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified PopulationTable is equal to this one." 
}, { .ml_name = "get_row", @@ -4109,18 +4165,27 @@ ProvenanceTable_add_row(ProvenanceTable *self, PyObject *args, PyObject *kwds) static PyTypeObject ProvenanceTableType; static PyObject * -ProvenanceTable_equals(ProvenanceTable *self, PyObject *args) +ProvenanceTable_equals(ProvenanceTable *self, PyObject *args, PyObject *kwds) { PyObject *ret = NULL; ProvenanceTable *other = NULL; + tsk_flags_t options = 0; + int ignore_timestamps = false; + static char *kwlist[] = { "other", "ignore_timestamps", NULL }; - if (ProvenanceTable_check_state(self) != 0) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|i", kwlist, &ProvenanceTableType, + &other, &ignore_timestamps)) { goto out; } - if (!PyArg_ParseTuple(args, "O!", &ProvenanceTableType, &other)) { + if (ProvenanceTable_check_state(self) != 0 + || ProvenanceTable_check_state(other) != 0) { goto out; } - ret = Py_BuildValue("i", tsk_provenance_table_equals(self->table, other->table)); + if (ignore_timestamps) { + options |= TSK_CMP_IGNORE_TIMESTAMPS; + } + ret = Py_BuildValue( + "i", tsk_provenance_table_equals(self->table, other->table, options)); out: return ret; } @@ -4353,7 +4418,7 @@ static PyMethodDef ProvenanceTable_methods[] = { .ml_doc = "Adds a new row to this table." }, { .ml_name = "equals", .ml_meth = (PyCFunction) ProvenanceTable_equals, - .ml_flags = METH_VARARGS, + .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = "Returns True if the specified ProvenanceTable is equal to this one." 
}, { .ml_name = "get_row", @@ -4399,6 +4464,17 @@ static PyTypeObject ProvenanceTableType = { *=================================================================== */ +static int +TableCollection_check_state(TableCollection *self) +{ + int ret = 0; + if (self->tables == NULL) { + PyErr_SetString(PyExc_SystemError, "TableCollection not initialised"); + ret = -1; + } + return ret; +} + static void TableCollection_dealloc(TableCollection *self) { @@ -5152,22 +5228,35 @@ TableCollection_equals(TableCollection *self, PyObject *args, PyObject *kwds) TableCollection *other = NULL; tsk_flags_t options = 0; int ignore_metadata = false; + int ignore_ts_metadata = false; int ignore_provenance = false; - static char *kwlist[] - = { "other", "ignore_top_level_metadata", "ignore_provenance", NULL }; + int ignore_timestamps = false; + static char *kwlist[] = { "other", "ignore_metadata", "ignore_ts_metadata", + "ignore_provenance", "ignore_timestamps", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|ii", kwlist, &TableCollectionType, - &other, &ignore_metadata, &ignore_provenance)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|iiii", kwlist, &TableCollectionType, + &other, &ignore_metadata, &ignore_ts_metadata, &ignore_provenance, + &ignore_timestamps)) { goto out; } if (ignore_metadata) { - options |= TSK_IGNORE_TOP_LEVEL_METADATA; + options |= TSK_CMP_IGNORE_METADATA; + } + if (ignore_ts_metadata) { + options |= TSK_CMP_IGNORE_TS_METADATA; } if (ignore_provenance) { - options |= TSK_IGNORE_PROVENANCE; + options |= TSK_CMP_IGNORE_PROVENANCE; } - ret = Py_BuildValue("i", - tsk_table_collection_equals_with_options(self->tables, other->tables, options)); + if (ignore_timestamps) { + options |= TSK_CMP_IGNORE_TIMESTAMPS; + } + if (TableCollection_check_state(self) != 0 + || TableCollection_check_state(other) != 0) { + goto out; + } + ret = Py_BuildValue( + "i", tsk_table_collection_equals(self->tables, other->tables, options)); out: return ret; } diff --git
a/python/tests/conftest.py b/python/tests/conftest.py index 60019faa47..f9e435ba74 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -97,25 +97,23 @@ def ts_fixture(): record_migrations=True, ) tables = ts.dump_tables() - for table in [ - "edges", - "individuals", - "migrations", - "mutations", - "nodes", - "populations", - "sites", - ]: - getattr(tables, table).metadata_schema = tskit.MetadataSchema({"codec": "json"}) - metadatas = [f"n_{table}_{u}" for u in range(getattr(ts, f"num_{table}"))] - metadata, metadata_offset = tskit.pack_strings(metadatas) - getattr(tables, table).set_columns( - **{ - **getattr(tables, table).asdict(), - "metadata": metadata, - "metadata_offset": metadata_offset, - } - ) + # TODO replace this with properly linked up individuals using sim_ancestry + # once 1.0 is released. + for j in range(n): + tables.individuals.add_row(flags=j, location=(j, j)) + + for name, table in tables.name_map.items(): + if name != "provenances": + table.metadata_schema = tskit.MetadataSchema({"codec": "json"}) + metadatas = [f"n_{name}_{u}" for u in range(len(table))] + metadata, metadata_offset = tskit.pack_strings(metadatas) + table.set_columns( + **{ + **table.asdict(), + "metadata": metadata, + "metadata_offset": metadata_offset, + } + ) tables.metadata_schema = tskit.MetadataSchema({"codec": "json"}) tables.metadata = "Test metadata" return tables.tree_sequence() diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 52c3c63a50..a918ef160b 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -361,13 +361,48 @@ def test_ibd_output_recomb(self): assert len(value) == 3 def test_equals_bad_args(self): - # Tests the low-level equals interface to ensure we're getting coverage. 
ts = msprime.simulate(10, random_seed=1242) tc = ts.tables._ll_tables with pytest.raises(TypeError): tc.equals() with pytest.raises(TypeError): tc.equals(None) + assert tc.equals(tc) + with pytest.raises(TypeError): + tc.equals(tc, no_such_arg=1) + bad_bool = "x" + with pytest.raises(TypeError): + tc.equals(tc, ignore_metadata=bad_bool) + with pytest.raises(TypeError): + tc.equals(tc, ignore_ts_metadata=bad_bool) + with pytest.raises(TypeError): + tc.equals(tc, ignore_provenance=bad_bool) + with pytest.raises(TypeError): + tc.equals(tc, ignore_timestamps=bad_bool) + + +class TestTableMethodsErrors: + """ + Tests for error handling in the low-level tables. + """ + + def yield_tables(self, ts): + for table in ts.tables.name_map.values(): + yield table.ll_table + + def test_equals_bad_args(self, ts_fixture): + for ll_table in self.yield_tables(ts_fixture): + assert ll_table.equals(ll_table) + with pytest.raises(TypeError): + ll_table.equals(None) + with pytest.raises(TypeError): + ll_table.equals(ll_table, no_such_arg="") + + def test_get_row_bad_args(self, ts_fixture): + for ll_table in self.yield_tables(ts_fixture): + assert ll_table.get_row(0) is not None + with pytest.raises(TypeError): + ll_table.get_row(no_such_arg="") class TestTreeSequence(LowLevelTestCase, MetadataTestMixin): diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index 11b8ecd22b..61c6fb2131 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -29,6 +29,7 @@ import math import pickle import random +import time import unittest import warnings @@ -2395,7 +2396,7 @@ def test_equals(self): t2.populations.clear() assert t1 == t2 - def test_equals_with_options(self): + def test_equals_options(self): pop_configs = [msprime.PopulationConfiguration(5) for _ in range(2)] migration_matrix = [[0, 1], [1, 0]] t1 = msprime.simulate( @@ -2421,21 +2422,20 @@ def test_equals_with_options(self): t1.metadata_schema = tskit.MetadataSchema({"codec": "json", 
"type": "object"}) t1.metadata = {"hello": "world"} assert not t1.equals(t2) - assert t1.equals(t2, ignore_top_level_metadata=True) + assert t1.equals(t2, ignore_ts_metadata=True) assert not t2.equals(t1) - assert t2.equals(t1, ignore_top_level_metadata=True) + assert t2.equals(t1, ignore_ts_metadata=True) t2.metadata_schema = t1.metadata_schema assert not t1.equals(t2) - assert t1.equals(t2, ignore_top_level_metadata=True) + assert t1.equals(t2, ignore_ts_metadata=True) assert not t2.equals(t1) - assert t2.equals(t1, ignore_top_level_metadata=True) + assert t2.equals(t1, ignore_ts_metadata=True) - # testing both t1.provenances.add_row("random stuff") assert not t1.equals(t2) - assert not t1.equals(t2, ignore_top_level_metadata=True) + assert not t1.equals(t2, ignore_ts_metadata=True) assert not t1.equals(t2, ignore_provenance=True) - assert t1.equals(t2, ignore_top_level_metadata=True, ignore_provenance=True) + assert t1.equals(t2, ignore_ts_metadata=True, ignore_provenance=True) t1.provenances.clear() t2.metadata = t1.metadata @@ -2540,6 +2540,89 @@ def test_sequence_length_longer_than_edges(self): assert len(tree.parent_dict) == 0 +class TestEqualityOptions: + def test_equals_provenance(self): + t1 = msprime.simulate(10, random_seed=42).tables + time.sleep(0.1) + t2 = msprime.simulate(10, random_seed=42).tables + # Timestamps should differ + assert t1.provenances[-1].timestamp != t2.provenances[-1].timestamp + assert not t1.equals(t2) + assert t1.equals(t2, ignore_timestamps=True) + assert t1.equals(t2, ignore_provenance=True) + assert t1.equals(t2, ignore_provenance=True, ignore_timestamps=True) + + def test_equals_node_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + t2 = t1.copy() + assert t1.equals(t2) + t1.nodes.add_row(time=0, metadata={"a": "a"}) + t2.nodes.add_row(time=0, metadata={"a": "b"}) + assert not t1.nodes.equals(t2.nodes) + assert not t1.equals(t2) + assert t1.nodes.equals(t2.nodes, ignore_metadata=True) + + def 
test_equals_edge_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + child = t1.nodes.add_row(time=0) + parent = t1.nodes.add_row(time=1) + t2 = t1.copy() + assert t1.equals(t2) + t1.edges.add_row(0, 1, parent, child, metadata={"a": "a"}) + t2.edges.add_row(0, 1, parent, child, metadata={"a": "b"}) + assert not t1.edges.equals(t2.edges) + assert not t1.equals(t2) + assert t1.edges.equals(t2.edges, ignore_metadata=True) + assert t1.equals(t2, ignore_metadata=True) + + def test_equals_migration_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + t2 = t1.copy() + assert t1.equals(t2) + t1.migrations.add_row( + 0, 1, source=0, dest=1, node=0, time=0, metadata={"a": "a"} + ) + t2.migrations.add_row( + 0, 1, source=0, dest=1, node=0, time=0, metadata={"a": "b"} + ) + assert not t1.migrations.equals(t2.migrations) + assert not t1.equals(t2) + assert t1.migrations.equals(t2.migrations, ignore_metadata=True) + assert t1.equals(t2, ignore_metadata=True) + + def test_equals_site_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + t2 = t1.copy() + assert t1.equals(t2) + t1.sites.add_row(0, "A", metadata={"a": "a"}) + t2.sites.add_row(0, "A", metadata={"a": "b"}) + assert not t1.sites.equals(t2.sites) + assert not t1.equals(t2) + assert t1.sites.equals(t2.sites, ignore_metadata=True) + assert t1.equals(t2, ignore_metadata=True) + + def test_equals_mutation_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + t2 = t1.copy() + assert t1.equals(t2) + t1.mutations.add_row(0, 0, "A", metadata={"a": "a"}) + t2.mutations.add_row(0, 0, "A", metadata={"a": "b"}) + assert not t1.mutations.equals(t2.mutations) + assert not t1.equals(t2) + assert t1.mutations.equals(t2.mutations, ignore_metadata=True) + assert t1.equals(t2, ignore_metadata=True) + + def test_equals_population_metadata(self, ts_fixture): + t1 = ts_fixture.dump_tables() + t2 = t1.copy() + assert t1.equals(t2) + t1.populations.add_row({"a": "a"}) + t2.populations.add_row({"a": "b"}) + 
assert not t1.populations.equals(t2.populations) + assert not t1.equals(t2) + assert t1.equals(t2, ignore_metadata=True) + + class TestTableCollectionMethodSignatures: tc = msprime.simulate(10, random_seed=1234).dump_tables() diff --git a/python/tskit/tables.py b/python/tskit/tables.py index f0222fa88b..8205a952c7 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -187,14 +187,28 @@ def max_rows(self): def max_rows_increment(self): return self.ll_table.max_rows_increment - def __eq__(self, other): + def equals(self, other, ignore_metadata=False): + """ + Returns True if `self` and `other` are equal. By default, two tables + are considered equal if their columns and metadata schemas are + byte-for-byte identical. + + :param other: Another table instance + :param bool ignore_metadata: If True exclude metadata and metadata schemas + from the comparison. + :return: True if other is equal to this table; False otherwise. + :rtype: bool + """ + # Note: most tables support ignore_metadata, we can override for those that don't ret = False if type(other) is type(self): - ret = bool(self.ll_table.equals(other.ll_table)) + ret = bool( + self.ll_table.equals(other.ll_table, ignore_metadata=ignore_metadata) + ) return ret - def __ne__(self, other): - return not self.__eq__(other) + def __eq__(self, other): + return self.equals(other) def __len__(self): return self.num_rows @@ -1822,6 +1836,26 @@ def __init__(self, max_rows_increment=0, ll_table=None): ll_table = _tskit.ProvenanceTable(max_rows_increment=max_rows_increment) super().__init__(ll_table, ProvenanceTableRow) + def equals(self, other, ignore_timestamps=False): + """ + Returns True if `self` and `other` are equal. By default, two provenance + tables are considered equal if their columns are byte-for-byte identical. + + :param other: Another provenance table instance + :param bool ignore_timestamps: If True exclude the timestamp column + from the comparison. 
+ :return: True if other is equal to this provenance table; False otherwise. + :rtype: bool + """ + ret = False + if type(other) is type(self): + ret = bool( + self.ll_table.equals( + other.ll_table, ignore_timestamps=ignore_timestamps + ) + ) + return ret + def add_row(self, record, timestamp=None): """ Adds a new row to this ProvenanceTable consisting of the specified record and @@ -2146,14 +2180,59 @@ def __str__(self): s += str(self.provenances) return s - def __eq__(self, other): + def equals( + self, + other, + ignore_metadata=False, + ignore_ts_metadata=False, + ignore_provenance=False, + ignore_timestamps=False, + ): + """ + Returns True if `self` and `other` are equal. By default, two table + collections are considered equal if their + + - ``sequence_length`` properties are identical; + - top-level tree sequence metadata and metadata schemas are + byte-wise identical; + - constituent tables are byte-wise identical. + + Some of the requirements in this definition can be relaxed using the + parameters, which can be used to remove certain parts of the data model + from the comparison. + + Table indexes are not considered in the equality comparison. + + :param TableCollection other: Another table collection. + :param bool ignore_metadata: If True *all* metadata and metadata schemas + will be excluded from the comparison. This includes the top-level + tree sequence and constituent table metadata (default=False). + :param bool ignore_ts_metadata: If True the top-level tree sequence + metadata and metadata schemas will be excluded from the comparison. + If ``ignore_metadata`` is True, this parameter has no effect. + :param bool ignore_provenance: If True the provenance tables are + not included in the comparison. + :param bool ignore_timestamps: If True the provenance timestamp column + is ignored in the comparison. If ``ignore_provenance`` is True, this + parameter has no effect. + :return: True if other is equal to this table collection; False otherwise. 
+ :rtype: bool + """ ret = False if type(other) is type(self): - ret = bool(self._ll_tables.equals(other._ll_tables)) + ret = bool( + self._ll_tables.equals( + other._ll_tables, + ignore_metadata=bool(ignore_metadata), + ignore_ts_metadata=bool(ignore_ts_metadata), + ignore_provenance=bool(ignore_provenance), + ignore_timestamps=bool(ignore_timestamps), + ) + ) return ret - def __ne__(self, other): - return not self.__eq__(other) + def __eq__(self, other): + return self.equals(other) def __getstate__(self): return self.asdict() @@ -2794,31 +2873,6 @@ def union( record=json.dumps(provenance.get_provenance_dict(parameters)) ) - def equals(self, other, ignore_top_level_metadata=False, ignore_provenance=False): - """ - Returns True if `self` and `other` are equal. The comparison of - top-level metadata/metadata schema and provenance tables may be - disabled with the flags `ignore_top_level_metadata` and `ignore_provenance`, - which are false by default. Note that table row-level metadata and table - schemas are always checked. - - :param TableCollection other: Another table collection. - :param bool ignore_top_level_metadata: If True, the top-level metadata and - metadata schema are ignored. - :param bool ignore_provenance: If True, the provenance tables are - ignored. - """ - ret = False - if type(other) is type(self): - ret = bool( - self._ll_tables.equals( - other._ll_tables, - ignore_top_level_metadata=ignore_top_level_metadata, - ignore_provenance=ignore_provenance, - ) - ) - return ret - def find_ibd(self, samples, max_time=None, min_length=None): max_time = sys.float_info.max if max_time is None else max_time min_length = 0 if min_length is None else min_length