From 626512f04be692722130b6a27bff5ce62301d961 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 8 Feb 2021 15:51:30 +0000 Subject: [PATCH 1/2] Add individual parent checks --- c/tests/test_tables.c | 35 ++++++++++++++- c/tests/test_trees.c | 35 ++++++++++++--- c/tskit/core.c | 6 +++ c/tskit/core.h | 3 ++ c/tskit/tables.c | 43 ++++++++++++++++++- c/tskit/tables.h | 3 ++ docs/data-model.rst | 6 +-- python/CHANGELOG.rst | 2 +- python/lwt_interface/dict_encoding_testlib.py | 2 +- python/tests/conftest.py | 2 +- python/tests/test_tables.py | 7 +++ 11 files changed, 129 insertions(+), 15 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index c2d310d2b5..3f309719db 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -275,7 +275,7 @@ test_table_collection_simplify_errors(void) int ret; tsk_table_collection_t tables; tsk_id_t samples[] = { 0, 1 }; - + const char *individuals = "1 0.25 -2\n"; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); tables.sequence_length = 1; @@ -296,6 +296,14 @@ test_table_collection_simplify_errors(void) tables.sites.position[0] = 1.5; ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION); + tsk_site_table_truncate(&tables.sites, 0); + tables.sites.position[0] = 0; + + /* Individual out of bounds */ + parse_individuals(individuals, &tables.individuals); + CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 1); + ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); /* TODO More tests for this: see * https://github.com/tskit-dev/msprime/issues/517 */ @@ -4755,6 +4763,9 @@ test_table_collection_check_integrity_with_options(tsk_flags_t tc_options) { int ret; tsk_table_collection_t tables; + const char *individuals = "1 0.25 -1\n" + "2 0.5,0.25 2\n" + "3 0.5,0.25 0\n"; ret = tsk_table_collection_init(&tables, tc_options); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -5177,6 +5188,28 @@ test_table_collection_check_integrity_with_options(tsk_flags_t tc_options) CU_ASSERT_FATAL(ret >= 0); ret = tsk_table_collection_check_integrity(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGE_INTERVAL); + ret = tsk_migration_table_clear(&tables.migrations); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + parse_individuals(individuals, &tables.individuals); + CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 3); + ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDIVIDUAL_ORDERING); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS); + ret = tsk_table_collection_check_integrity(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + /* Check that an individual can't be its own parent */ + tables.individuals.parents[0] = 0; + tables.individuals.parents[1] = 1; + tables.individuals.parents[2] = 2; + ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDIVIDUAL_ORDERING); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS); + ret = tsk_table_collection_check_integrity(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + tables.individuals.parents[0] = -2; + ret = tsk_table_collection_check_integrity(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); tsk_table_collection_free(&tables); } diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index 59a7ca505a..282dabc1f7 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -1993,6 +1993,8 @@ test_simplest_bad_individuals(void) const char *edges = "0 1 2 0\n" "0 1 2 1\n" "0 1 4 3\n"; + const char *individuals = "1 0.25 -1\n" + "2 0.5,0.25 0\n"; tsk_treeseq_t ts; tsk_table_collection_t tables; tsk_flags_t load_flags = TSK_BUILD_INDEXES; @@ -2028,13 +2030,14 @@ test_simplest_bad_individuals(void) tsk_treeseq_free(&ts); tables.nodes.individual[0] = TSK_NULL; - /* add two individuals */ - ret = tsk_individual_table_add_row( - &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); - CU_ASSERT_EQUAL(ret, 0); - ret = tsk_individual_table_add_row( - &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); - CU_ASSERT_EQUAL(ret, 1); + /* Add two individuals */ + parse_individuals(individuals, &tables.individuals); + CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 2); + + /* Make sure we have a good set of records */ + ret = tsk_treeseq_init(&ts, &tables, load_flags); + CU_ASSERT_EQUAL_FATAL(ret, 0); + tsk_treeseq_free(&ts); /* Bad individual ID */ tables.nodes.individual[0] = 2; @@ -2043,6 +2046,24 @@ test_simplest_bad_individuals(void) tsk_treeseq_free(&ts); tables.nodes.individual[0] = TSK_NULL; + /* Bad parent ID */ + tables.individuals.parents[0] = -2; + ret = tsk_treeseq_init(&ts, &tables, load_flags); + CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); + tsk_treeseq_free(&ts); + tables.individuals.parents[0] = 42; + ret = tsk_treeseq_init(&ts, &tables, load_flags); + CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); + tsk_treeseq_free(&ts); + tables.individuals.parents[0] = TSK_NULL; + + /* Unsorted individuals */ + tables.individuals.parents[0] = 1; + ret = tsk_treeseq_init(&ts, &tables, load_flags); + CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS); + tsk_treeseq_free(&ts); + tables.individuals.parents[0] = TSK_NULL; + tsk_treeseq_free(&ts); tsk_table_collection_free(&tables); } diff --git a/c/tskit/core.c b/c/tskit/core.c index 7c5dc57da8..50b8b33d9a 100644 --- a/c/tskit/core.c +++ b/c/tskit/core.c @@ -474,6 +474,12 @@ tsk_strerror_internal(int err) case TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE: ret = "You cannot specify both KEEP_UNARY and KEEP_UNARY_IN_INDIVDUALS."; break; + + /* Individual errors */ + case TSK_ERR_UNSORTED_INDIVIDUALS: + ret = "Individuals must be provided in an order where children are after " + "their parent individuals"; + break; } return ret; } diff --git a/c/tskit/core.h b/c/tskit/core.h index eba37ddc24..7a2c63fdb6 100644 --- a/c/tskit/core.h +++ b/c/tskit/core.h @@ -323,6 +323,9 @@ not found in the file. /* Simplify errors */ #define TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE -1600 +/* Individual errors */ +#define TSK_ERR_UNSORTED_INDIVIDUALS -1700 + // clang-format on /* This bit is 0 for any errors originating from kastore */ diff --git a/c/tskit/tables.c b/c/tskit/tables.c index b9558e21bb..ac7c4cb395 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -7943,6 +7943,42 @@ tsk_table_collection_check_migration_integrity( return ret; } +static int TSK_WARN_UNUSED +tsk_table_collection_check_individual_integrity( + const tsk_table_collection_t *self, tsk_flags_t options) +{ + int ret = 0; + tsk_size_t j, k; + const tsk_individual_table_t individuals = self->individuals; + const tsk_id_t num_individuals = (tsk_id_t) individuals.num_rows; + const bool check_individual_ordering = options & TSK_CHECK_INDIVIDUAL_ORDERING; + + /* Check parent references are valid */ + for (j = 0; j < individuals.parents_length; j++) { + if (individuals.parents[j] != TSK_NULL + && (individuals.parents[j] < 0 + || individuals.parents[j] >= num_individuals)) { + ret = TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS; + goto out; + } + } + + /* Check parents are ordered */ + if (check_individual_ordering) { + for (j = 0; j < (tsk_size_t) num_individuals; j++) { + for (k = individuals.parents_offset[j]; + k < individuals.parents_offset[j + 1]; k++) { + if (individuals.parents[k] != TSK_NULL + && individuals.parents[k] >= (tsk_id_t) j) { + ret = TSK_ERR_UNSORTED_INDIVIDUALS; + } + } + } + } +out: + return ret; +} + static tsk_id_t TSK_WARN_UNUSED tsk_table_collection_check_tree_integrity(const tsk_table_collection_t *self) { @@ -8074,7 +8110,7 @@ tsk_table_collection_check_integrity( /* Checking the trees implies all the other checks */ options |= TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES | TSK_CHECK_MUTATION_ORDERING - | TSK_CHECK_INDEXES; + | TSK_CHECK_INDEXES | TSK_CHECK_INDIVIDUAL_ORDERING; } if (self->sequence_length <= 0) { @@ -8105,6 +8141,11 @@ tsk_table_collection_check_integrity( if (ret != 0) { goto out; } + ret = tsk_table_collection_check_individual_integrity(self, options); + if (ret != 0) { + goto out; + } + if (options & TSK_CHECK_INDEXES) { ret = tsk_table_collection_check_index_integrity(self); if (ret != 0) { diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 26580863b3..89e4831dd2 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -720,6 +720,7 @@ typedef struct { #define TSK_CHECK_MUTATION_ORDERING (1 << 3) #define TSK_CHECK_INDEXES (1 << 4) #define TSK_CHECK_TREES (1 << 5) +#define TSK_CHECK_INDIVIDUAL_ORDERING (1 << 6) /* Leave room for more positive check flags */ #define TSK_NO_CHECK_POPULATION_REFS (1 << 10) @@ -2999,6 +3000,8 @@ TSK_CHECK_MUTATION_ORDERING Check contraints on the ordering of mutations. Any non-null mutation parents and known times are checked for ordering constraints. +TSK_CHECK_INDIVIDUAL_ORDERING + Check individual parents are before children, where specified. TSK_CHECK_INDEXES Check that the table indexes exist, and contain valid edge references. diff --git a/docs/data-model.rst b/docs/data-model.rst index 5a18bf58b1..18aa6e35d8 100644 --- a/docs/data-model.rst +++ b/docs/data-model.rst @@ -540,10 +540,10 @@ Individual requirements ----------------------- Individuals are a basic type in a tree sequence and are not defined with -respect to any other tables. Therefore, there are no requirements on -individuals. +respect to any other tables. Individuals can have a reference to their parent +individuals, if present these references must be valid or null (-1). -There are no requirements regarding the ordering of individuals. +Individuals must be sorted such that parents are before children. Sorting a set of tables using :meth:`TableCollection.sort` has no effect on the individuals. diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index fdb64907fb..1e16134048 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -8,7 +8,7 @@ (:user:`hyanwong`,:issue:`1155`, :pr:`1182`). - Add ``parents`` column to the individual table to allow recording of pedigrees - (:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`). + (:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177` :pr:`1192`). - Added ``Tree.generate_random_binary`` static method to create random binary trees (:user:`hyanwong`, :user:`jeromekelleher`, :pr:`1037`). diff --git a/python/lwt_interface/dict_encoding_testlib.py b/python/lwt_interface/dict_encoding_testlib.py index eac873a79c..ae8f0c742a 100644 --- a/python/lwt_interface/dict_encoding_testlib.py +++ b/python/lwt_interface/dict_encoding_testlib.py @@ -70,7 +70,7 @@ def full_ts(): # TODO replace this with properly linked up individuals using sim_ancestry # once 1.0 is released. for j in range(n): - tables.individuals.add_row(flags=j, location=(j, j), parents=(j, j)) + tables.individuals.add_row(flags=j, location=(j, j), parents=(j - 1, j - 1)) for name, table in tables.name_map.items(): if name != "provenances": diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 1c04d75c38..c591366cbd 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -101,7 +101,7 @@ def ts_fixture(): # TODO replace this with properly linked up individuals using sim_ancestry # once 1.0 is released. for j in range(n): - tables.individuals.add_row(flags=j, location=(j, j), parents=(j, j)) + tables.individuals.add_row(flags=j, location=(j, j), parents=(j - 1, j - 1)) for name, table in tables.name_map.items(): if name != "provenances": diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index 368fa00486..db4396365a 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -2361,6 +2361,13 @@ def test_samples_interface(self): with pytest.raises(OverflowError): tables.simplify(samples=np.array([0, bad_node])) + def test_bad_individuals(self, simple_ts_fixture): + tables = simple_ts_fixture.dump_tables() + tables.individuals.clear() + tables.individuals.add_row(parents=[-2]) + with pytest.raises(tskit.LibraryError, match="Individual out of bounds"): + tables.simplify() + class TestTableCollection: """ From 774f74a738f0aaebbc476ed2d29bafb2445621d3 Mon Sep 17 00:00:00 2001 From: peter Date: Fri, 12 Feb 2021 06:42:24 -0800 Subject: [PATCH 2/2] self parentage check --- c/tests/test_tables.c | 4 +--- c/tests/test_trees.c | 7 +++++++ c/tskit/core.c | 4 ++++ c/tskit/core.h | 1 + c/tskit/tables.c | 39 +++++++++++++++++++------------------ python/tests/test_tables.py | 16 +++++++++++++++ 6 files changed, 49 insertions(+), 22 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 3f309719db..57ade414a3 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -5202,10 +5202,8 @@ test_table_collection_check_integrity_with_options(tsk_flags_t tc_options) tables.individuals.parents[0] = 0; tables.individuals.parents[1] = 1; tables.individuals.parents[2] = 2; - ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDIVIDUAL_ORDERING); - CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS); ret = tsk_table_collection_check_integrity(&tables, 0); - CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT); tables.individuals.parents[0] = -2; ret = tsk_table_collection_check_integrity(&tables, 0); diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index 282dabc1f7..a6af0bb4d1 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -2057,6 +2057,13 @@ test_simplest_bad_individuals(void) tsk_treeseq_free(&ts); tables.individuals.parents[0] = TSK_NULL; + /* Parent is self */ + tables.individuals.parents[0] = 0; + ret = tsk_treeseq_init(&ts, &tables, load_flags); + CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT); + tsk_treeseq_free(&ts); + tables.individuals.parents[0] = TSK_NULL; + /* Unsorted individuals */ tables.individuals.parents[0] = 1; ret = tsk_treeseq_init(&ts, &tables, load_flags); diff --git a/c/tskit/core.c b/c/tskit/core.c index 50b8b33d9a..199096b12b 100644 --- a/c/tskit/core.c +++ b/c/tskit/core.c @@ -480,6 +480,10 @@ tsk_strerror_internal(int err) ret = "Individuals must be provided in an order where children are after " "their parent individuals"; break; + + case TSK_ERR_INDIVIDUAL_SELF_PARENT: + ret = "Individuals cannot be their own parents."; + break; } return ret; } diff --git a/c/tskit/core.h b/c/tskit/core.h index 7a2c63fdb6..00f1152ddc 100644 --- a/c/tskit/core.h +++ b/c/tskit/core.h @@ -325,6 +325,7 @@ not found in the file. /* Individual errors */ #define TSK_ERR_UNSORTED_INDIVIDUALS -1700 +#define TSK_ERR_INDIVIDUAL_SELF_PARENT -1701 // clang-format on diff --git a/c/tskit/tables.c b/c/tskit/tables.c index ac7c4cb395..4c448fd9d9 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -7953,25 +7953,26 @@ tsk_table_collection_check_individual_integrity( const tsk_id_t num_individuals = (tsk_id_t) individuals.num_rows; const bool check_individual_ordering = options & TSK_CHECK_INDIVIDUAL_ORDERING; - /* Check parent references are valid */ - for (j = 0; j < individuals.parents_length; j++) { - if (individuals.parents[j] != TSK_NULL - && (individuals.parents[j] < 0 - || individuals.parents[j] >= num_individuals)) { - ret = TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS; - goto out; - } - } - - /* Check parents are ordered */ - if (check_individual_ordering) { - for (j = 0; j < (tsk_size_t) num_individuals; j++) { - for (k = individuals.parents_offset[j]; - k < individuals.parents_offset[j + 1]; k++) { - if (individuals.parents[k] != TSK_NULL - && individuals.parents[k] >= (tsk_id_t) j) { - ret = TSK_ERR_UNSORTED_INDIVIDUALS; - } + for (j = 0; j < (tsk_size_t) num_individuals; j++) { + for (k = individuals.parents_offset[j]; k < individuals.parents_offset[j + 1]; + k++) { + /* Check parent references are valid */ + if (individuals.parents[k] != TSK_NULL + && (individuals.parents[k] < 0 + || individuals.parents[k] >= num_individuals)) { + ret = TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS; + goto out; + } + /* Check no-one is their own parent */ + if (individuals.parents[k] == (tsk_id_t) j) { + ret = TSK_ERR_INDIVIDUAL_SELF_PARENT; + goto out; + } + /* Check parents are ordered */ + if (check_individual_ordering && individuals.parents[k] != TSK_NULL + && individuals.parents[k] >= (tsk_id_t) j) { + ret = TSK_ERR_UNSORTED_INDIVIDUALS; + goto out; } } } diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index db4396365a..dbbc8c1c0a 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -2367,6 +2367,22 @@ def test_bad_individuals(self, simple_ts_fixture): tables.individuals.add_row(parents=[-2]) with pytest.raises(tskit.LibraryError, match="Individual out of bounds"): tables.simplify() + tables.individuals.clear() + tables.individuals.add_row(parents=[0]) + with pytest.raises( + tskit.LibraryError, match="Individuals cannot be their own parents" + ): + tables.simplify() + + def test_unsorted_individuals_ok(self, simple_ts_fixture): + tables = simple_ts_fixture.dump_tables() + tables.individuals.clear() + tables.individuals.clear() + tables.individuals.add_row(parents=[1]) + tables.individuals.add_row(parents=[-1]) + # we really just want to check that no error is thrown here + tables.simplify() + assert tables.individuals.num_rows == 0 class TestTableCollection: