Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -2860,6 +2860,43 @@ test_simplify_metadata(void)
tsk_table_collection_free(&tables);
}

static void
test_edge_update_invalidates_index(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t tables;

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);

/* Any operations on the edge table should now invalidate the index */
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))
ret = tsk_edge_table_clear(&tables.edges);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));
/* Even though the actual indexes still exist */
CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);
CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);
CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));

ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))
ret = tsk_edge_table_add_row(&tables.edges, 0, 1, 0, 1, NULL, 0);
CU_ASSERT_TRUE(ret > 0);
CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));
/* Even though the actual indexes still exist */
CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);
CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);
CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));

tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}

static void
test_copy_table_collection(void)
{
Expand Down Expand Up @@ -4113,6 +4150,7 @@ main(int argc, char **argv)
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
{ "test_sort_tables_edge_metadata", test_sort_tables_edge_metadata },
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },
{ "test_edge_update_invalidates_index", test_edge_update_invalidates_index },
{ "test_copy_table_collection", test_copy_table_collection },
{ "test_sort_tables_errors", test_sort_tables_errors },
{ "test_sorter_interface", test_sorter_interface },
Expand Down
2 changes: 1 addition & 1 deletion c/tests/testlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nod

ret = tsk_treeseq_init(ts, &tables, TSK_BUILD_INDEXES);
/* tsk_treeseq_print_state(ts, stdout); */
// printf("ret = %s\n", tsk_strerror(ret));
/* printf("ret = %s\n", tsk_strerror(ret)); */
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_table_collection_free(&tables);
}
Expand Down
10 changes: 7 additions & 3 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -7128,6 +7128,7 @@ tsk_table_collection_set_index(tsk_table_collection_t *self,
}
memcpy(self->indexes.edge_insertion_order, edge_insertion_order, index_size);
memcpy(self->indexes.edge_removal_order, edge_removal_order, index_size);
self->indexes.num_edges = self->edges.num_rows;
out:
return ret;
}
Expand All @@ -7137,7 +7138,8 @@ tsk_table_collection_has_index(
tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
return self->indexes.edge_insertion_order != NULL
&& self->indexes.edge_removal_order != NULL;
&& self->indexes.edge_removal_order != NULL
&& self->indexes.num_edges == self->edges.num_rows;
}

int
Expand All @@ -7148,6 +7150,7 @@ tsk_table_collection_drop_index(
tsk_safe_free(self->indexes.edge_removal_order);
self->indexes.edge_insertion_order = NULL;
self->indexes.edge_removal_order = NULL;
self->indexes.num_edges = 0;
return 0;
}

Expand Down Expand Up @@ -7206,6 +7209,7 @@ tsk_table_collection_build_index(
for (j = 0; j < self->edges.num_rows; j++) {
self->indexes.edge_removal_order[j] = sort_buff[j].index;
}
self->indexes.num_edges = self->edges.num_rows;
ret = 0;
out:
tsk_safe_free(sort_buff);
Expand Down Expand Up @@ -7410,8 +7414,8 @@ tsk_table_collection_dump_indexes(tsk_table_collection_t *self, kastore_t *store
{
int ret = 0;
write_table_col_t write_cols[] = {
{ "indexes/edge_insertion_order", NULL, self->edges.num_rows, KAS_INT32 },
{ "indexes/edge_removal_order", NULL, self->edges.num_rows, KAS_INT32 },
{ "indexes/edge_insertion_order", NULL, self->indexes.num_edges, KAS_INT32 },
{ "indexes/edge_removal_order", NULL, self->indexes.num_edges, KAS_INT32 },
};

if (tsk_table_collection_has_index(self, 0)) {
Expand Down
22 changes: 19 additions & 3 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ typedef struct {
struct {
tsk_id_t *edge_insertion_order;
tsk_id_t *edge_removal_order;
tsk_size_t num_edges;
} indexes;
} tsk_table_collection_t;

Expand Down Expand Up @@ -2247,7 +2248,7 @@ Writes the data from this table collection to the specified file.
Usually we expect that data written to a file will be in a form that
can be read directly and used to create a tree sequence; that is, we
assume that by default the tables are :ref:`sorted <sec_table_sorting>`
and should be :ref:`indexed <sec_table_indexing>`. Following these
and :ref:`indexed <sec_table_indexes>`. Following these
assumptions, if the tables are not already indexed, we index the tables
before writing to file to save the cost of building these indexes at
load time. This behaviour requires that the tables are sorted.
Expand Down Expand Up @@ -2543,6 +2544,17 @@ int tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,
/**
@brief Returns true if this table collection is indexed.

@rst
This method returns true if the table collection has an index
for the edge table. It guarantees that the index exists, and that
it is for the same number of edges that are in the edge table. It
does *not* guarantee that the index is valid (i.e., if the rows
in the edge have been permuted in some way since the index was built).

See the :ref:`sec_c_api_table_indexes` section for details on the index
life-cycle.
@endrst

@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
Expand All @@ -2556,6 +2568,8 @@ bool tsk_table_collection_has_index(tsk_table_collection_t *self, tsk_flags_t op
@rst
Unconditionally drop the indexes that may be present for this table collection. It
is not an error to call this method on an unindexed table collection.
See the :ref:`sec_c_api_table_indexes` section for details on the index
life-cycle.
@endrst

@param self A pointer to a tsk_table_collection_t object.
Expand All @@ -2569,8 +2583,10 @@ int tsk_table_collection_drop_index(tsk_table_collection_t *self, tsk_flags_t op
@brief Builds indexes for this table collection.

@rst
Builds the tree traversal :ref:`indexes <sec_table_indexing>` for this table collection.
Any existing index is first dropped using :c:func:`tsk_table_collection_drop_index`.
Builds the tree traversal :ref:`indexes <sec_table_indexes>` for this table
collection. Any existing index is first dropped using
:c:func:`tsk_table_collection_drop_index`. See the
:ref:`sec_c_api_table_indexes` section for details on the index life-cycle.
@endrst

@param self A pointer to a tsk_table_collection_t object.
Expand Down
38 changes: 38 additions & 0 deletions docs/c-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,44 @@ Provenances
:content-only:


.. _sec_c_api_table_indexes:

*************
Table indexes
*************

Along with the tree sequence :ref:`ordering requirements
<sec_valid_tree_sequence_requirements>`, the :ref:`sec_table_indexes`
allow us to take a table collection and efficiently operate
on the trees defined within it. This section defines the rules
for safely operating on table indexes and their life-cycle.

The edge index used for tree generation consists of two arrays,
each holding ``N`` edge IDs (where ``N`` is the size of the edge
table). When the index is computed using
:c:func:`tsk_table_collection_build_index`, we store the current size
of the edge table along with the two arrays of edge IDs. The
function :c:func:`tsk_table_collection_has_index` then returns true
iff (a) both of these arrays are not NULL and (b) the stored
number of edges is the same as the current size of the edge table.

Updating the edge table does not automatically invalidate the indexes.
Thus, if we call :c:func:`tsk_edge_table_clear` on an edge table
which has an index, this index will still exist. However, it will
not be considered a valid index by
:c:func:`tsk_table_collection_has_index` because of the size mismatch.
Similarly for functions that increase the size of the table.
Note that it is possible then to have
:c:func:`tsk_table_collection_has_index` return true, but the index
is not actually valid, if, for example, the user has manipulated the
node and edge tables to describe a different topology, which happens
to have the same number of edges. The behaviour of methods that
use the indexes will be undefined in this case.

Thus, if you are manipulating an existing table collection that may
be indexed, it is always recommended to call
:c:func:`tsk_table_collection_drop_index` first.

.. _sec_c_api_tree_sequences:

**************
Expand Down
8 changes: 5 additions & 3 deletions docs/data-model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -790,16 +790,18 @@ appearance), then this property is preserved, even if the `parent` properties
are not specified.


.. _sec_table_indexing:
.. _sec_table_indexes:

Indexing
--------
Table indexes
-------------

To efficiently iterate over the trees in a tree sequence, ``tskit`` uses
indexes built on the edges. To create a tree sequence from a table collection
the tables must be indexed; the :meth:`TableCollection.build_index` method
can be used to create an index on a table collection if necessary.

.. todo:: Add more details on what the indexes actually are.

Removing duplicate sites
------------------------

Expand Down