Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
current behaviour. (:user:`mufernando`, :user:`jeromekelleher`,
:issue:`896`, :pr:`897`, :issue:`913`, :pr:`917`).

- Changed default behaviour of ``tsk_table_collection_clear`` to not clear
provenances and added ``options`` argument to optionally clear provenances
and schemas.
(:user:`benjeffery`, :issue:`929`, :pr:`1001`)

- Exposed ``tsk_table_collection_set_indexes`` to the API.
(:user:`benjeffery`, :issue:`870`, :pr:`921`)

Expand Down
115 changes: 114 additions & 1 deletion c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -5142,7 +5142,7 @@ test_table_collection_union(void)
&tables_copy, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));
// self is empty
ret = tsk_table_collection_clear(&tables_copy);
ret = tsk_table_collection_clear(&tables_copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_union(
&tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
Expand Down Expand Up @@ -5290,6 +5290,118 @@ test_table_collection_union_errors(void)
tsk_table_collection_free(&tables);
}

/*
 * Populate a table collection (rows in every table, an index, a metadata
 * schema on each data table, top-level metadata and schema, and a single
 * provenance row), call tsk_table_collection_clear() with ``options`` and
 * verify that only the parts selected by ``options`` were removed.
 */
static void
test_table_collection_clear_with_options(tsk_flags_t options)
{
    int ret;
    tsk_table_collection_t tables;
    tsk_bookmark_t num_rows;
    /* The designated initialiser zeroes the expected count of every other
     * table; the one provenance row added below survives unless
     * TSK_CLEAR_PROVENANCE is requested. */
    tsk_bookmark_t expected_rows
        = { .provenances = (options & TSK_CLEAR_PROVENANCE) ? 0 : 1 };
    /* Every schema/metadata value set below is the 4-byte string "test". */
    tsk_size_t expected_len = (options & TSK_CLEAR_METADATA_SCHEMAS) ? 0 : 4;
    tsk_size_t expected_len_ts = (options & TSK_CLEAR_TS_METADATA_AND_SCHEMA) ? 0 : 4;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Put at least one row into each data table. */
    ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);
    ret = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);
    CU_ASSERT_FATAL(ret >= 0);

    /* Build an index so we can check that clearing drops it. */
    ret = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Give each data table a metadata schema. */
    ret = tsk_individual_table_set_metadata_schema(&tables.individuals, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_set_metadata_schema(&tables.nodes, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_set_metadata_schema(&tables.edges, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_set_metadata_schema(&tables.migrations, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_set_metadata_schema(&tables.sites, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_set_metadata_schema(&tables.mutations, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_population_table_set_metadata_schema(&tables.populations, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Top-level metadata and its schema. */
    ret = tsk_table_collection_set_metadata(&tables, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata_schema(&tables, "test", 4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* The single provenance row counted in expected_rows. */
    ret = tsk_provenance_table_add_row(&tables.provenances, "today", 5, "test", 4);
    CU_ASSERT_FATAL(ret >= 0);

    /* Operation under test. */
    ret = tsk_table_collection_clear(&tables, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Row counts: everything empty, provenances depending on options. */
    ret = tsk_table_collection_record_num_rows(&tables, &num_rows);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(num_rows.individuals, expected_rows.individuals);
    CU_ASSERT_EQUAL(num_rows.nodes, expected_rows.nodes);
    CU_ASSERT_EQUAL(num_rows.edges, expected_rows.edges);
    CU_ASSERT_EQUAL(num_rows.migrations, expected_rows.migrations);
    CU_ASSERT_EQUAL(num_rows.sites, expected_rows.sites);
    CU_ASSERT_EQUAL(num_rows.mutations, expected_rows.mutations);
    CU_ASSERT_EQUAL(num_rows.populations, expected_rows.populations);
    CU_ASSERT_EQUAL(num_rows.provenances, expected_rows.provenances);

    /* The index must be gone whatever options were passed. */
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));

    /* Per-table schemas and the top-level metadata/schema lengths. */
    CU_ASSERT_EQUAL(tables.individuals.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.nodes.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.edges.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.migrations.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.sites.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.mutations.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.populations.metadata_schema_length, expected_len);
    CU_ASSERT_EQUAL(tables.metadata_schema_length, expected_len_ts);
    CU_ASSERT_EQUAL(tables.metadata_length, expected_len_ts);

    tsk_table_collection_free(&tables);
}

static void
test_table_collection_clear(void)
{
test_table_collection_clear_with_options(0);
test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE);
test_table_collection_clear_with_options(TSK_CLEAR_METADATA_SCHEMAS);
test_table_collection_clear_with_options(TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(
TSK_CLEAR_PROVENANCE | TSK_CLEAR_METADATA_SCHEMAS);
test_table_collection_clear_with_options(
TSK_CLEAR_PROVENANCE | TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(
TSK_CLEAR_METADATA_SCHEMAS | TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE
| TSK_CLEAR_METADATA_SCHEMAS
| TSK_CLEAR_TS_METADATA_AND_SCHEMA);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -5360,6 +5472,7 @@ main(int argc, char **argv)
{ "test_table_collection_subset_errors", test_table_collection_subset_errors },
{ "test_table_collection_union", test_table_collection_union },
{ "test_table_collection_union_errors", test_table_collection_union_errors },
{ "test_table_collection_clear", test_table_collection_clear },
{ NULL, NULL },
};

Expand Down
77 changes: 59 additions & 18 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6547,7 +6547,7 @@ simplifier_init(simplifier_t *self, const tsk_id_t *samples, size_t num_samples,
ret = TSK_ERR_NO_MEMORY;
goto out;
}
ret = tsk_table_collection_clear(self->tables);
ret = tsk_table_collection_clear(self->tables, 0);
if (ret != 0) {
goto out;
}
Expand Down Expand Up @@ -7015,11 +7015,7 @@ simplifier_finalise_references(simplifier_t *self)
}
}

ret = tsk_provenance_table_copy(
&self->input_tables.provenances, &self->tables->provenances, TSK_NO_INIT);
if (ret != 0) {
goto out;
}
ret = 0;
out:
tsk_safe_free(population_referenced);
tsk_safe_free(individual_referenced);
Expand Down Expand Up @@ -9018,12 +9014,63 @@ tsk_table_collection_truncate(tsk_table_collection_t *tables, tsk_bookmark_t *po
}

/* Clear the data tables of ``self``. ``options`` is a bitwise combination of
 * the TSK_CLEAR_* flags and selects what is reset in addition: the
 * provenance rows, the per-table metadata schemas, and the top-level
 * metadata and metadata schema. Returns 0 on success, otherwise the first
 * non-zero value returned by one of the calls below.
 *
 * NOTE(review): this listing is a merged diff view without +/- markers; the
 * one-argument signature, the ``start`` bookmark and the
 * ``memset``/``return`` pair look like the pre-change implementation shown
 * next to its replacement - confirm against the real file. */
int TSK_WARN_UNUSED
tsk_table_collection_clear(tsk_table_collection_t *self)
tsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options)
{
tsk_bookmark_t start;
int ret = 0;
bool clear_provenance = !!(options & TSK_CLEAR_PROVENANCE);
bool clear_metadata_schemas = !!(options & TSK_CLEAR_METADATA_SCHEMAS);
bool clear_ts_metadata = !!(options & TSK_CLEAR_TS_METADATA_AND_SCHEMA);
/* Every field not named here is zero-initialised, so only the provenance
 * table can keep rows: all of its current rows unless clear_provenance. */
tsk_bookmark_t rows_to_retain
= { .provenances = clear_provenance ? 0 : self->provenances.num_rows };

/* Truncate each table to the row count recorded in rows_to_retain. */
ret = tsk_table_collection_truncate(self, &rows_to_retain);
if (ret != 0) {
goto out;
}

memset(&start, 0, sizeof(start));
return tsk_table_collection_truncate(self, &start);
/* Reset each table's metadata schema to the empty string (length 0). */
if (clear_metadata_schemas) {
ret = tsk_individual_table_set_metadata_schema(&self->individuals, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_node_table_set_metadata_schema(&self->nodes, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_edge_table_set_metadata_schema(&self->edges, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_migration_table_set_metadata_schema(&self->migrations, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_site_table_set_metadata_schema(&self->sites, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_mutation_table_set_metadata_schema(&self->mutations, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_population_table_set_metadata_schema(&self->populations, "", 0);
if (ret != 0) {
goto out;
}
}

/* Reset the collection-level metadata and its schema to empty. */
if (clear_ts_metadata) {
ret = tsk_table_collection_set_metadata(self, "", 0);
if (ret != 0) {
goto out;
}
ret = tsk_table_collection_set_metadata_schema(self, "", 0);
if (ret != 0) {
goto out;
}
}
out:
return ret;
}

static int
Expand Down Expand Up @@ -9114,7 +9161,7 @@ tsk_table_collection_subset(
if (ret != 0) {
goto out;
}
ret = tsk_table_collection_clear(self);
ret = tsk_table_collection_clear(self, 0);
if (ret != 0) {
goto out;
}
Expand Down Expand Up @@ -9199,13 +9246,7 @@ tsk_table_collection_subset(
ret = TSK_ERR_MIGRATIONS_NOT_SUPPORTED;
goto out;
}

// provenance (new record is added in python)
ret = tsk_provenance_table_copy(
&tables.provenances, &self->provenances, TSK_NO_INIT);
if (ret < 0) {
goto out;
}
ret = 0;

out:
tsk_safe_free(node_map);
Expand Down
26 changes: 24 additions & 2 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,11 @@ typedef struct {
#define TSK_CMP_IGNORE_METADATA (1 << 2)
#define TSK_CMP_IGNORE_TIMESTAMPS (1 << 3)

/* Flags for tables collection clear */
/* Also reset the metadata schema of the individual, node, edge, migration,
 * site, mutation and population tables to empty. */
#define TSK_CLEAR_METADATA_SCHEMAS (1 << 0)
/* Also reset the table collection's own metadata and metadata schema to
 * empty. */
#define TSK_CLEAR_TS_METADATA_AND_SCHEMA (1 << 1)
/* Also remove the rows of the provenance table, which are kept otherwise. */
#define TSK_CLEAR_PROVENANCE (1 << 2)

/****************************************************************************/
/* Function signatures */
/****************************************************************************/
Expand Down Expand Up @@ -2251,17 +2256,34 @@ int tsk_table_collection_init(tsk_table_collection_t *self, tsk_flags_t options)
int tsk_table_collection_free(tsk_table_collection_t *self);

/**
@brief Clears all tables in this table collection.
@brief Clears data tables (and optionally provenances and metadata) in
this table collection.

@rst
By default this operation clears all tables except the provenance table, retaining
table metadata schemas and the tree-sequence level metadata and schema.

**Options**

Options can be specified by providing one or more of the following bitwise
flags:

TSK_CLEAR_PROVENANCE
Additionally clear the provenance table
TSK_CLEAR_METADATA_SCHEMAS
Additionally clear the table metadata schemas
TSK_CLEAR_TS_METADATA_AND_SCHEMA
Additionally clear the tree-sequence metadata and schema

No memory is freed as a result of this operation; please use
:c:func:`tsk_table_collection_free` to free internal resources.
@endrst

@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise clearing options
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_clear(tsk_table_collection_t *self);
int tsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options);

/**
@brief Returns true if the data in the specified table collection is equal
Expand Down
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
reports the size in bytes of those objects.
(:user:`jeromekelleher`, :user:`benjeffery`, :issue:`54`, :pr:`871`)

- Added ``TableCollection.clear`` to clear data table rows and optionally
provenances, table schemas and tree-sequence level metadata and schema.
(:user:`benjeffery`, :issue:`929`, :pr:`1001`)

**Breaking changes**

- The argument to ``ts.dump`` and ``tskit.load`` has been renamed `file` from `path`.
Expand Down
43 changes: 43 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -5469,6 +5469,45 @@ TableCollection_equals(TableCollection *self, PyObject *args, PyObject *kwds)
return ret;
}

/*
 * Python method TableCollection.clear(clear_provenance=0,
 * clear_metadata_schemas=0, clear_ts_metadata_and_schema=0).
 *
 * Each optional integer argument, when non-zero, switches on the matching
 * TSK_CLEAR_* flag passed to tsk_table_collection_clear(). Returns the
 * result of Py_BuildValue("") on success and NULL on failure.
 */
static PyObject *
TableCollection_clear(TableCollection *self, PyObject *args, PyObject *kwds)
{
    PyObject *ret = NULL;
    int err;
    /* Parsed with the "i" format, so these are plain ints, not bools. */
    int clear_provenance = false;
    int clear_metadata_schemas = false;
    int clear_ts_metadata = false;
    tsk_flags_t options = 0;
    static char *kwlist[] = { "clear_provenance", "clear_metadata_schemas",
        "clear_ts_metadata_and_schema", NULL };

    if (TableCollection_check_state(self) != 0) {
        goto out;
    }
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iii", kwlist, &clear_provenance,
            &clear_metadata_schemas, &clear_ts_metadata)) {
        goto out;
    }

    /* Translate the three keyword arguments into the C option bitmask. */
    options |= clear_provenance ? TSK_CLEAR_PROVENANCE : 0;
    options |= clear_metadata_schemas ? TSK_CLEAR_METADATA_SCHEMAS : 0;
    options |= clear_ts_metadata ? TSK_CLEAR_TS_METADATA_AND_SCHEMA : 0;

    err = tsk_table_collection_clear(self->tables, options);
    if (err != 0) {
        /* presumably raises the matching Python exception - confirm */
        handle_library_error(err);
        goto out;
    }
    ret = Py_BuildValue("");
out:
    return ret;
}

static PyObject *
TableCollection_dump(TableCollection *self, PyObject *args, PyObject *kwds)
{
Expand Down Expand Up @@ -5649,6 +5688,10 @@ static PyMethodDef TableCollection_methods[] = {
.ml_meth = (PyCFunction) TableCollection_has_index,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns True if the TableCollection is indexed." },
{ .ml_name = "clear",
.ml_meth = (PyCFunction) TableCollection_clear,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Clears table contents, and optionally provenances and metadata" },
{ .ml_name = "dump",
.ml_meth = (PyCFunction) TableCollection_dump,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
Expand Down
Loading