Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---------------------
[0.99.3] - 2019-XX-XX
[0.99.3] - 2020-XX-XX
---------------------

In development.

**Breaking changes**

- Change genotypes from unsigned to signed to accomodate missing data
- Change genotypes from unsigned to signed to accommodate missing data
(see :issue:`144` for discussion). This only affects users of the
``tsk_vargen_t`` class. Genotypes are now stored as int8_t and int16_t
types rather than the former unsigned types. The field names in the
Expand Down Expand Up @@ -34,6 +34,10 @@ In development.

**New features**

- Add ``metadata`` and ``metadata_schema`` fields to table collection, with accessors on
tree sequence. These store arbitrary bytes and are optional in the file format.
(:user:`benjeffery`, :pr:`641`)

- Add the ``TSK_KEEP_UNARY`` option to simplify (:user:`gtsambos`). See :issue:`1`
and :pr:`143`.

Expand All @@ -46,7 +50,7 @@ In development.
off (:pr:`462`).

- Tables with metadata now have an optional ``metadata_schema`` field that can contain
arbitary bytes. (:user:`benjeffery`, :pr:`493`)
arbitrary bytes. (:user:`benjeffery`, :pr:`493`)

- Tables loaded from a file can now be edited in the same way as any other
table collection (:user:`jeromekelleher`, :issue:`536`, :pr:`530`).
Expand Down
3 changes: 2 additions & 1 deletion c/tests/test_file_format.c
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,8 @@ test_metadata_schemas_optional(void)
tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
tsk_table_collection_t t1, t2;
const char *cols[] = {
/* "metadata_schema", FIXME - add when table collection gets this */
"metadata",
"metadata_schema",
"individuals/metadata_schema",
"populations/metadata_schema",
"nodes/metadata_schema",
Expand Down
90 changes: 90 additions & 0 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,95 @@ test_table_collection_simplify_errors(void)
tsk_table_collection_free(&tables);
}

static void
test_table_collection_metadata(void)
{
int ret;
tsk_table_collection_t tc1, tc2;

char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
char example_metadata_schema[100]
= "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
tsk_size_t example_metadata_schema_length
= (tsk_size_t) strlen(example_metadata_schema);

// Test equality
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_init(&tc2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));
ret = tsk_table_collection_set_metadata(
&tc1, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2));
ret = tsk_table_collection_set_metadata(
&tc2, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));
ret = tsk_table_collection_set_metadata_schema(
&tc1, example_metadata_schema, example_metadata_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2));
ret = tsk_table_collection_set_metadata_schema(
&tc2, example_metadata_schema, example_metadata_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));

// Test copy
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata(
&tc1, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_copy(&tc1, &tc2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));

ret = tsk_table_collection_set_metadata_schema(
&tc1, example_metadata_schema, example_metadata_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_table_collection_free(&tc2);
ret = tsk_table_collection_copy(&tc1, &tc2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));

// Test dump and load with empty metadata and schema
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tc1.sequence_length = 1.0;
ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));

// Test dump and load with set metadata and schema
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tc1.sequence_length = 1.0;
ret = tsk_table_collection_set_metadata(
&tc1, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata_schema(
&tc1, example_metadata_schema, example_metadata_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2));
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
}

static void
test_node_table(void)
{
Expand Down Expand Up @@ -2830,6 +2919,7 @@ main(int argc, char **argv)
{ "test_provenance_table", test_provenance_table },
{ "test_table_collection_simplify_errors",
test_table_collection_simplify_errors },
{ "test_table_collection_metadata", test_table_collection_metadata },
{ "test_simplify_tables_drops_indexes", test_simplify_tables_drops_indexes },
{ "test_simplify_empty_tables", test_simplify_empty_tables },
{ "test_link_ancestors_no_edges", test_link_ancestors_no_edges },
Expand Down
44 changes: 44 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -5607,6 +5607,49 @@ test_sample_counts_deprecated(void)
tsk_treeseq_free(&ts);
}

static void
test_tree_sequence_metadata(void)
{
int ret;
tsk_table_collection_t tc;
tsk_treeseq_t ts;

char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
char example_metadata_schema[100]
= "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
tsk_size_t example_metadata_schema_length
= (tsk_size_t) strlen(example_metadata_schema);

ret = tsk_table_collection_init(&tc, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tc.sequence_length = 1.0;
ret = tsk_table_collection_build_index(&tc, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata(
&tc, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata_schema(
&tc, example_metadata_schema, example_metadata_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_init(&ts, &tc, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

CU_ASSERT_EQUAL(tsk_treeseq_get_metadata_length(&ts), example_metadata_length);
CU_ASSERT_EQUAL(
tsk_treeseq_get_metadata_schema_length(&ts), example_metadata_schema_length);
CU_ASSERT_EQUAL(
memcmp(tsk_treeseq_get_metadata(&ts), example_metadata, example_metadata_length),
0);
CU_ASSERT_EQUAL(memcmp(tsk_treeseq_get_metadata_schema(&ts), example_metadata_schema,
example_metadata_schema_length),
0);

tsk_treeseq_free(&ts);
tsk_table_collection_free(&tc);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -5747,6 +5790,7 @@ main(int argc, char **argv)
{ "test_empty_tree_sequence", test_empty_tree_sequence },
{ "test_zero_edges", test_zero_edges },
{ "test_sample_counts_deprecated", test_sample_counts_deprecated },
{ "test_tree_sequence_metadata", test_tree_sequence_metadata },

{ NULL, NULL },
};
Expand Down
6 changes: 6 additions & 0 deletions c/tests/testlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,8 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)
const char *metadata[] = { "This", "is", "some", "metadata" };
const int num_metadatas = sizeof(metadata) / sizeof(*metadata);
const char *metadata_schema = "mock metadata schema";
const char *ts_metadata = "This is a caterpillar tree";
const char *ts_metadata_schema = "The metadata is an example";
const char *prov_timestamp = "a timestamp, should be ISO8601";
const char *prov_record = "Produced by caterpillar_tree for testing purposes";

Expand All @@ -545,6 +547,10 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)
CU_ASSERT_FATAL(num_sites > 0 && num_mutations < n - 1);

tables.sequence_length = 1.0;

tsk_table_collection_set_metadata(&tables, ts_metadata, strlen(ts_metadata));
tsk_table_collection_set_metadata_schema(
&tables, ts_metadata_schema, strlen(ts_metadata_schema));
tsk_population_table_set_metadata_schema(
&tables.populations, metadata_schema, strlen(metadata_schema));
tsk_individual_table_set_metadata_schema(
Expand Down
2 changes: 1 addition & 1 deletion c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ to the API or ABI are introduced, i.e., internal refactors of bugfixes.
#define TSK_FILE_FORMAT_NAME "tskit.trees"
#define TSK_FILE_FORMAT_NAME_LENGTH 11
#define TSK_FILE_FORMAT_VERSION_MAJOR 12
#define TSK_FILE_FORMAT_VERSION_MINOR 1
#define TSK_FILE_FORMAT_VERSION_MINOR 2

/**
@defgroup GENERAL_ERROR_GROUP General errors.
Expand Down
95 changes: 94 additions & 1 deletion c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ read_table_cols(kastore_t *store, read_table_col_t *read_cols, size_t num_cols)
*read_cols[j].len_dest = (tsk_size_t) -1;
}
for (j = 0; j < num_cols; j++) {
if (kastore_containss(store, read_cols[j].name)) {
ret = kastore_containss(store, read_cols[j].name);
if (ret < 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
if (ret == 1) {
ret = kastore_gets(
store, read_cols[j].name, read_cols[j].array_dest, &len, &type);
if (ret != 0) {
Expand Down Expand Up @@ -6681,6 +6686,12 @@ tsk_table_collection_print_state(tsk_table_collection_t *self, FILE *out)
{
fprintf(out, "Table collection state\n");
fprintf(out, "sequence_length = %f\n", self->sequence_length);
fprintf(out, "#metadata_schema#\n");
fprintf(out, "%.*s\n", self->metadata_schema_length, self->metadata_schema);
fprintf(out, "#end#metadata_schema\n");
fprintf(out, "#metadata#\n");
fprintf(out, "%.*s\n", self->metadata_length, self->metadata);
fprintf(out, "#end#metadata\n");
tsk_individual_table_print_state(&self->individuals, out);
tsk_node_table_print_state(&self->nodes, out);
tsk_edge_table_print_state(&self->edges, out);
Expand Down Expand Up @@ -6747,6 +6758,8 @@ tsk_table_collection_free(tsk_table_collection_t *self)
tsk_safe_free(self->indexes.edge_insertion_order);
tsk_safe_free(self->indexes.edge_removal_order);
tsk_safe_free(self->file_uuid);
tsk_safe_free(self->metadata);
tsk_safe_free(self->metadata_schema);
return 0;
}

Expand All @@ -6758,6 +6771,14 @@ bool
tsk_table_collection_equals(tsk_table_collection_t *self, tsk_table_collection_t *other)
{
bool ret = self->sequence_length == other->sequence_length
&& self->metadata_length == other->metadata_length
&& self->metadata_schema_length == other->metadata_schema_length
&& memcmp(self->metadata, other->metadata,
self->metadata_length * sizeof(char))
== 0
&& memcmp(self->metadata_schema, other->metadata_schema,
self->metadata_schema_length * sizeof(char))
== 0
&& tsk_individual_table_equals(&self->individuals, &other->individuals)
&& tsk_node_table_equals(&self->nodes, &other->nodes)
&& tsk_edge_table_equals(&self->edges, &other->edges)
Expand All @@ -6769,6 +6790,22 @@ tsk_table_collection_equals(tsk_table_collection_t *self, tsk_table_collection_t
return ret;
}

/*
 * Replaces the table collection's metadata with the given bytes.
 *
 * The work is delegated to replace_string, which is handed the collection's
 * metadata pointer and length fields along with the new buffer and its
 * length. The value returned by replace_string is passed back unchanged.
 */
int
tsk_table_collection_set_metadata(
    tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length)
{
    int ret;

    ret = replace_string(
        &self->metadata, &self->metadata_length, metadata, metadata_length);
    return ret;
}

/*
 * Replaces the table collection's metadata schema with the given bytes.
 *
 * The work is delegated to replace_string, which is handed the collection's
 * metadata_schema pointer and length fields along with the new buffer and
 * its length. The value returned by replace_string is passed back unchanged.
 */
int
tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}

static int
tsk_table_collection_set_index(tsk_table_collection_t *self,
tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)
Expand Down Expand Up @@ -6922,6 +6959,16 @@ tsk_table_collection_copy(
goto out;
}
}
ret = tsk_table_collection_set_metadata(dest, self->metadata, self->metadata_length);
if (ret != 0) {
goto out;
}
ret = tsk_table_collection_set_metadata_schema(
dest, self->metadata_schema, self->metadata_schema_length);
if (ret != 0) {
goto out;
}

out:
return ret;
}
Expand All @@ -6935,6 +6982,10 @@ tsk_table_collection_read_format_data(tsk_table_collection_t *self, kastore_t *s
int8_t *format_name, *uuid;
double *L;

char *metadata = NULL;
char *metadata_schema = NULL;
size_t metadata_length, metadata_schema_length;

ret = kastore_gets_int8(store, "format/name", &format_name, &len);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
Expand Down Expand Up @@ -7000,6 +7051,45 @@ tsk_table_collection_read_format_data(tsk_table_collection_t *self, kastore_t *s
}
memcpy(self->file_uuid, uuid, TSK_UUID_SIZE);
self->file_uuid[TSK_UUID_SIZE] = '\0';

ret = kastore_containss(store, "metadata");
if (ret < 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
if (ret == 1) {
ret = kastore_gets_int8(
store, "metadata", (int8_t **) &metadata, (size_t *) &metadata_length);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
ret = tsk_table_collection_set_metadata(
self, metadata, (tsk_size_t) metadata_length);
if (ret != 0) {
goto out;
}
}

ret = kastore_containss(store, "metadata_schema");
if (ret < 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
if (ret == 1) {
ret = kastore_gets_int8(store, "metadata_schema", (int8_t **) &metadata_schema,
(size_t *) &metadata_schema_length);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
ret = tsk_table_collection_set_metadata_schema(
self, metadata_schema, (tsk_size_t) metadata_schema_length);
if (ret != 0) {
goto out;
}
}

out:
if ((ret ^ (1 << TSK_KAS_ERR_BIT)) == KAS_ERR_KEY_NOT_FOUND) {
ret = TSK_ERR_REQUIRED_COL_NOT_FOUND;
Expand Down Expand Up @@ -7146,6 +7236,9 @@ tsk_table_collection_write_format_data(tsk_table_collection_t *self, kastore_t *
{ "format/version", (void *) version, 2, KAS_UINT32 },
{ "sequence_length", (void *) &self->sequence_length, 1, KAS_FLOAT64 },
{ "uuid", (void *) uuid, TSK_UUID_SIZE, KAS_INT8 },
{ "metadata", (void *) self->metadata, self->metadata_length, KAS_INT8 },
{ "metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_INT8 },
};

ret = tsk_generate_uuid(uuid, 0);
Expand Down
Loading