From 5fb35025bcdd5970992d06be3605b681220d2951 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Fri, 12 Nov 2021 13:15:05 +0000 Subject: [PATCH 1/3] WIP --- c/tests/test_tables.c | 121 +++++++++++++++++++++++++ c/tskit/tables.c | 206 ++++++++++++++++++++++++++++++++++++++++++ c/tskit/tables.h | 23 +++++ 3 files changed, 350 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 15c8b8e1c6..fe4faf6881 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -324,6 +324,125 @@ test_table_collection_simplify_errors(void) tsk_table_collection_free(&tables); } +static void +test_table_collection_reference_sequence(void) +{ + int ret; + tsk_table_collection_t tc1, tc2; + + char example_data[100] = "An example string with unicode 🎄🌳🌴🌲🎋"; + tsk_size_t example_data_length = (tsk_size_t) strlen(example_data); + char example_url[100] = "An example url with unicode 🎄🌳🌴🌲🎋"; + tsk_size_t example_url_length = (tsk_size_t) strlen(example_url); + char example_metadata[100] = "An example metadata with unicode 🎄🌳🌴🌲🎋"; + tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata); + char example_schema[100] = "An example schema with unicode 🎄🌳🌴🌲🎋"; + tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema); + + // Test equality + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_init(&tc2, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_data( + &tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + ret = tsk_reference_sequence_set_data( + &tc2.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = 
tsk_reference_sequence_set_url( + &tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + ret = tsk_reference_sequence_set_url( + &tc2.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata( + &tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + ret = tsk_reference_sequence_set_metadata( + &tc2.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata_schema( + &tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + ret = tsk_reference_sequence_set_metadata_schema( + &tc2.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + // Test copy + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + ret = tsk_reference_sequence_set_data( + &tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_url( + &tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = 
tsk_reference_sequence_set_metadata( + &tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata_schema( + &tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + // Test dump and load + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + tc1.sequence_length = 1.0; + ret = tsk_reference_sequence_set_data( + &tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_url( + &tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_metadata( + &tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_metadata_schema( + &tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); +} + static void test_table_collection_metadata(void) { @@ -8794,6 +8913,8 @@ main(int argc, char **argv) { "test_table_collection_simplify_errors", test_table_collection_simplify_errors }, { "test_table_collection_time_units", test_table_collection_time_units }, + { 
"test_table_collection_reference_sequence", + test_table_collection_reference_sequence }, { "test_table_collection_metadata", test_table_collection_metadata }, { "test_simplify_tables_drops_indexes", test_simplify_tables_drops_indexes }, { "test_simplify_empty_tables", test_simplify_empty_tables }, diff --git a/c/tskit/tables.c b/c/tskit/tables.c index e7215325fe..583f489f08 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -627,6 +627,16 @@ write_metadata_schema_header( return fprintf(out, fmt, (int) metadata_schema_length, metadata_schema); } +static int +tsk_reference_sequence_free(tsk_reference_sequence_t *self) +{ + tsk_safe_free(self->data); + tsk_safe_free(self->url); + tsk_safe_free(self->metadata); + tsk_safe_free(self->metadata_schema); + return 0; +} + /************************* * individual table *************************/ @@ -9833,6 +9843,7 @@ tsk_table_collection_free(tsk_table_collection_t *self) tsk_mutation_table_free(&self->mutations); tsk_population_table_free(&self->populations); tsk_provenance_table_free(&self->provenances); + tsk_reference_sequence_free(&self->reference_sequence); tsk_safe_free(self->indexes.edge_insertion_order); tsk_safe_free(self->indexes.edge_removal_order); tsk_safe_free(self->file_uuid); @@ -9882,6 +9893,31 @@ tsk_table_collection_equals(const tsk_table_collection_t *self, && tsk_provenance_table_equals( &self->provenances, &other->provenances, options); } + ret = ret + && self->reference_sequence.url_length == other->reference_sequence.url_length + && tsk_memcmp(self->reference_sequence.url, other->reference_sequence.url, + self->reference_sequence.url_length * sizeof(char)) + == 0; + // TODO Ignore flags + ret = ret + && self->reference_sequence.data_length + == other->reference_sequence.data_length + && tsk_memcmp(self->reference_sequence.data, other->reference_sequence.data, + self->reference_sequence.data_length * sizeof(char)) + == 0; + ret = ret + && (self->reference_sequence.metadata_length + == 
other->reference_sequence.metadata_length + && self->reference_sequence.metadata_schema_length + == other->reference_sequence.metadata_schema_length + && tsk_memcmp(self->reference_sequence.metadata, + other->reference_sequence.metadata, + self->reference_sequence.metadata_length * sizeof(char)) + == 0 + && tsk_memcmp(self->reference_sequence.metadata_schema, + other->reference_sequence.metadata_schema, + self->reference_sequence.metadata_schema_length * sizeof(char)) + == 0); return ret; } @@ -9909,6 +9945,40 @@ tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self, metadata_schema, metadata_schema_length); } +int +tsk_reference_sequence_set_data(tsk_reference_sequence_t *self, + const char *reference_sequence, tsk_size_t reference_sequence_length) +{ + return replace_string( + &self->data, &self->data_length, reference_sequence, reference_sequence_length); +} + +int +tsk_reference_sequence_set_url(tsk_reference_sequence_t *self, + const char *reference_sequence_url, tsk_size_t reference_sequence_url_length) +{ + return replace_string(&self->url, &self->url_length, reference_sequence_url, + reference_sequence_url_length); +} + +int +tsk_reference_sequence_set_metadata(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata, + tsk_size_t reference_sequence_metadata_length) +{ + return replace_string(&self->metadata, &self->metadata_length, + reference_sequence_metadata, reference_sequence_metadata_length); +} + +int +tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata_schema, + tsk_size_t reference_sequence_metadata_schema_length) +{ + return replace_string(&self->metadata_schema, &self->metadata_schema_length, + reference_sequence_metadata_schema, reference_sequence_metadata_schema_length); +} + int tsk_table_collection_set_indexes(tsk_table_collection_t *self, tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order) @@ -10087,6 +10157,29 @@ 
tsk_table_collection_copy(const tsk_table_collection_t *self, goto out; } + // TODO separate func + ret = tsk_reference_sequence_set_data(&dest->reference_sequence, + self->reference_sequence.data, self->reference_sequence.data_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_url(&dest->reference_sequence, + self->reference_sequence.url, self->reference_sequence.url_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata(&dest->reference_sequence, + self->reference_sequence.metadata, self->reference_sequence.metadata_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata_schema(&dest->reference_sequence, + self->reference_sequence.metadata_schema, + self->reference_sequence.metadata_schema_length); + if (ret != 0) { + goto out; + } + out: return ret; } @@ -10301,6 +10394,95 @@ tsk_table_collection_load_indexes(tsk_table_collection_t *self, kastore_t *store return ret; } +static int +tsk_reference_sequence_load(tsk_reference_sequence_t *self, kastore_t *store) +{ + int ret = 0; + char *data = NULL; + char *url = NULL; + char *metadata = NULL; + char *metadata_schema = NULL; + tsk_size_t data_length, url_length, metadata_length, metadata_schema_length; + + // TODO - should be a loop + ret = kastore_containss(store, "reference_sequence/data"); + if (ret < 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + if (ret == 1) { + ret = kastore_gets_int8( + store, "reference_sequence/data", (int8_t **) &data, &data_length); + if (ret != 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + ret = tsk_reference_sequence_set_data(self, data, (tsk_size_t) data_length); + if (ret != 0) { + goto out; + } + } + + ret = kastore_containss(store, "reference_sequence/metadata"); + if (ret < 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + if (ret == 1) { + ret = kastore_gets_int8(store, "reference_sequence/metadata", + (int8_t **) &metadata, &metadata_length); + if (ret != 0) { + ret = 
tsk_set_kas_error(ret); + goto out; + } + ret = tsk_reference_sequence_set_metadata( + self, metadata, (tsk_size_t) metadata_length); + if (ret != 0) { + goto out; + } + } + + ret = kastore_containss(store, "reference_sequence/metadata_schema"); + if (ret < 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + if (ret == 1) { + ret = kastore_gets_int8(store, "reference_sequence/metadata_schema", + (int8_t **) &metadata_schema, &metadata_schema_length); + if (ret != 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + ret = tsk_reference_sequence_set_metadata_schema( + self, metadata_schema, (tsk_size_t) metadata_schema_length); + if (ret != 0) { + goto out; + } + } + + ret = kastore_containss(store, "reference_sequence/url"); + if (ret < 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + if (ret == 1) { + ret = kastore_gets_int8( + store, "reference_sequence/url", (int8_t **) &url, &url_length); + if (ret != 0) { + ret = tsk_set_kas_error(ret); + goto out; + } + ret = tsk_reference_sequence_set_url(self, url, (tsk_size_t) url_length); + if (ret != 0) { + goto out; + } + } +out: + + return ret; +} + static int TSK_WARN_UNUSED tsk_table_collection_loadf_inited(tsk_table_collection_t *self, FILE *file) { @@ -10360,6 +10542,10 @@ tsk_table_collection_loadf_inited(tsk_table_collection_t *self, FILE *file) if (ret != 0) { goto out; } + ret = tsk_reference_sequence_load(&self->reference_sequence, &store); + if (ret != 0) { + goto out; + } ret = kastore_close(&store); if (ret != 0) { goto out; @@ -10460,6 +10646,22 @@ tsk_table_collection_write_format_data(const tsk_table_collection_t *self, return ret; } +static int TSK_WARN_UNUSED +tsk_reference_sequence_dump(const tsk_reference_sequence_t *self, kastore_t *store, + tsk_flags_t TSK_UNUSED(options)) +{ + write_table_col_t write_cols[] = { + { "reference_sequence/data", (void *) self->data, self->data_length, KAS_INT8 }, + { "reference_sequence/url", (void *) self->url, self->url_length, KAS_INT8 }, + { 
"reference_sequence/metadata", (void *) self->metadata, self->metadata_length, + KAS_INT8 }, + { "reference_sequence/metadata_schema", (void *) self->metadata_schema, + self->metadata_schema_length, KAS_INT8 }, + { .name = NULL }, + }; + return write_table_cols(store, write_cols, 0); +} + int TSK_WARN_UNUSED tsk_table_collection_dump( const tsk_table_collection_t *self, const char *filename, tsk_flags_t options) @@ -10548,6 +10750,10 @@ tsk_table_collection_dumpf( if (ret != 0) { goto out; } + ret = tsk_reference_sequence_dump(&self->reference_sequence, &store, options); + if (ret != 0) { + goto out; + } ret = kastore_close(&store); if (ret != 0) { diff --git a/c/tskit/tables.h b/c/tskit/tables.h index ad8304cbcb..8d588b7302 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -538,6 +538,17 @@ typedef struct { tsk_size_t *record_offset; } tsk_provenance_table_t; +typedef struct { + char *data; + tsk_size_t data_length; + char *url; + tsk_size_t url_length; + char *metadata; + tsk_size_t metadata_length; + char *metadata_schema; + tsk_size_t metadata_schema_length; +} tsk_reference_sequence_t; + /** @brief A collection of tables defining the data for a tree sequence. 
*/ @@ -554,6 +565,7 @@ typedef struct { /** @brief The metadata schema */ char *metadata_schema; tsk_size_t metadata_schema_length; + tsk_reference_sequence_t reference_sequence; /** @brief The individual table */ tsk_individual_table_t individuals; /** @brief The node table */ @@ -4104,6 +4116,17 @@ int tsk_table_collection_compute_mutation_parents( int tsk_table_collection_compute_mutation_times( tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options)); +int tsk_reference_sequence_set_data(tsk_reference_sequence_t *self, + const char *reference_sequence, tsk_size_t reference_sequence_length); +int tsk_reference_sequence_set_url(tsk_reference_sequence_t *self, + const char *reference_sequence_url, tsk_size_t reference_sequence_url_length); +int tsk_reference_sequence_set_metadata(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata, + tsk_size_t reference_sequence_metadata_length); +int tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata_schema, + tsk_size_t reference_sequence_metadata_schema_length); + /** @defgroup TABLE_SORTER_API_GROUP Low-level table sorter API. 
@{ From d7b522ec005407dc6a77cd67c42272207e472521 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Tue, 16 Nov 2021 14:58:26 +0000 Subject: [PATCH 2/3] Ref seq as pointer --- c/tests/test_tables.c | 50 +++++--- c/tskit/tables.c | 239 +++++++++++++++++++++------------------ c/tskit/tables.h | 3 +- python/tskit/_version.py | 2 +- 4 files changed, 164 insertions(+), 130 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index fe4faf6881..1ac4fd4fc9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -346,39 +346,48 @@ test_table_collection_reference_sequence(void) CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + ret = tsk_reference_sequence_set_data( - &tc1.reference_sequence, example_data, example_data_length); + tc1.reference_sequence, example_data, example_data_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + tc2.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc2.reference_sequence, NULL); + tsk_memset(tc2.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + ret = tsk_reference_sequence_set_data( - &tc2.reference_sequence, example_data, example_data_length); + tc2.reference_sequence, example_data, example_data_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_url( - &tc1.reference_sequence, example_url, example_url_length); + tc1.reference_sequence, example_url, example_url_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_url( - &tc2.reference_sequence, example_url, example_url_length); + tc2.reference_sequence, 
example_url, example_url_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata( - &tc1.reference_sequence, example_metadata, example_metadata_length); + tc1.reference_sequence, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata( - &tc2.reference_sequence, example_metadata, example_metadata_length); + tc2.reference_sequence, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata_schema( - &tc1.reference_sequence, example_schema, example_schema_length); + tc1.reference_sequence, example_schema, example_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata_schema( - &tc2.reference_sequence, example_schema, example_schema_length); + tc2.reference_sequence, example_schema, example_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); @@ -388,29 +397,33 @@ test_table_collection_reference_sequence(void) ret = tsk_table_collection_init(&tc1, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + ret = tsk_reference_sequence_set_data( - &tc1.reference_sequence, example_data, example_data_length); + tc1.reference_sequence, example_data, example_data_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_copy(&tc1, &tc2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_url( - &tc1.reference_sequence, example_url, 
example_url_length); + tc1.reference_sequence, example_url, example_url_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata( - &tc1.reference_sequence, example_metadata, example_metadata_length); + tc1.reference_sequence, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); ret = tsk_reference_sequence_set_metadata_schema( - &tc1.reference_sequence, example_schema, example_schema_length); + tc1.reference_sequence, example_schema, example_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -422,17 +435,22 @@ test_table_collection_reference_sequence(void) ret = tsk_table_collection_init(&tc1, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); tc1.sequence_length = 1.0; + + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + ret = tsk_reference_sequence_set_data( - &tc1.reference_sequence, example_data, example_data_length); + tc1.reference_sequence, example_data, example_data_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_reference_sequence_set_url( - &tc1.reference_sequence, example_url, example_url_length); + tc1.reference_sequence, example_url, example_url_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_reference_sequence_set_metadata( - &tc1.reference_sequence, example_metadata, example_metadata_length); + tc1.reference_sequence, example_metadata, example_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_reference_sequence_set_metadata_schema( - &tc1.reference_sequence, 
example_schema, example_schema_length); + tc1.reference_sequence, example_schema, example_schema_length); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 583f489f08..03694e090c 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -627,13 +627,17 @@ write_metadata_schema_header( return fprintf(out, fmt, (int) metadata_schema_length, metadata_schema); } -static int +int tsk_reference_sequence_free(tsk_reference_sequence_t *self) { - tsk_safe_free(self->data); - tsk_safe_free(self->url); - tsk_safe_free(self->metadata); - tsk_safe_free(self->metadata_schema); + if (self != NULL) { + tsk_safe_free(self->data); + tsk_safe_free(self->url); + tsk_safe_free(self->metadata); + tsk_safe_free(self->metadata_schema); + tsk_safe_free(self); + } + return 0; } @@ -9843,7 +9847,9 @@ tsk_table_collection_free(tsk_table_collection_t *self) tsk_mutation_table_free(&self->mutations); tsk_population_table_free(&self->populations); tsk_provenance_table_free(&self->provenances); - tsk_reference_sequence_free(&self->reference_sequence); + if (self->reference_sequence) { + tsk_reference_sequence_free(self->reference_sequence); + } tsk_safe_free(self->indexes.edge_insertion_order); tsk_safe_free(self->indexes.edge_removal_order); tsk_safe_free(self->file_uuid); @@ -9893,31 +9899,47 @@ tsk_table_collection_equals(const tsk_table_collection_t *self, && tsk_provenance_table_equals( &self->provenances, &other->provenances, options); } + ret = ret - && self->reference_sequence.url_length == other->reference_sequence.url_length - && tsk_memcmp(self->reference_sequence.url, other->reference_sequence.url, - self->reference_sequence.url_length * sizeof(char)) - == 0; - // TODO Ignore flags - ret = ret - && self->reference_sequence.data_length - == other->reference_sequence.data_length - && tsk_memcmp(self->reference_sequence.data, other->reference_sequence.data, - 
self->reference_sequence.data_length * sizeof(char)) - == 0; - ret = ret - && (self->reference_sequence.metadata_length - == other->reference_sequence.metadata_length - && self->reference_sequence.metadata_schema_length - == other->reference_sequence.metadata_schema_length - && tsk_memcmp(self->reference_sequence.metadata, - other->reference_sequence.metadata, - self->reference_sequence.metadata_length * sizeof(char)) - == 0 - && tsk_memcmp(self->reference_sequence.metadata_schema, - other->reference_sequence.metadata_schema, - self->reference_sequence.metadata_schema_length * sizeof(char)) - == 0); + && ((self->reference_sequence == NULL && other->reference_sequence == NULL) + || ((self->reference_sequence != NULL + && other->reference_sequence != NULL) + && (self->reference_sequence->data_length + == other->reference_sequence->data_length + && self->reference_sequence->url_length + == other->reference_sequence->url_length + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || self->reference_sequence->metadata_length + == other->reference_sequence + ->metadata_length) + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || self->reference_sequence->metadata_schema_length + == other->reference_sequence + ->metadata_schema_length) + && tsk_memcmp(self->reference_sequence->data, + other->reference_sequence->data, + self->reference_sequence->data_length + * sizeof(char)) + == 0 + && tsk_memcmp(self->reference_sequence->url, + other->reference_sequence->url, + self->reference_sequence->url_length + * sizeof(char)) + == 0 + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || tsk_memcmp(self->reference_sequence->metadata, + other->reference_sequence->metadata, + self->reference_sequence->metadata_length + * sizeof(char)) + == 0) + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || tsk_memcmp( + self->reference_sequence->metadata_schema, + other->reference_sequence->metadata_schema, + self->reference_sequence + ->metadata_schema_length + * sizeof(char)) + == 0)))); return ret; } @@ -10157,27 
+10179,39 @@ tsk_table_collection_copy(const tsk_table_collection_t *self, goto out; } - // TODO separate func - ret = tsk_reference_sequence_set_data(&dest->reference_sequence, - self->reference_sequence.data, self->reference_sequence.data_length); - if (ret != 0) { - goto out; - } - ret = tsk_reference_sequence_set_url(&dest->reference_sequence, - self->reference_sequence.url, self->reference_sequence.url_length); - if (ret != 0) { - goto out; - } - ret = tsk_reference_sequence_set_metadata(&dest->reference_sequence, - self->reference_sequence.metadata, self->reference_sequence.metadata_length); - if (ret != 0) { - goto out; - } - ret = tsk_reference_sequence_set_metadata_schema(&dest->reference_sequence, - self->reference_sequence.metadata_schema, - self->reference_sequence.metadata_schema_length); - if (ret != 0) { - goto out; + tsk_reference_sequence_free(dest->reference_sequence); + dest->reference_sequence = NULL; + + if (self->reference_sequence != NULL) { + dest->reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (dest->reference_sequence == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + tsk_memset(dest->reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + ret = tsk_reference_sequence_set_data(dest->reference_sequence, + self->reference_sequence->data, self->reference_sequence->data_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_url(dest->reference_sequence, + self->reference_sequence->url, self->reference_sequence->url_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata(dest->reference_sequence, + self->reference_sequence->metadata, + self->reference_sequence->metadata_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata_schema(dest->reference_sequence, + self->reference_sequence->metadata_schema, + self->reference_sequence->metadata_schema_length); + if (ret != 0) { + goto out; + } } out: @@ -10395,89 +10429,68 @@ 
tsk_table_collection_load_indexes(tsk_table_collection_t *self, kastore_t *store } static int -tsk_reference_sequence_load(tsk_reference_sequence_t *self, kastore_t *store) +tsk_reference_sequence_load(tsk_reference_sequence_t **self, kastore_t *store) { int ret = 0; char *data = NULL; char *url = NULL; char *metadata = NULL; char *metadata_schema = NULL; - tsk_size_t data_length, url_length, metadata_length, metadata_schema_length; + tsk_size_t data_length = 0, url_length, metadata_length, metadata_schema_length; - // TODO - should be a loop - ret = kastore_containss(store, "reference_sequence/data"); - if (ret < 0) { - ret = tsk_set_kas_error(ret); + read_table_property_t properties[] = { + { "reference_sequence/data", (void **) &data, &data_length, KAS_UINT8, + TSK_COL_OPTIONAL }, + { "reference_sequence/url", (void **) &url, &url_length, KAS_UINT8, + TSK_COL_OPTIONAL }, + { "reference_sequence/metadata", (void **) &metadata, &metadata_length, + KAS_UINT8, TSK_COL_OPTIONAL }, + { "reference_sequence/metadata_schema", (void **) &metadata_schema, + &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL }, + { .name = NULL }, + }; + + ret = read_table_properties(store, properties, 0); + if (ret != 0) { goto out; } - if (ret == 1) { - ret = kastore_gets_int8( - store, "reference_sequence/data", (int8_t **) &data, &data_length); - if (ret != 0) { - ret = tsk_set_kas_error(ret); - goto out; - } - ret = tsk_reference_sequence_set_data(self, data, (tsk_size_t) data_length); - if (ret != 0) { + if (*self == NULL + && (data != NULL || url != NULL || metadata != NULL + || metadata_schema != NULL)) { + *self = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (*self == NULL) { + ret = TSK_ERR_NO_MEMORY; goto out; } + tsk_memset(*self, 0, sizeof(tsk_reference_sequence_t)); } - - ret = kastore_containss(store, "reference_sequence/metadata"); - if (ret < 0) { - ret = tsk_set_kas_error(ret); - goto out; - } - if (ret == 1) { - ret = kastore_gets_int8(store, 
"reference_sequence/metadata", - (int8_t **) &metadata, &metadata_length); + if (data != NULL) { + ret = tsk_reference_sequence_set_data(*self, data, (tsk_size_t) data_length); if (ret != 0) { - ret = tsk_set_kas_error(ret); goto out; } + } + if (metadata != NULL) { ret = tsk_reference_sequence_set_metadata( - self, metadata, (tsk_size_t) metadata_length); + *self, metadata, (tsk_size_t) metadata_length); if (ret != 0) { goto out; } } - - ret = kastore_containss(store, "reference_sequence/metadata_schema"); - if (ret < 0) { - ret = tsk_set_kas_error(ret); - goto out; - } - if (ret == 1) { - ret = kastore_gets_int8(store, "reference_sequence/metadata_schema", - (int8_t **) &metadata_schema, &metadata_schema_length); - if (ret != 0) { - ret = tsk_set_kas_error(ret); - goto out; - } + if (metadata_schema != NULL) { ret = tsk_reference_sequence_set_metadata_schema( - self, metadata_schema, (tsk_size_t) metadata_schema_length); + *self, metadata_schema, (tsk_size_t) metadata_schema_length); if (ret != 0) { goto out; } } - - ret = kastore_containss(store, "reference_sequence/url"); - if (ret < 0) { - ret = tsk_set_kas_error(ret); - goto out; - } - if (ret == 1) { - ret = kastore_gets_int8( - store, "reference_sequence/url", (int8_t **) &url, &url_length); - if (ret != 0) { - ret = tsk_set_kas_error(ret); - goto out; - } - ret = tsk_reference_sequence_set_url(self, url, (tsk_size_t) url_length); + if (url != NULL) { + ret = tsk_reference_sequence_set_url(*self, url, (tsk_size_t) url_length); if (ret != 0) { goto out; } } + out: return ret; @@ -10651,12 +10664,12 @@ tsk_reference_sequence_dump(const tsk_reference_sequence_t *self, kastore_t *sto tsk_flags_t TSK_UNUSED(options)) { write_table_col_t write_cols[] = { - { "reference_sequence/data", (void *) self->data, self->data_length, KAS_INT8 }, - { "reference_sequence/url", (void *) self->url, self->url_length, KAS_INT8 }, + { "reference_sequence/data", (void *) self->data, self->data_length, KAS_UINT8 }, + { 
"reference_sequence/url", (void *) self->url, self->url_length, KAS_UINT8 }, { "reference_sequence/metadata", (void *) self->metadata, self->metadata_length, - KAS_INT8 }, + KAS_UINT8 }, { "reference_sequence/metadata_schema", (void *) self->metadata_schema, - self->metadata_schema_length, KAS_INT8 }, + self->metadata_schema_length, KAS_UINT8 }, { .name = NULL }, }; return write_table_cols(store, write_cols, 0); @@ -10750,9 +10763,11 @@ tsk_table_collection_dumpf( if (ret != 0) { goto out; } - ret = tsk_reference_sequence_dump(&self->reference_sequence, &store, options); - if (ret != 0) { - goto out; + if (self->reference_sequence) { + ret = tsk_reference_sequence_dump(self->reference_sequence, &store, options); + if (ret != 0) { + goto out; + } } ret = kastore_close(&store); diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 8d588b7302..afe4599fb9 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -565,7 +565,7 @@ typedef struct { /** @brief The metadata schema */ char *metadata_schema; tsk_size_t metadata_schema_length; - tsk_reference_sequence_t reference_sequence; + tsk_reference_sequence_t *reference_sequence; /** @brief The individual table */ tsk_individual_table_t individuals; /** @brief The node table */ @@ -4116,6 +4116,7 @@ int tsk_table_collection_compute_mutation_parents( int tsk_table_collection_compute_mutation_times( tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options)); +int tsk_reference_sequence_free(tsk_reference_sequence_t *self); int tsk_reference_sequence_set_data(tsk_reference_sequence_t *self, const char *reference_sequence, tsk_size_t reference_sequence_length); int tsk_reference_sequence_set_url(tsk_reference_sequence_t *self, diff --git a/python/tskit/_version.py b/python/tskit/_version.py index a415f37e8d..dd44ba1551 100644 --- a/python/tskit/_version.py +++ b/python/tskit/_version.py @@ -1,4 +1,4 @@ # Definitive location for the version number. 
# During development, should be x.y.z.devN # For beta should be x.y.zbN -tskit_version = "0.3.8.dev1" +tskit_version = "0.4.0a1" From f83cda8c00569e6b80f3db6d56febfa07181a680 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Thu, 18 Nov 2021 14:19:50 +0000 Subject: [PATCH 3/3] WIP --- python/_tskitmodule.c | 186 ++++++++++++++++++++++++++++++++++ python/tests/test_lowlevel.py | 32 ++++++ python/tskit/tables.py | 1 + 3 files changed, 219 insertions(+) diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 4e873fa8a1..843de984de 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -6860,6 +6860,187 @@ TableCollection_has_index(TableCollection *self) return ret; } +static PyObject * +TableCollection_get_reference_sequence(TableCollection *self, void *closure) +{ + PyObject *ret = NULL; + PyObject *ref_dict = NULL; + PyObject *data = NULL; + PyObject *url = NULL; + PyObject *metadata = NULL; + PyObject *metadata_schema = NULL; + + if (TableCollection_check_state(self) != 0) { + goto out; + } + + if (self->tables->reference_sequence != NULL) { + ref_dict = PyDict_New(); + if (ref_dict == NULL) { + goto out; + } + + data + = make_Py_Unicode_FromStringAndLength(self->tables->reference_sequence->data, + self->tables->reference_sequence->data_length); + if (data == NULL) { + goto out; + } + url = make_Py_Unicode_FromStringAndLength(self->tables->reference_sequence->url, + self->tables->reference_sequence->url_length); + if (url == NULL) { + goto out; + } + metadata = make_Py_Unicode_FromStringAndLength( + self->tables->reference_sequence->metadata, + self->tables->reference_sequence->metadata_length); + if (metadata == NULL) { + goto out; + } + metadata_schema = make_Py_Unicode_FromStringAndLength( + self->tables->reference_sequence->metadata_schema, + self->tables->reference_sequence->metadata_schema_length); + if (metadata_schema == NULL) { + goto out; + } + + if (PyDict_SetItemString(ref_dict, "data", data) != 0) { + goto out; + } + if 
(PyDict_SetItemString(ref_dict, "url", url) != 0) { + goto out; + } + if (PyDict_SetItemString(ref_dict, "metadata", metadata) != 0) { + goto out; + } + if (PyDict_SetItemString(ref_dict, "metadata_schema", metadata_schema) != 0) { + goto out; + } + ret = ref_dict; + ref_dict = NULL; + + } else { + ret = Py_BuildValue(""); + } + +out: + Py_XDECREF(ref_dict); + Py_XDECREF(data); + Py_XDECREF(url); + Py_XDECREF(metadata); + Py_XDECREF(metadata_schema); + return ret; +} + +static int +TableCollection_set_reference_sequence( + TableCollection *self, PyObject *dict, void *closure) +{ + int err; + int ret = -1; + Py_ssize_t data_length, url_length, metadata_length, metadata_schema_length; + PyObject *data_input = NULL; + const char *data = NULL; + PyObject *url_input = NULL; + const char *url = NULL; + PyObject *metadata_input = NULL; + const char *metadata = NULL; + PyObject *metadata_schema_input = NULL; + const char *metadata_schema = NULL; + + if (TableCollection_check_state(self) != 0) { + goto out; + } + + tsk_reference_sequence_free(self->tables->reference_sequence); + self->tables->reference_sequence = NULL; + if (dict != NULL) { + self->tables->reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (self->tables->reference_sequence == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + tsk_memset( + self->tables->reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + /* Get the input values */ + data_input = get_table_dict_value(dict, "data", true); + if (data_input == NULL) { + goto out; + } + + if (data_input != Py_None) { + data = parse_unicode_arg(data_input, &data_length); + if (data == NULL) { + goto out; + } + err = tsk_reference_sequence_set_data( + self->tables->reference_sequence, data, data_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + url_input = get_table_dict_value(dict, "url", true); + if (url_input == NULL) { + goto out; + } + + if (url_input != Py_None) { + url = parse_unicode_arg(url_input, 
&url_length); + if (url == NULL) { + goto out; + } + err = tsk_reference_sequence_set_url( + self->tables->reference_sequence, url, url_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + metadata_input = get_table_dict_value(dict, "metadata", true); + if (metadata_input == NULL) { + goto out; + } + + if (metadata_input != Py_None) { + metadata = parse_unicode_arg(metadata_input, &metadata_length); + if (metadata == NULL) { + goto out; + } + err = tsk_reference_sequence_set_metadata( + self->tables->reference_sequence, metadata, metadata_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + metadata_schema_input = get_table_dict_value(dict, "metadata_schema", true); + if (metadata_schema_input == NULL) { + goto out; + } + + if (metadata_schema_input != Py_None) { + metadata_schema + = parse_unicode_arg(metadata_schema_input, &metadata_schema_length); + if (metadata_schema == NULL) { + goto out; + } + err = tsk_reference_sequence_set_metadata_schema( + self->tables->reference_sequence, metadata_schema, + metadata_schema_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + } + + ret = 0; +out: + return ret; +} + static PyObject * TableCollection_equals(TableCollection *self, PyObject *args, PyObject *kwds) { @@ -7105,6 +7286,11 @@ static PyGetSetDef TableCollection_getsetters[] = { .get = (getter) TableCollection_get_metadata_schema, .set = (setter) TableCollection_set_metadata_schema, .doc = "The metadata schema." }, + { .name = "reference_sequence", + .get = (getter) TableCollection_get_reference_sequence, + .set = (setter) TableCollection_set_reference_sequence, + .doc = "The reference sequence." 
}, + + { NULL } /* Sentinel */ }; diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 28950ff5b6..ce0c6886d6 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -3247,6 +3247,38 @@ def test_named_tuple_init(self): assert metadata_schemas != metadata_schemas3 +class TestReferenceSequence: + def test_ref_seq(self): + tc = tskit.TableCollection(1) + ll_tc = tc._ll_tables + assert ll_tc.reference_sequence is None + + ref_dict = { + "data": "An example data string 🎄🌳🌴🌲🎋", + "url": "An example url string🎄🌳🌴🌲🎋", + "metadata": "An example metadata string 🎄🌳🌴🌲🎋", + "metadata_schema": "An example metadata_schema string 🎄🌳🌴🌲🎋", + } + ll_tc.reference_sequence = ref_dict + assert ll_tc.reference_sequence == ref_dict + + del ll_tc.reference_sequence + assert ll_tc.reference_sequence is None + + ref_dict["data"] = 5 + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + ref_dict["data"] = {} + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + ref_dict["data"] = [] + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + del ref_dict["data"] + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + + class TestModuleFunctions: """ Tests for the module level functions. diff --git a/python/tskit/tables.py b/python/tskit/tables.py index d973ebc1ee..031887b773 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -2557,6 +2557,7 @@ class TableCollection: def __init__(self, sequence_length=0): self._ll_tables = _tskit.TableCollection(sequence_length) + self.reference_sequence = None @property def individuals(self):