diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 15c8b8e1c6..1ac4fd4fc9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -324,6 +324,143 @@ test_table_collection_simplify_errors(void) tsk_table_collection_free(&tables); } +static void +test_table_collection_reference_sequence(void) +{ /* Exercises equality, copy and dump/load of a table collection's reference_sequence. */ + int ret; + tsk_table_collection_t tc1, tc2; + + char example_data[100] = "An example string with unicode πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹"; + tsk_size_t example_data_length = (tsk_size_t) strlen(example_data); + char example_url[100] = "An example url with unicode πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹"; + tsk_size_t example_url_length = (tsk_size_t) strlen(example_url); + char example_metadata[100] = "An example metadata with unicode πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹"; + tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata); + char example_schema[100] = "An example schema with unicode πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹"; + tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema); + + // Test equality + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_init(&tc2, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* no reference sequence has been attached to either collection yet */ + + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); /* zeroed so every pointer and length field starts cleared */ + + ret = tsk_reference_sequence_set_data( + tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* only tc1 has a reference sequence at this point */ + + tc2.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc2.reference_sequence, NULL); + tsk_memset(tc2.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + ret = tsk_reference_sequence_set_data( + tc2.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + 
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* same data bytes on both sides */ + + ret = tsk_reference_sequence_set_url( + tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* url set on tc1 only */ + ret = tsk_reference_sequence_set_url( + tc2.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata( + tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* metadata set on tc1 only */ + ret = tsk_reference_sequence_set_metadata( + tc2.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata_schema( + tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* schema set on tc1 only, so the two schema lengths differ; equal-length schemas with different bytes are not exercised by this test */ + ret = tsk_reference_sequence_set_metadata_schema( + tc2.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + // Test copy + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + ret = tsk_reference_sequence_set_data( + tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, 0); /* first copy, into the tc2 that was freed above */ + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = 
tsk_reference_sequence_set_url( + tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); /* copy again over the tc2 produced by the previous copy */ + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata( + tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + ret = tsk_reference_sequence_set_metadata_schema( + tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); + + // Test dump and load + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); + ret = tsk_table_collection_init(&tc1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + tc1.sequence_length = 1.0; + + tc1.reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + CU_ASSERT_NOT_EQUAL_FATAL(tc1.reference_sequence, NULL); + tsk_memset(tc1.reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + ret = tsk_reference_sequence_set_data( + tc1.reference_sequence, example_data, example_data_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_url( + tc1.reference_sequence, example_url, example_url_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_metadata( + tc1.reference_sequence, example_metadata, example_metadata_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_reference_sequence_set_metadata_schema( + tc1.reference_sequence, example_schema, example_schema_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0); /* all four fields are populated before the round-trip */ + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = 
tsk_table_collection_load(&tc2, _tmp_file_name, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0)); /* the loaded collection must match what was dumped */ + tsk_table_collection_free(&tc1); + tsk_table_collection_free(&tc2); +} + static void test_table_collection_metadata(void) { @@ -8794,6 +8931,8 @@ main(int argc, char **argv) { "test_table_collection_simplify_errors", test_table_collection_simplify_errors }, { "test_table_collection_time_units", test_table_collection_time_units }, + { "test_table_collection_reference_sequence", + test_table_collection_reference_sequence }, { "test_table_collection_metadata", test_table_collection_metadata }, { "test_simplify_tables_drops_indexes", test_simplify_tables_drops_indexes }, { "test_simplify_empty_tables", test_simplify_empty_tables }, diff --git a/c/tskit/tables.c b/c/tskit/tables.c index e7215325fe..03694e090c 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -627,6 +627,20 @@ write_metadata_schema_header( return fprintf(out, fmt, (int) metadata_schema_length, metadata_schema); } +int +tsk_reference_sequence_free(tsk_reference_sequence_t *self) +{ /* Releases the four buffers held by self and then self itself; a NULL self is a no-op. Always returns 0. */ + if (self != NULL) { + tsk_safe_free(self->data); + tsk_safe_free(self->url); + tsk_safe_free(self->metadata); + tsk_safe_free(self->metadata_schema); + tsk_safe_free(self); /* the struct is released too, so the caller's pointer is dangling afterwards */ + } + + return 0; +} + /************************* * individual table *************************/ @@ -9833,6 +9847,9 @@ tsk_table_collection_free(tsk_table_collection_t *self) tsk_mutation_table_free(&self->mutations); tsk_population_table_free(&self->populations); tsk_provenance_table_free(&self->provenances); + if (self->reference_sequence) { /* NOTE(review): guard is redundant, tsk_reference_sequence_free already accepts NULL */ + tsk_reference_sequence_free(self->reference_sequence); + } tsk_safe_free(self->indexes.edge_insertion_order); tsk_safe_free(self->indexes.edge_removal_order); tsk_safe_free(self->file_uuid); @@ -9882,6 +9899,47 @@ tsk_table_collection_equals(const tsk_table_collection_t *self, && tsk_provenance_table_equals( &self->provenances, &other->provenances, options); } + + 
ret = ret /* reference sequences match when both are absent, or both are present with equal data and url and, unless TSK_CMP_IGNORE_TS_METADATA is set in options, equal metadata and metadata_schema */ + && ((self->reference_sequence == NULL && other->reference_sequence == NULL) + || ((self->reference_sequence != NULL + && other->reference_sequence != NULL) + && (self->reference_sequence->data_length + == other->reference_sequence->data_length + && self->reference_sequence->url_length + == other->reference_sequence->url_length + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || self->reference_sequence->metadata_length + == other->reference_sequence + ->metadata_length) + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || self->reference_sequence->metadata_schema_length + == other->reference_sequence + ->metadata_schema_length) + && tsk_memcmp(self->reference_sequence->data, + other->reference_sequence->data, + self->reference_sequence->data_length + * sizeof(char)) + == 0 + && tsk_memcmp(self->reference_sequence->url, + other->reference_sequence->url, + self->reference_sequence->url_length + * sizeof(char)) + == 0 + && ((options & TSK_CMP_IGNORE_TS_METADATA) + || tsk_memcmp(self->reference_sequence->metadata, + other->reference_sequence->metadata, + self->reference_sequence->metadata_length + * sizeof(char)) + == 0) + && ((options & TSK_CMP_IGNORE_TS_METADATA) /* test the caller's flag: the bare flag constant is always true and would skip the schema byte comparison */ + || tsk_memcmp( + self->reference_sequence->metadata_schema, + other->reference_sequence->metadata_schema, + self->reference_sequence + ->metadata_schema_length + * sizeof(char)) + == 0)))); return ret; } @@ -9909,6 +9967,40 @@ tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self, metadata_schema, metadata_schema_length); } +int +tsk_reference_sequence_set_data(tsk_reference_sequence_t *self, + const char *reference_sequence, tsk_size_t reference_sequence_length) +{ /* Delegates to replace_string to update self->data and self->data_length; returns its result. */ + return replace_string( + &self->data, &self->data_length, reference_sequence, reference_sequence_length); +} + +int +tsk_reference_sequence_set_url(tsk_reference_sequence_t *self, + const char *reference_sequence_url, tsk_size_t reference_sequence_url_length) +{ /* Delegates to replace_string to update self->url and self->url_length; returns its result. */ + return replace_string(&self->url, &self->url_length, 
reference_sequence_url, + reference_sequence_url_length); +} + +int +tsk_reference_sequence_set_metadata(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata, + tsk_size_t reference_sequence_metadata_length) +{ /* Delegates to replace_string to update self->metadata and self->metadata_length; returns its result. */ + return replace_string(&self->metadata, &self->metadata_length, + reference_sequence_metadata, reference_sequence_metadata_length); +} + +int +tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata_schema, + tsk_size_t reference_sequence_metadata_schema_length) +{ /* Delegates to replace_string to update self->metadata_schema and self->metadata_schema_length; returns its result. */ + return replace_string(&self->metadata_schema, &self->metadata_schema_length, + reference_sequence_metadata_schema, reference_sequence_metadata_schema_length); +} + int tsk_table_collection_set_indexes(tsk_table_collection_t *self, tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order) @@ -10087,6 +10179,41 @@ tsk_table_collection_copy(const tsk_table_collection_t *self, goto out; } + tsk_reference_sequence_free(dest->reference_sequence); /* drop whatever dest currently holds; the free function accepts NULL */ + dest->reference_sequence = NULL; /* dest ends up without a reference sequence when self has none */ + + if (self->reference_sequence != NULL) { + dest->reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (dest->reference_sequence == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + tsk_memset(dest->reference_sequence, 0, sizeof(tsk_reference_sequence_t)); /* clear all fields before the setters below fill them in */ + + ret = tsk_reference_sequence_set_data(dest->reference_sequence, + self->reference_sequence->data, self->reference_sequence->data_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_url(dest->reference_sequence, + self->reference_sequence->url, self->reference_sequence->url_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata(dest->reference_sequence, + self->reference_sequence->metadata, + self->reference_sequence->metadata_length); + if (ret != 0) { + goto out; + } + ret = tsk_reference_sequence_set_metadata_schema(dest->reference_sequence, + self->reference_sequence->metadata_schema, + 
self->reference_sequence->metadata_schema_length); + if (ret != 0) { + goto out; + } + } + out: return ret; } @@ -10301,6 +10428,74 @@ tsk_table_collection_load_indexes(tsk_table_collection_t *self, kastore_t *store return ret; } +static int +tsk_reference_sequence_load(tsk_reference_sequence_t **self, kastore_t *store) +{ /* Reads the optional reference_sequence/... columns from store and copies each one found into *self, allocating *self first if it is NULL and at least one column is present. Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure, or the error returned by read_table_properties or a setter. */ + int ret = 0; + char *data = NULL; + char *url = NULL; + char *metadata = NULL; + char *metadata_schema = NULL; + tsk_size_t data_length = 0, url_length = 0, metadata_length = 0, metadata_schema_length = 0; /* all lengths start at 0 so none is ever read indeterminate */ + + read_table_property_t properties[] = { + { "reference_sequence/data", (void **) &data, &data_length, KAS_UINT8, + TSK_COL_OPTIONAL }, + { "reference_sequence/url", (void **) &url, &url_length, KAS_UINT8, + TSK_COL_OPTIONAL }, + { "reference_sequence/metadata", (void **) &metadata, &metadata_length, + KAS_UINT8, TSK_COL_OPTIONAL }, + { "reference_sequence/metadata_schema", (void **) &metadata_schema, + &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL }, + { .name = NULL }, + }; + + ret = read_table_properties(store, properties, 0); + if (ret != 0) { + goto out; + } + if (*self == NULL + && (data != NULL || url != NULL || metadata != NULL + || metadata_schema != NULL)) { /* allocate only when the store actually carries a reference sequence */ + *self = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (*self == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + tsk_memset(*self, 0, sizeof(tsk_reference_sequence_t)); + } + if (data != NULL) { + ret = tsk_reference_sequence_set_data(*self, data, (tsk_size_t) data_length); + if (ret != 0) { + goto out; + } + } + if (metadata != NULL) { + ret = tsk_reference_sequence_set_metadata( + *self, metadata, (tsk_size_t) metadata_length); + if (ret != 0) { + goto out; + } + } + if (metadata_schema != NULL) { + ret = tsk_reference_sequence_set_metadata_schema( + *self, metadata_schema, (tsk_size_t) metadata_schema_length); + if (ret != 0) { + goto out; + } + } + if (url != NULL) { + ret = tsk_reference_sequence_set_url(*self, url, (tsk_size_t) url_length); + if (ret != 0) { + goto 
out; + } + } + +out: + + return ret; +} + static int TSK_WARN_UNUSED tsk_table_collection_loadf_inited(tsk_table_collection_t *self, FILE *file) { @@ -10360,6 +10555,10 @@ tsk_table_collection_loadf_inited(tsk_table_collection_t *self, FILE *file) if (ret != 0) { goto out; } + ret = tsk_reference_sequence_load(&self->reference_sequence, &store); /* passes the address of the member so the loader can allocate it */ + if (ret != 0) { + goto out; + } ret = kastore_close(&store); if (ret != 0) { goto out; @@ -10460,6 +10659,22 @@ tsk_table_collection_write_format_data(const tsk_table_collection_t *self, return ret; } +static int TSK_WARN_UNUSED +tsk_reference_sequence_dump(const tsk_reference_sequence_t *self, kastore_t *store, + tsk_flags_t TSK_UNUSED(options)) +{ /* Writes the four reference_sequence/... columns of self to store as KAS_UINT8 arrays via write_table_cols and returns its result; self is dereferenced and must not be NULL. */ + write_table_col_t write_cols[] = { + { "reference_sequence/data", (void *) self->data, self->data_length, KAS_UINT8 }, + { "reference_sequence/url", (void *) self->url, self->url_length, KAS_UINT8 }, + { "reference_sequence/metadata", (void *) self->metadata, self->metadata_length, + KAS_UINT8 }, + { "reference_sequence/metadata_schema", (void *) self->metadata_schema, + self->metadata_schema_length, KAS_UINT8 }, + { .name = NULL }, + }; + return write_table_cols(store, write_cols, 0); +} + int TSK_WARN_UNUSED tsk_table_collection_dump( const tsk_table_collection_t *self, const char *filename, tsk_flags_t options) @@ -10548,6 +10763,12 @@ tsk_table_collection_dumpf( if (ret != 0) { goto out; } + if (self->reference_sequence) { /* no reference_sequence/... columns are written when none is attached */ + ret = tsk_reference_sequence_dump(self->reference_sequence, &store, options); + if (ret != 0) { + goto out; + } + } ret = kastore_close(&store); if (ret != 0) { diff --git a/c/tskit/tables.h b/c/tskit/tables.h index ad8304cbcb..afe4599fb9 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -538,6 +538,17 @@ typedef struct { tsk_size_t *record_offset; } tsk_provenance_table_t; +typedef struct { /* Reference sequence: four byte buffers, each stored next to its length. */ + char *data; + tsk_size_t data_length; + char *url; + tsk_size_t url_length; + char *metadata; + tsk_size_t metadata_length; + char *metadata_schema; + 
tsk_size_t metadata_schema_length; +} tsk_reference_sequence_t; + /** @brief A collection of tables defining the data for a tree sequence. */ @@ -554,6 +565,7 @@ typedef struct { /** @brief The metadata schema */ char *metadata_schema; tsk_size_t metadata_schema_length; + tsk_reference_sequence_t *reference_sequence; /* NULL when the collection has no reference sequence; otherwise released by tsk_table_collection_free */ /** @brief The individual table */ tsk_individual_table_t individuals; /** @brief The node table */ @@ -4104,6 +4116,18 @@ int tsk_table_collection_compute_mutation_parents( int tsk_table_collection_compute_mutation_times( tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options)); +int tsk_reference_sequence_free(tsk_reference_sequence_t *self); /* frees the buffers and self itself; NULL is accepted; always returns 0 */ +int tsk_reference_sequence_set_data(tsk_reference_sequence_t *self, + const char *reference_sequence, tsk_size_t reference_sequence_length); /* each setter below takes a buffer and its length in bytes and returns 0 on success */ +int tsk_reference_sequence_set_url(tsk_reference_sequence_t *self, + const char *reference_sequence_url, tsk_size_t reference_sequence_url_length); +int tsk_reference_sequence_set_metadata(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata, + tsk_size_t reference_sequence_metadata_length); +int tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self, + const char *reference_sequence_metadata_schema, + tsk_size_t reference_sequence_metadata_schema_length); + /** @defgroup TABLE_SORTER_API_GROUP Low-level table sorter API. 
@{ diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 4e873fa8a1..843de984de 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -6860,6 +6860,187 @@ TableCollection_has_index(TableCollection *self) return ret; } +static PyObject * +TableCollection_get_reference_sequence(TableCollection *self, void *closure) +{ /* Getter: returns a new dict with keys "data", "url", "metadata" and "metadata_schema" built from the C reference sequence, None when there is none, or NULL with an exception set on failure. */ + PyObject *ret = NULL; + PyObject *ref_dict = NULL; + PyObject *data = NULL; + PyObject *url = NULL; + PyObject *metadata = NULL; + PyObject *metadata_schema = NULL; + + if (TableCollection_check_state(self) != 0) { + goto out; + } + + if (self->tables->reference_sequence != NULL) { + ref_dict = PyDict_New(); + if (ref_dict == NULL) { + goto out; + } + + data + = make_Py_Unicode_FromStringAndLength(self->tables->reference_sequence->data, + self->tables->reference_sequence->data_length); + if (data == NULL) { + goto out; + } + url = make_Py_Unicode_FromStringAndLength(self->tables->reference_sequence->url, + self->tables->reference_sequence->url_length); + if (url == NULL) { + goto out; + } + metadata = make_Py_Unicode_FromStringAndLength( + self->tables->reference_sequence->metadata, + self->tables->reference_sequence->metadata_length); + if (metadata == NULL) { + goto out; + } + metadata_schema = make_Py_Unicode_FromStringAndLength( + self->tables->reference_sequence->metadata_schema, + self->tables->reference_sequence->metadata_schema_length); + if (metadata_schema == NULL) { + goto out; + } + + if (PyDict_SetItemString(ref_dict, "data", data) != 0) { /* PyDict_SetItemString does not steal the value, so the local reference is still dropped at out */ + goto out; + } + if (PyDict_SetItemString(ref_dict, "url", url) != 0) { + goto out; + } + if (PyDict_SetItemString(ref_dict, "metadata", metadata) != 0) { + goto out; + } + if (PyDict_SetItemString(ref_dict, "metadata_schema", metadata_schema) != 0) { + goto out; + } + ret = ref_dict; /* hand the dict to the caller */ + ref_dict = NULL; /* so the Py_XDECREF below does not release the returned dict */ + + } else { + ret = Py_BuildValue(""); /* an empty format string yields None */ + } + +out: + Py_XDECREF(ref_dict); + Py_XDECREF(data); + Py_XDECREF(url); + Py_XDECREF(metadata); + Py_XDECREF(metadata_schema); + return 
ret; +} + +static int +TableCollection_set_reference_sequence( + TableCollection *self, PyObject *dict, void *closure) +{ /* Setter: replaces the C reference sequence with the "data", "url", "metadata" and "metadata_schema" entries of dict (None entries are left empty); dict == NULL (attribute deletion) just clears it. Returns 0 on success, -1 with a Python exception set on failure. NOTE(review): the old value is released before dict is validated, so a failed assignment leaves a new, partly filled struct in place. */ + int err; + int ret = -1; + Py_ssize_t data_length, url_length, metadata_length, metadata_schema_length; + PyObject *data_input = NULL; + const char *data = NULL; + PyObject *url_input = NULL; + const char *url = NULL; + PyObject *metadata_input = NULL; + const char *metadata = NULL; + PyObject *metadata_schema_input = NULL; + const char *metadata_schema = NULL; + + if (TableCollection_check_state(self) != 0) { + goto out; + } + + tsk_reference_sequence_free(self->tables->reference_sequence); + self->tables->reference_sequence = NULL; + if (dict != NULL) { + self->tables->reference_sequence = tsk_malloc(sizeof(tsk_reference_sequence_t)); + if (self->tables->reference_sequence == NULL) { + PyErr_NoMemory(); /* a setter must fail with an exception set; ret stays -1 */ + goto out; + } + tsk_memset( + self->tables->reference_sequence, 0, sizeof(tsk_reference_sequence_t)); + + /* Get the input values */ + data_input = get_table_dict_value(dict, "data", true); + if (data_input == NULL) { + goto out; + } + + if (data_input != Py_None) { + data = parse_unicode_arg(data_input, &data_length); + if (data == NULL) { + goto out; + } + err = tsk_reference_sequence_set_data( + self->tables->reference_sequence, data, data_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + url_input = get_table_dict_value(dict, "url", true); + if (url_input == NULL) { + goto out; + } + + if (url_input != Py_None) { + url = parse_unicode_arg(url_input, &url_length); + if (url == NULL) { + goto out; + } + err = tsk_reference_sequence_set_url( + self->tables->reference_sequence, url, url_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + metadata_input = get_table_dict_value(dict, "metadata", true); + if (metadata_input == NULL) { + goto out; + } + + if (metadata_input != Py_None) { + metadata = parse_unicode_arg(metadata_input, &metadata_length); + if (metadata == NULL) { + goto out; 
+ } + err = tsk_reference_sequence_set_metadata( + self->tables->reference_sequence, metadata, metadata_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + metadata_schema_input = get_table_dict_value(dict, "metadata_schema", true); + if (metadata_schema_input == NULL) { + goto out; + } + + if (metadata_schema_input != Py_None) { + metadata_schema + = parse_unicode_arg(metadata_schema_input, &metadata_schema_length); + if (metadata_schema == NULL) { + goto out; + } + err = tsk_reference_sequence_set_metadata_schema( + self->tables->reference_sequence, metadata_schema, + metadata_schema_length); + if (err != 0) { + handle_tskit_error(err); + goto out; + } + } + } + + ret = 0; +out: + return ret; +} + static PyObject * TableCollection_equals(TableCollection *self, PyObject *args, PyObject *kwds) { @@ -7105,6 +7286,11 @@ static PyGetSetDef TableCollection_getsetters[] = { .get = (getter) TableCollection_get_metadata_schema, .set = (setter) TableCollection_set_metadata_schema, .doc = "The metadata schema." }, + { .name = "reference_sequence", + .get = (getter) TableCollection_get_reference_sequence, + .set = (setter) TableCollection_set_reference_sequence, + .doc = "The reference sequence." 
}, + { NULL } /* Sentinel */ }; diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 28950ff5b6..ce0c6886d6 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -3247,6 +3247,38 @@ def test_named_tuple_init(self): assert metadata_schemas != metadata_schemas3 +class TestReferenceSequence: + def test_ref_seq(self): + tc = tskit.TableCollection(1) + ll_tc = tc._ll_tables + assert ll_tc.reference_sequence is None  # a new collection has no reference sequence + + ref_dict = { + "data": "An example data string πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹", + "url": "An example url stringπŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹", + "metadata": "An example metadata string πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹", + "metadata_schema": "An example metadata_schema string πŸŽ„πŸŒ³πŸŒ΄πŸŒ²πŸŽ‹", + } + ll_tc.reference_sequence = ref_dict + assert ll_tc.reference_sequence == ref_dict  # the getter must return an equal dict + + del ll_tc.reference_sequence  # deleting the attribute clears the reference sequence + assert ll_tc.reference_sequence is None + + ref_dict["data"] = 5  # a non-str value for a field must raise TypeError + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + ref_dict["data"] = {} + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + ref_dict["data"] = [] + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + del ref_dict["data"]  # a dict without the "data" key must also raise TypeError + with pytest.raises(TypeError): + ll_tc.reference_sequence = ref_dict + + class TestModuleFunctions: """ Tests for the module level functions. diff --git a/python/tskit/_version.py b/python/tskit/_version.py index a415f37e8d..dd44ba1551 100644 --- a/python/tskit/_version.py +++ b/python/tskit/_version.py @@ -1,4 +1,4 @@ # Definitive location for the version number. 
# During development, should be x.y.z.devN # For beta should be x.y.zbN -tskit_version = "0.3.8.dev1" +tskit_version = "0.4.0a1" diff --git a/python/tskit/tables.py b/python/tskit/tables.py index d973ebc1ee..031887b773 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -2557,6 +2557,7 @@ class TableCollection: def __init__(self, sequence_length=0): self._ll_tables = _tskit.TableCollection(sequence_length) + self.reference_sequence = None @property def individuals(self):