Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
----------------------
[0.99.16] - 2022-0X-XX
----------------------

- Make dumping of tables and tree sequences to disk a zero-copy operation.
(:user:`benjeffery`, :issue:`2111`, :pr:`2124`)

----------------------
[0.99.15] - 2021-12-07
----------------------
Expand Down
14 changes: 10 additions & 4 deletions c/subprojects/kastore/kastore.c
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,13 @@ kastore_bput(kastore_t *self, const char *key, size_t key_len, const void *array
if (ret != 0) {
goto out;
}
item->borrowed_array = array;
/* TEMP FIX UNTIL NEXT KASTORE RELEASE WITH
* https://github.com/tskit-dev/kastore/pull/185 */
if (array == NULL) {
item->array = malloc(1);
} else {
item->borrowed_array = array;
}
item->array_len = array_len;
out:
return ret;
Expand Down Expand Up @@ -1148,10 +1154,10 @@ kastore_print_state(kastore_t *self, FILE *out)
item = self->items + j;
fprintf(out,
"%.*s: type=%d, key_start=%zu, key_len=%zu, key=%p, "
"array_start=%zu, array_len=%zu, array=%p\n",
"array_start=%zu, array_len=%zu, array=%p, borrowed_array=%p\n",
(int) item->key_len, item->key, item->type, item->key_start, item->key_len,
(void *) item->key, item->array_start, item->array_len,
(void *) item->array);
(void *) item->key, item->array_start, item->array_len, (void *) item->array,
(void *) item->borrowed_array);
}
fprintf(out, "============================\n");
}
73 changes: 33 additions & 40 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ write_offset_col(
uint32_t *offset32 = NULL;
tsk_size_t len = col->num_rows + 1;
tsk_size_t j;
int32_t put_flags = 0;
int type;
const void *data;
bool needs_64 = col->offset_array[col->num_rows] > UINT32_MAX;
Expand All @@ -361,6 +362,7 @@ write_offset_col(
if (options & TSK_DUMP_FORCE_OFFSET_64 || needs_64) {
type = KAS_UINT64;
data = col->offset_array;
put_flags = KAS_BORROWS_ARRAY;
} else {
offset32 = tsk_malloc(len * sizeof(*offset32));
if (offset32 == NULL) {
Expand All @@ -372,8 +374,9 @@ write_offset_col(
}
type = KAS_UINT32;
data = offset32;
/* We've just allocated a temporary buffer, so kastore can't borrow it;
 * leave put_flags = 0. */
}
ret = kastore_puts(store, offset_col_name, data, (size_t) len, type, 0);
ret = kastore_puts(store, offset_col_name, data, (size_t) len, type, put_flags);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
Expand All @@ -392,7 +395,7 @@ write_table_ragged_cols(

for (col = write_cols; col->name != NULL; col++) {
ret = kastore_puts(store, col->name, col->data_array, (size_t) col->data_len,
col->data_type, 0);
col->data_type, KAS_BORROWS_ARRAY);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
Expand All @@ -414,8 +417,8 @@ write_table_cols(kastore_t *store, const write_table_col_t *write_cols,
const write_table_col_t *col;

for (col = write_cols; col->name != NULL; col++) {
ret = kastore_puts(
store, col->name, col->array, (size_t) col->len, col->type, 0);
ret = kastore_puts(store, col->name, col->array, (size_t) col->len, col->type,
KAS_BORROWS_ARRAY);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
Expand Down Expand Up @@ -10690,39 +10693,6 @@ tsk_table_collection_load(
return ret;
}

/* Write the top-level (non-table) columns of a table collection to a kastore:
 * "format/name", "format/version", "sequence_length", "uuid", "time_units",
 * "metadata" and "metadata_schema".
 *
 * self:    table collection whose sequence_length, time_units, metadata and
 *          metadata_schema fields are written.
 * store:   kastore that receives the columns (passed through to
 *          write_table_cols).
 * options: unused here (wrapped in TSK_UNUSED).
 *
 * Returns 0 on success; otherwise the nonzero value returned by
 * tsk_generate_uuid or write_table_cols.
 *
 * NOTE(review): format_name, version and uuid are locals of this function, so
 * the pointers in write_cols are only valid until it returns -- confirm that
 * write_table_cols does not keep references to them beyond the call. */
static int TSK_WARN_UNUSED
tsk_table_collection_write_format_data(const tsk_table_collection_t *self,
kastore_t *store, tsk_flags_t TSK_UNUSED(options))
{
int ret = 0;
/* Writable local copy of TSK_FILE_FORMAT_NAME (filled in by tsk_memcpy below). */
char format_name[TSK_FILE_FORMAT_NAME_LENGTH];
char uuid[TSK_UUID_SIZE + 1]; // Must include space for trailing null.
/* File format version stored as a { major, minor } pair. */
uint32_t version[2]
= { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };
/* Column descriptors: { name, array, length, kastore type }. The list is
 * terminated by an entry whose name is NULL. */
write_table_col_t write_cols[] = {
{ "format/name", (void *) format_name, sizeof(format_name), KAS_INT8 },
{ "format/version", (void *) version, 2, KAS_UINT32 },
{ "sequence_length", (const void *) &self->sequence_length, 1, KAS_FLOAT64 },
/* Only TSK_UUID_SIZE bytes are written: the trailing null is not stored. */
{ "uuid", (void *) uuid, TSK_UUID_SIZE, KAS_INT8 },
{ "time_units", (void *) self->time_units, self->time_units_length, KAS_INT8 },
{ "metadata", (void *) self->metadata, self->metadata_length, KAS_INT8 },
{ "metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_INT8 },
{ .name = NULL },
};

/* Fill the uuid buffer before the columns are written. */
ret = tsk_generate_uuid(uuid, 0);
if (ret != 0) {
goto out;
}
/* Copy the format name into a writable local buffer: this works around the
 * fact that compilers won't allow casts to discard the 'const' qualifier. */
tsk_memcpy(format_name, TSK_FILE_FORMAT_NAME, sizeof(format_name));
/* The literal 0 is passed as the last argument rather than forwarding the
 * (unused) options parameter. */
ret = write_table_cols(store, write_cols, 0);
out:
return ret;
}

static int TSK_WARN_UNUSED
tsk_table_collection_dump_reference_sequence(const tsk_table_collection_t *self,
kastore_t *store, tsk_flags_t TSK_UNUSED(options))
Expand Down Expand Up @@ -10781,6 +10751,22 @@ tsk_table_collection_dumpf(
{
int ret = 0;
kastore_t store;
char uuid[TSK_UUID_SIZE + 1]; // Must include space for trailing null.
write_table_col_t format_columns[] = {
{ "format/name", (const void *) &TSK_FILE_FORMAT_NAME,
TSK_FILE_FORMAT_NAME_LENGTH, KAS_INT8 },
{ "format/version",
(const void *) &(uint32_t[]){
TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR },
2, KAS_UINT32 },
{ "sequence_length", (const void *) &self->sequence_length, 1, KAS_FLOAT64 },
{ "uuid", (void *) uuid, TSK_UUID_SIZE, KAS_INT8 },
{ "time_units", (void *) self->time_units, self->time_units_length, KAS_INT8 },
{ "metadata", (void *) self->metadata, self->metadata_length, KAS_INT8 },
{ "metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_INT8 },
{ .name = NULL },
};

tsk_memset(&store, 0, sizeof(store));

Expand All @@ -10790,12 +10776,19 @@ tsk_table_collection_dumpf(
goto out;
}

/* All of these functions will set the kas_error internally, so we don't have
* to modify the return value. */
ret = tsk_table_collection_write_format_data(self, &store, options);
/* Write format data */
ret = tsk_generate_uuid(uuid, 0);
if (ret != 0) {
goto out;
}

ret = write_table_cols(&store, format_columns, options);
if (ret != 0) {
goto out;
}

/* All of these functions will set the kas_error internally, so we don't have
* to modify the return value. */
ret = tsk_node_table_dump(&self->nodes, &store, options);
if (ret != 0) {
goto out;
Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
- ``VcfWriter.write`` now prints the site ID of variants in the ID field of the output VCF files.
(:user:`roohy`, :issue:`2103`, :pr:`2107`)

- Make dumping of tables and tree sequences to disk a zero-copy operation.
(:user:`benjeffery`, :issue:`2111`, :pr:`2124`)

**Breaking Changes**

- The JSON metadata codec now interprets the empty string as an empty object. This means
Expand Down