diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index d8904b8a09..c0e3a157a5 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -131,7 +131,7 @@ test_table_collection_equals_options(void) CU_ASSERT(ret >= 0); ret = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0); CU_ASSERT(ret >= 0); - ret = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT(ret >= 0); ret = tsk_population_table_add_row(&tc1.populations, NULL, 0); CU_ASSERT(ret >= 0); @@ -2088,12 +2088,13 @@ test_individual_table(void) tsk_individual_table_set_max_rows_increment(&table, 1); tsk_individual_table_set_max_metadata_length_increment(&table, 1); tsk_individual_table_set_max_location_length_increment(&table, 1); + tsk_individual_table_set_max_parents_length_increment(&table, 1); tsk_individual_table_print_state(&table, _devnull); for (j = 0; j < (tsk_id_t) num_rows; j++) { ret = tsk_individual_table_add_row(&table, (tsk_flags_t) j, test_location, - spatial_dimension, test_metadata, test_metadata_length); + spatial_dimension, NULL, 0, test_metadata, test_metadata_length); CU_ASSERT_EQUAL_FATAL(ret, j); CU_ASSERT_EQUAL(table.flags[j], (tsk_flags_t) j); for (k = 0; k < spatial_dimension; k++) { @@ -2172,8 +2173,8 @@ test_individual_table(void) for (j = 0; j < (tsk_id_t) num_rows + 1; j++) { metadata_offset[j] = (tsk_size_t) j; } - ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, + location_offset, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.flags, flags, num_rows * sizeof(uint32_t)), 0); CU_ASSERT_EQUAL( @@ -2192,8 +2193,8 @@ test_individual_table(void) tsk_individual_table_print_state(&table, _devnull); /* Append another num_rows onto the end */ - ret = tsk_individual_table_append_columns( - &table, num_rows, flags, location, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_append_columns(&table, num_rows, flags, location, + location_offset, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.flags, flags, num_rows * sizeof(uint32_t)), 0); CU_ASSERT_EQUAL( @@ -2235,30 +2236,31 @@ test_individual_table(void) ret = tsk_individual_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); + // TODO: add tests for parent spec /* flags can't be NULL */ - ret = tsk_individual_table_set_columns( - &table, num_rows, NULL, location, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_set_columns(&table, num_rows, NULL, location, + location_offset, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* location and location offset must be simultaneously NULL or not */ ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, NULL, metadata, metadata_offset); + &table, num_rows, flags, location, NULL, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); - ret = tsk_individual_table_set_columns( - &table, num_rows, flags, NULL, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, NULL, + location_offset, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* metadata and metadata offset must be simultaneously NULL or not */ - ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, location_offset, NULL, metadata_offset); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, + location_offset, NULL, NULL, NULL, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, location_offset, metadata, NULL); + &table, num_rows, flags, location, location_offset, NULL, NULL, metadata, NULL); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* if location and location_offset are both null, all locations are zero length */ num_rows = 10; memset(location_offset, 0, (num_rows + 1) * sizeof(tsk_size_t)); ret = tsk_individual_table_set_columns( - &table, num_rows, flags, NULL, NULL, NULL, NULL); + &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.location_offset, location_offset, (num_rows + 1) * sizeof(tsk_size_t)), @@ -2266,7 +2268,7 @@ test_individual_table(void) CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.location_length, 0); ret = tsk_individual_table_append_columns( - &table, num_rows, flags, NULL, NULL, NULL, NULL); + &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.location_offset, location_offset, (num_rows + 1) * sizeof(tsk_size_t)), @@ -2284,7 +2286,7 @@ test_individual_table(void) num_rows = 10; memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t)); ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, location_offset, NULL, NULL); + &table, num_rows, flags, location, location_offset, NULL, NULL, NULL, NULL); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.flags, flags, num_rows * sizeof(uint32_t)), 0); CU_ASSERT_EQUAL( @@ -2296,7 +2298,7 @@ test_individual_table(void) CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.metadata_length, 0); ret = tsk_individual_table_append_columns( - &table, num_rows, flags, location, location_offset, NULL, NULL); + &table, num_rows, flags, location, location_offset, NULL, NULL, NULL, NULL); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL( memcmp(table.location, location, spatial_dimension * num_rows * sizeof(double)), @@ -4311,7 +4313,7 @@ test_table_overflow(void) /* Simulate overflows */ tables.individuals.max_rows = max_rows; tables.individuals.num_rows = max_rows; - ret = tsk_individual_table_add_row(&tables.individuals, 0, 0, 0, NULL, 0); + ret = tsk_individual_table_add_row(&tables.individuals, 0, 0, 0, NULL, 0, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); tables.nodes.max_rows = max_rows; @@ -4366,13 +4368,17 @@ test_column_overflow(void) /* We can't trigger a column overflow with one element because the parameter * value is 32 bit */ - ret = tsk_individual_table_add_row(&tables.individuals, 0, &zero, 1, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, &zero, 1, NULL, 0, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, too_big, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, too_big, NULL, 0, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, zeros, 1); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, zeros, 1); CU_ASSERT_EQUAL_FATAL(ret, 1); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, too_big); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, too_big); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); ret = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, zeros, 1); @@ -4925,9 +4931,11 @@ test_table_collection_subset_with_options(tsk_flags_t options) ret = tsk_node_table_add_row( &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_population_table_add_row(&tables.populations, NULL, 0); CU_ASSERT_FATAL(ret >= 0); @@ -5018,9 +5026,11 @@ test_table_collection_subset_errors(void) ret = tsk_node_table_add_row( &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_population_table_add_row(&tables.populations, NULL, 0); CU_ASSERT_FATAL(ret >= 0); @@ -5106,11 +5116,14 @@ test_table_collection_union(void) CU_ASSERT_FATAL(ret >= 0); ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 2, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_population_table_add_row(&tables.populations, NULL, 0); CU_ASSERT_FATAL(ret >= 0); @@ -5220,9 +5233,11 @@ test_table_collection_union_errors(void) CU_ASSERT_FATAL(ret >= 0); ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_population_table_add_row(&tables.populations, NULL, 0); CU_ASSERT_FATAL(ret >= 0); @@ -5311,9 +5326,11 @@ test_table_collection_clear_with_options(tsk_flags_t options) CU_ASSERT_FATAL(ret >= 0); ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_population_table_add_row(&tables.populations, NULL, 0); CU_ASSERT_FATAL(ret >= 0); diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index d0a677ae59..59a7ca505a 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -1074,11 +1074,11 @@ test_simplest_unary_with_individuals(void) "0 2 4 2,3\n" "0 1 5 4\n" "1 2 6 4\n"; - const char *individuals = "0 0.5\n" - "0 1.5,3.1\n" - "0 2.1\n" - "0 3.2\n" - "0 4.2\n"; + const char *individuals = "0 0.5 -1,-1\n" + "0 1.5,3.1 -1,-1\n" + "0 2.1 -1,-1\n" + "0 3.2 -1,-1\n" + "0 4.2 -1,-1\n"; const char *nodes_expect = "1 0 0 -1\n" "1 0 0 0\n" "0 1 0 1\n" @@ -1089,10 +1089,10 @@ test_simplest_unary_with_individuals(void) "2 3 3 1\n" "0 2 4 0,2\n" "1 2 5 4\n"; - const char *individuals_expect = "0 0.5\n" - "0 1.5,3.1\n" - "0 2.1\n" - "0 3.2\n"; + const char *individuals_expect = "0 0.5 -1,-1\n" + "0 1.5,3.1 -1,-1\n" + "0 2.1 -1,-1\n" + "0 3.2 -1,-1\n"; tsk_treeseq_t ts, simplified, expected; tsk_id_t sample_ids[] = { 0, 1 }; @@ -1887,28 +1887,32 @@ test_simplest_final_gap_tsk_treeseq_mutation_parents(void) static void test_simplest_individuals(void) { - const char *individuals = "1 0.25\n" - "2 0.5,0.25\n"; + const char *individuals = "1 0.25 -1,-1\n" + "2 0.5,0.25 -1,-1\n" + "3 0.3 0,1\n"; const char *nodes = "1 0 -1 -1\n" "1 0 -1 1\n" "0 0 -1 -1\n" "1 0 -1 0\n" - "0 0 -1 1\n"; + "0 0 -1 1\n" + "0 0 -1 2\n"; tsk_table_collection_t tables; tsk_treeseq_t ts; tsk_node_t node; tsk_individual_t individual; tsk_flags_t load_flags = TSK_BUILD_INDEXES; int ret; + tsk_id_t pat_id, mat_id; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); tables.sequence_length = 1.0; parse_individuals(individuals, &tables.individuals); - CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 2); + CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 3); + parse_nodes(nodes, &tables.nodes); - CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5); + CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 6); ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -1927,6 +1931,10 @@ test_simplest_individuals(void) CU_ASSERT_EQUAL_FATAL(individual.flags, 1); CU_ASSERT_EQUAL_FATAL(individual.location_length, 1); CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.25); + CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2); + CU_ASSERT_EQUAL_FATAL(individual.parents[0], -1); + CU_ASSERT_EQUAL_FATAL(individual.parents[1], -1); + pat_id = individual.id; CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 1); CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 3); @@ -1937,10 +1945,26 @@ test_simplest_individuals(void) CU_ASSERT_EQUAL_FATAL(individual.location_length, 2); CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.5); CU_ASSERT_EQUAL_FATAL(individual.location[1], 0.25); + CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2); + CU_ASSERT_EQUAL_FATAL(individual.parents[0], -1); + CU_ASSERT_EQUAL_FATAL(individual.parents[1], -1); + mat_id = individual.id; CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 2); CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 1); CU_ASSERT_EQUAL_FATAL(individual.nodes[1], 4); + ret = tsk_treeseq_get_individual(&ts, 2, &individual); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL_FATAL(individual.id, 2); + CU_ASSERT_EQUAL_FATAL(individual.flags, 3); + CU_ASSERT_EQUAL_FATAL(individual.location_length, 1); + CU_ASSERT_EQUAL_FATAL(individual.location[0], 0.3); + CU_ASSERT_EQUAL_FATAL(individual.parents_length, 2); + CU_ASSERT_EQUAL_FATAL(individual.parents[0], pat_id); + CU_ASSERT_EQUAL_FATAL(individual.parents[1], mat_id); + CU_ASSERT_EQUAL_FATAL(individual.nodes_length, 1); + CU_ASSERT_EQUAL_FATAL(individual.nodes[0], 5); + ret = tsk_treeseq_get_individual(&ts, 3, &individual); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS); tsk_treeseq_free(&ts); @@ -2005,9 +2029,11 @@ test_simplest_bad_individuals(void) tables.nodes.individual[0] = TSK_NULL; /* add two individuals */ - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_EQUAL(ret, 0); - ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0); CU_ASSERT_EQUAL(ret, 1); /* Bad individual ID */ @@ -2788,9 +2814,9 @@ test_simplest_individual_filter(void) CU_ASSERT_EQUAL_FATAL(ret, 0); tables.sequence_length = 1; - tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, "0", 1); - tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, "1", 1); - tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, "2", 1); + tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, "0", 1); + tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, "1", 1); + tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0, "2", 1); /* Two nodes referring to individual 1 */ tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0); tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0); diff --git a/c/tests/testlib.c b/c/tests/testlib.c index aca2ad3bbb..fccdc23536 100644 --- a/c/tests/testlib.c +++ b/c/tests/testlib.c @@ -90,9 +90,9 @@ const char *paper_ex_sites = "1 0\n" const char *paper_ex_mutations = "0 2 1\n" "1 0 1\n" "2 5 1\n"; -/* Two (diploid) indivduals */ -const char *paper_ex_individuals = "0 0.2,1.5\n" - "0 0.0,0.0\n"; +/* Two (diploid) individuals */ +const char *paper_ex_individuals = "0 0.2,1.5 -1,-1\n" + "0 0.0,0.0 -1,-1\n"; /*** An example of a nonbinary tree sequence ***/ /* @@ -592,8 +592,11 @@ parse_individuals(const char *text, tsk_individual_table_t *individual_table) char sub_line[MAX_LINE]; const char *whitespace = " \t"; char *p, *q; + char *p_cont, *q_cont; // re-entrant pointers for strtok_r double location[MAX_LINE]; int location_len; + tsk_id_t parents[MAX_LINE]; + int parents_len; int flags; char *name; @@ -611,11 +614,11 @@ parse_individuals(const char *text, tsk_individual_table_t *individual_table) c++; } line[k] = '\0'; - p = strtok(line, whitespace); + p = strtok_r(line, whitespace, &p_cont); CU_ASSERT_FATAL(p != NULL); flags = atoi(p); - p = strtok(NULL, whitespace); + p = strtok_r(NULL, whitespace, &p_cont); CU_ASSERT_FATAL(p != NULL); // the locations are comma-separated location_len = 1; @@ -628,21 +631,40 @@ parse_individuals(const char *text, tsk_individual_table_t *individual_table) } CU_ASSERT_FATAL(location_len >= 1); strncpy(sub_line, p, MAX_LINE); - q = strtok(sub_line, ","); + q = strtok_r(sub_line, ",", &q_cont); for (k = 0; k < location_len; k++) { CU_ASSERT_FATAL(q != NULL); location[k] = atof(q); - q = strtok(NULL, ","); + q = strtok_r(NULL, ",", &q_cont); } CU_ASSERT_FATAL(q == NULL); - p = strtok(NULL, whitespace); + p = strtok_r(NULL, whitespace, &p_cont); + // the parents are comma-separated + parents_len = 1; + q = p; + while (*q != '\0') { + if (*q == ',') { + parents_len++; + } + q++; + } + CU_ASSERT_FATAL(parents_len >= 1); + strncpy(sub_line, p, MAX_LINE); + q = strtok_r(sub_line, ",", &q_cont); + for (k = 0; k < parents_len; k++) { + CU_ASSERT_FATAL(q != NULL); + parents[k] = atoi(q); + q = strtok_r(NULL, ",", &q_cont); + } + CU_ASSERT_FATAL(q == NULL); + p = strtok_r(NULL, whitespace, &p_cont); if (p == NULL) { name = ""; } else { name = p; } - ret = tsk_individual_table_add_row( - individual_table, flags, location, location_len, name, strlen(name)); + ret = tsk_individual_table_add_row(individual_table, flags, location, + location_len, parents, parents_len, name, strlen(name)); CU_ASSERT_FATAL(ret >= 0); } } @@ -750,8 +772,8 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations) ret = tsk_population_table_add_row( &tables.populations, metadata[m], strlen(metadata[m])); CU_ASSERT_EQUAL_FATAL(ret, j); - ret = tsk_individual_table_add_row( - &tables.individuals, 0, position, 2, metadata[m], strlen(metadata[m])); + ret = tsk_individual_table_add_row(&tables.individuals, 0, position, 2, NULL, 0, + metadata[m], strlen(metadata[m])); CU_ASSERT_EQUAL_FATAL(ret, j); ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, j, j, metadata[m], strlen(metadata[m])); diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 3f3164c87b..e0953a3ed4 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -242,6 +242,11 @@ tsk_individual_table_expand_main_columns( if (ret != 0) { goto out; } + ret = expand_column( + (void **) &self->parents_offset, new_size + 1, sizeof(tsk_size_t)); + if (ret != 0) { + goto out; + } ret = expand_column( (void **) &self->metadata_offset, new_size + 1, sizeof(tsk_size_t)); if (ret != 0) { @@ -277,6 +282,30 @@ tsk_individual_table_expand_location( return ret; } +static int +tsk_individual_table_expand_parents( + tsk_individual_table_t *self, tsk_size_t additional_length) +{ + int ret = 0; + tsk_size_t increment + = TSK_MAX(additional_length, self->max_parents_length_increment); + tsk_size_t new_size = self->max_parents_length + increment; + + if (check_offset_overflow(self->parents_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } + if ((self->parents_length + additional_length) > self->max_parents_length) { + ret = expand_column((void **) &self->parents, new_size, sizeof(tsk_id_t)); + if (ret != 0) { + goto out; + } + self->max_parents_length = new_size; + } +out: + return ret; +} + static int tsk_individual_table_expand_metadata( tsk_individual_table_t *self, tsk_size_t additional_length) @@ -334,6 +363,17 @@ tsk_individual_table_set_max_location_length_increment( return 0; } +int +tsk_individual_table_set_max_parents_length_increment( + tsk_individual_table_t *self, tsk_size_t max_parents_length_increment) +{ + if (max_parents_length_increment == 0) { + max_parents_length_increment = DEFAULT_SIZE_INCREMENT; + } + self->max_parents_length_increment = (tsk_size_t) max_parents_length_increment; + return 0; +} + int tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(options)) { @@ -344,6 +384,7 @@ tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(o * even if the table is empty */ self->max_rows_increment = 1; self->max_location_length_increment = 1; + self->max_parents_length_increment = 1; self->max_metadata_length_increment = 1; ret = tsk_individual_table_expand_main_columns(self, 1); if (ret != 0) { @@ -354,6 +395,11 @@ tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(o goto out; } self->location_offset[0] = 0; + ret = tsk_individual_table_expand_parents(self, 1); + if (ret != 0) { + goto out; + } + self->parents_offset[0] = 0; ret = tsk_individual_table_expand_metadata(self, 1); if (ret != 0) { goto out; @@ -361,6 +407,7 @@ tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(o self->metadata_offset[0] = 0; self->max_rows_increment = DEFAULT_SIZE_INCREMENT; self->max_location_length_increment = DEFAULT_SIZE_INCREMENT; + self->max_parents_length_increment = DEFAULT_SIZE_INCREMENT; self->max_metadata_length_increment = DEFAULT_SIZE_INCREMENT; tsk_individual_table_set_metadata_schema(self, NULL, 0); out: @@ -380,7 +427,8 @@ tsk_individual_table_copy(const tsk_individual_table_t *self, } } ret = tsk_individual_table_set_columns(dest, self->num_rows, self->flags, - self->location, self->location_offset, self->metadata, self->metadata_offset); + self->location, self->location_offset, self->parents, self->parents_offset, + self->metadata, self->metadata_offset); if (ret != 0) { goto out; } @@ -393,7 +441,8 @@ tsk_individual_table_copy(const tsk_individual_table_t *self, int TSK_WARN_UNUSED tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows, const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset, - const char *metadata, const tsk_size_t *metadata_offset) + const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata, + const tsk_size_t *metadata_offset) { int ret; @@ -401,8 +450,8 @@ tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_ro if (ret != 0) { goto out; } - ret = tsk_individual_table_append_columns( - self, num_rows, flags, location, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_append_columns(self, num_rows, flags, location, + location_offset, parents, parents_offset, metadata, metadata_offset); out: return ret; } @@ -410,10 +459,11 @@ tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_ro int tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num_rows, const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset, - const char *metadata, const tsk_size_t *metadata_offset) + const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata, + const tsk_size_t *metadata_offset) { int ret; - tsk_size_t j, metadata_length, location_length; + tsk_size_t j, metadata_length, location_length, parents_length; if (flags == NULL) { ret = TSK_ERR_BAD_PARAM_VALUE; @@ -423,6 +473,10 @@ tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num ret = TSK_ERR_BAD_PARAM_VALUE; goto out; } + if ((parents == NULL) != (parents_offset == NULL)) { + ret = TSK_ERR_BAD_PARAM_VALUE; + goto out; + } if ((metadata == NULL) != (metadata_offset == NULL)) { ret = TSK_ERR_BAD_PARAM_VALUE; goto out; @@ -455,6 +509,29 @@ tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num location_length * sizeof(double)); self->location_length += location_length; } + if (parents == NULL) { + for (j = 0; j < num_rows; j++) { + self->parents_offset[self->num_rows + j + 1] + = (tsk_size_t) self->parents_length; + } + } else { + ret = check_offsets(num_rows, parents_offset, 0, false); + if (ret != 0) { + goto out; + } + for (j = 0; j < num_rows; j++) { + self->parents_offset[self->num_rows + j] + = (tsk_size_t) self->parents_length + parents_offset[j]; + } + parents_length = parents_offset[num_rows]; + ret = tsk_individual_table_expand_parents(self, parents_length); + if (ret != 0) { + goto out; + } + memcpy(self->parents + self->parents_length, parents, + parents_length * sizeof(tsk_id_t)); + self->parents_length += parents_length; + } if (metadata == NULL) { for (j = 0; j < num_rows; j++) { self->metadata_offset[self->num_rows + j + 1] @@ -480,6 +557,7 @@ tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num } self->num_rows += (tsk_size_t) num_rows; self->location_offset[self->num_rows] = self->location_length; + self->parents_offset[self->num_rows] = self->parents_length; self->metadata_offset[self->num_rows] = self->metadata_length; out: return ret; @@ -487,10 +565,12 @@ tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num static tsk_id_t tsk_individual_table_add_row_internal(tsk_individual_table_t *self, tsk_flags_t flags, - const double *location, tsk_size_t location_length, const char *metadata, - tsk_size_t metadata_length) + const double *location, tsk_size_t location_length, const tsk_id_t *parents, + const tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length) { + tsk_bug_assert(self->num_rows < self->max_rows); + tsk_bug_assert(self->parents_length + parents_length <= self->max_parents_length); tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length); tsk_bug_assert(self->location_length + location_length <= self->max_location_length); self->flags[self->num_rows] = flags; @@ -498,6 +578,10 @@ tsk_individual_table_add_row_internal(tsk_individual_table_t *self, tsk_flags_t location_length * sizeof(double)); self->location_offset[self->num_rows + 1] = self->location_length + location_length; self->location_length += location_length; + memcpy(self->parents + self->parents_length, parents, + parents_length * sizeof(tsk_id_t)); + self->parents_offset[self->num_rows + 1] = self->parents_length + parents_length; + self->parents_length += parents_length; memcpy(self->metadata + self->metadata_length, metadata, metadata_length * sizeof(char)); self->metadata_offset[self->num_rows + 1] = self->metadata_length + metadata_length; @@ -508,8 +592,8 @@ tsk_individual_table_add_row_internal(tsk_individual_table_t *self, tsk_flags_t tsk_id_t tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags, - const double *location, tsk_size_t location_length, const char *metadata, - tsk_size_t metadata_length) + const double *location, tsk_size_t location_length, const tsk_id_t *parents, + tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length) { int ret = 0; @@ -521,12 +605,17 @@ tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags, if (ret != 0) { goto out; } + ret = tsk_individual_table_expand_parents(self, (tsk_size_t) parents_length); + if (ret != 0) { + goto out; + } ret = tsk_individual_table_expand_metadata(self, (tsk_size_t) metadata_length); if (ret != 0) { goto out; } ret = tsk_individual_table_add_row_internal(self, flags, location, - (tsk_size_t) location_length, metadata, (tsk_size_t) metadata_length); + (tsk_size_t) location_length, parents, (tsk_size_t) parents_length, metadata, + (tsk_size_t) metadata_length); out: return ret; } @@ -548,6 +637,7 @@ tsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_rows) } self->num_rows = num_rows; self->location_length = self->location_offset[num_rows]; + self->parents_length = self->parents_offset[num_rows]; self->metadata_length = self->metadata_offset[num_rows]; out: return ret; @@ -559,6 +649,8 @@ tsk_individual_table_free(tsk_individual_table_t *self) tsk_safe_free(self->flags); tsk_safe_free(self->location); tsk_safe_free(self->location_offset); + tsk_safe_free(self->parents); + tsk_safe_free(self->parents_offset); tsk_safe_free(self->metadata); tsk_safe_free(self->metadata_offset); tsk_safe_free(self->metadata_schema); @@ -583,6 +675,7 @@ tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out) write_metadata_schema_header( out, self->metadata_schema, self->metadata_schema_length); fprintf(out, "id\tflags\tlocation_offset\tlocation\t"); + fprintf(out, "parents_offset\tparents\n"); fprintf(out, "metadata_offset\tmetadata\n"); for (j = 0; j < self->num_rows; j++) { fprintf(out, "%d\t%d\t", (int) j, self->flags[j]); @@ -594,6 +687,14 @@ tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out) } } fprintf(out, "\t"); + fprintf(out, "%d\t", self->parents_offset[j]); + for (k = self->parents_offset[j]; k < self->parents_offset[j + 1]; k++) { + fprintf(out, "%d", self->parents[k]); + if (k + 1 < self->parents_offset[j + 1]) { + fprintf(out, ","); + } + } + fprintf(out, "\t"); fprintf(out, "%d\t", self->metadata_offset[j]); for (k = self->metadata_offset[j]; k < self->metadata_offset[j + 1]; k++) { fprintf(out, "%c", self->metadata[k]); @@ -611,6 +712,8 @@ tsk_individual_table_get_row_unsafe( row->location_length = self->location_offset[index + 1] - self->location_offset[index]; row->location = self->location + self->location_offset[index]; + row->parents_length = self->parents_offset[index + 1] - self->parents_offset[index]; + row->parents = self->parents + self->parents_offset[index]; row->metadata_length = self->metadata_offset[index + 1] - self->metadata_offset[index]; row->metadata = self->metadata + self->metadata_offset[index]; @@ -656,7 +759,7 @@ tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out) if (err < 0) { goto out; } - err = fprintf(out, "id\tflags\tlocation\tmetadata\n"); + err = fprintf(out, "id\tflags\tlocation\tparents\tmetadata\n"); if (err < 0) { goto out; } @@ -678,6 +781,18 @@ tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out) } } } + for (k = self->parents_offset[j]; k < self->parents_offset[j + 1]; k++) { + err = fprintf(out, "%d", self->parents[k]); + if (err < 0) { + goto out; + } + if (k + 1 < self->parents_offset[j + 1]) { + err = fprintf(out, ","); + if (err < 0) { + goto out; + } + } + } err = fprintf( out, "\t%.*s\n", metadata_len, self->metadata + self->metadata_offset[j]); if (err < 0) { @@ -701,7 +816,14 @@ tsk_individual_table_equals(const tsk_individual_table_t *self, == 0 && memcmp( self->location, other->location, self->location_length * sizeof(double)) + == 0 + && memcmp(self->parents_offset, other->parents_offset, + (self->num_rows + 1) * sizeof(tsk_size_t)) + == 0 + && memcmp( + self->parents, other->parents, self->parents_length * sizeof(tsk_id_t)) == 0; + if (!(options & TSK_CMP_IGNORE_METADATA)) { ret = ret && self->metadata_length == other->metadata_length && self->metadata_schema_length == other->metadata_schema_length @@ -727,6 +849,10 @@ tsk_individual_table_dump(const tsk_individual_table_t *self, kastore_t *store) KAS_FLOAT64 }, { "individuals/location_offset", (void *) self->location_offset, self->num_rows + 1, KAS_UINT32 }, + { "individuals/parents", (void *) self->parents, self->parents_length, + KAS_INT32 }, + { "individuals/parents_offset", (void *) self->parents_offset, + self->num_rows + 1, KAS_UINT32 }, { "individuals/metadata", (void *) self->metadata, self->metadata_length, KAS_UINT8 }, { "individuals/metadata_offset", (void *) self->metadata_offset, @@ -744,10 +870,13 @@ tsk_individual_table_load(tsk_individual_table_t *self, kastore_t *store) tsk_flags_t *flags = NULL; double *location = NULL; tsk_size_t *location_offset = NULL; + tsk_id_t *parents = NULL; + tsk_size_t *parents_offset = NULL; char *metadata = NULL; tsk_size_t *metadata_offset = NULL; char *metadata_schema = NULL; - tsk_size_t num_rows, location_length, metadata_length, metadata_schema_length; + tsk_size_t num_rows, location_length, parents_length, metadata_length, + metadata_schema_length; read_table_col_t read_cols[] = { { "individuals/flags", (void **) &flags, &num_rows, 0, KAS_UINT32, 0 }, @@ -755,6 +884,9 @@ tsk_individual_table_load(tsk_individual_table_t *self, kastore_t *store) 0 }, { "individuals/location_offset", (void **) &location_offset, &num_rows, 1, KAS_UINT32, 0 }, + { "individuals/parents", (void **) &parents, &parents_length, 0, KAS_INT32, 0 }, + { "individuals/parents_offset", (void **) &parents_offset, &num_rows, 1, + KAS_UINT32, 0 }, { "individuals/metadata", (void **) &metadata, &metadata_length, 0, KAS_UINT8, 0 }, { "individuals/metadata_offset", (void **) &metadata_offset, &num_rows, 1, @@ -771,12 +903,16 @@ tsk_individual_table_load(tsk_individual_table_t *self, kastore_t *store) ret = TSK_ERR_BAD_OFFSET; goto out; } + if (parents_offset[num_rows] != parents_length) { + ret = TSK_ERR_BAD_OFFSET; + goto out; + } if (metadata_offset[num_rows] != metadata_length) { ret = TSK_ERR_BAD_OFFSET; goto out; } - ret = tsk_individual_table_set_columns( - self, num_rows, flags, location, location_offset, metadata, metadata_offset); + ret = tsk_individual_table_set_columns(self, num_rows, flags, location, + location_offset, parents, parents_offset, metadata, metadata_offset); if (ret != 0) { goto out; } @@ -7005,7 +7141,8 @@ simplifier_finalise_references(simplifier_t *self) individual_id_map[j] = TSK_NULL; if (keep) { ret = tsk_individual_table_add_row(&self->tables->individuals, ind.flags, - ind.location, ind.location_length, ind.metadata, ind.metadata_length); + ind.location, ind.location_length, ind.parents, ind.parents_length, + ind.metadata, ind.metadata_length); if (ret < 0) { goto out; } @@ -9112,7 +9249,8 @@ tsk_table_collection_add_and_remap_node(tsk_table_collection_t *self, goto out; } ret = tsk_individual_table_add_row(&self->individuals, ind.flags, - ind.location, ind.location_length, ind.metadata, ind.metadata_length); + ind.location, ind.location_length, ind.parents, ind.parents_length, + ind.metadata, ind.metadata_length); if (ret < 0) { goto out; } diff --git a/c/tskit/tables.h b/c/tskit/tables.h index e31d5e45a1..fc2c456724 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -95,6 +95,10 @@ typedef struct { const double *location; /** @brief Number of spatial dimensions. */ tsk_size_t location_length; + /** @brief IDs of the parents. The number of parents given by ``parents_length``*/ + tsk_id_t *parents; + /** @brief Number of parents. */ + tsk_size_t parents_length; /** @brief Metadata. */ const char *metadata; /** @brief Size of the metadata in bytes. */ @@ -297,6 +301,10 @@ typedef struct { tsk_size_t location_length; tsk_size_t max_location_length; tsk_size_t max_location_length_increment; + /** @brief The total length of the parent column. */ + tsk_size_t parents_length; + tsk_size_t max_parents_length; + tsk_size_t max_parents_length_increment; /** @brief The total length of the metadata column. */ tsk_size_t metadata_length; tsk_size_t max_metadata_length; @@ -308,6 +316,10 @@ typedef struct { double *location; /** @brief The location_offset column. */ tsk_size_t *location_offset; + /** @brief The parents column. */ + tsk_id_t *parents; + /** @brief The parents_offset column. */ + tsk_size_t *parents_offset; /** @brief The metadata column. */ char *metadata; /** @brief The metadata_offset column. */ @@ -770,11 +782,10 @@ int tsk_individual_table_free(tsk_individual_table_t *self); @brief Adds a row to this individual table. @rst -Add a new individual with the specified ``flags``, ``location`` and ``metadata`` -to the table. Copies of the ``location`` and ``metadata`` parameters are taken -immediately. -See the :ref:`table definition ` for details -of the columns in this table. +Add a new individual with the specified ``flags``, ``location``, ``parents`` and +``metadata`` to the table. Copies of the ``location``, ``parents`` and ``metadata`` +parameters are taken immediately. See the :ref:`table definition +` for details of the columns in this table. @endrst @param self A pointer to a tsk_individual_table_t object. @@ -784,6 +795,11 @@ of the columns in this table. @param location_length The number of dimensions in the locations position. Note this the number of elements in the corresponding double array not the number of bytes. +@param parents A pointer to a ``tsk_id`` array representing the parents + of the new individual. Can be ``NULL`` if ``parents_length`` is 0. +@param parents_length The number of parents. + Note this the number of elements in the corresponding ``tsk_id`` array + not the number of bytes. @param metadata The metadata to be associated with the new individual. This is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0. @param metadata_length The size of the metadata array in bytes. @@ -791,8 +807,8 @@ of the columns in this table. or a negative value on failure. */ tsk_id_t tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags, - const double *location, tsk_size_t location_length, const char *metadata, - tsk_size_t metadata_length); + const double *location, tsk_size_t location_length, const tsk_id_t *parents, + tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length); /** @brief Clears this table, setting the number of rows to zero. @@ -915,12 +931,14 @@ void tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE * /* Undocumented methods */ int tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows, - const tsk_flags_t *flags, const double *location, const tsk_size_t *location_length, - const char *metadata, const tsk_size_t *metadata_length); + const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset, + const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata, + const tsk_size_t *metadata_offset); int tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num_rows, const tsk_flags_t *flags, const double *location, - const tsk_size_t *location_length, const char *metadata, - const tsk_size_t *metadata_length); + const tsk_size_t *location_offset, const tsk_id_t *parents, + const tsk_size_t *parents_offset, const char *metadata, + const tsk_size_t *metadata_offset); int tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out); int tsk_individual_table_set_max_rows_increment( tsk_individual_table_t *self, tsk_size_t max_rows_increment); @@ -928,6 +946,8 @@ int tsk_individual_table_set_max_metadata_length_increment( tsk_individual_table_t *self, tsk_size_t max_metadata_length_increment); int tsk_individual_table_set_max_location_length_increment( tsk_individual_table_t *self, tsk_size_t max_location_length_increment); +int tsk_individual_table_set_max_parents_length_increment( + tsk_individual_table_t *self, tsk_size_t max_parents_length_increment); /** @defgroup NODE_TABLE_API_GROUP Node table API. diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index cad8c72245..4b7339300d 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -340,6 +340,7 @@ make_individual_row(const tsk_individual_t *r) PyObject *ret = NULL; PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length); PyArrayObject *location = NULL; + PyArrayObject *parents = NULL; npy_intp dims; dims = (npy_intp) r->location_length; @@ -348,9 +349,16 @@ make_individual_row(const tsk_individual_t *r) goto out; } memcpy(PyArray_DATA(location), r->location, r->location_length * sizeof(double)); - ret = Py_BuildValue("IOO", (unsigned int) r->flags, location, metadata); + dims = (npy_intp) r->parents_length; + parents = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32); + if (metadata == NULL || parents == NULL) { + goto out; + } + memcpy(PyArray_DATA(parents), r->parents, r->parents_length * sizeof(tsk_id_t)); + ret = Py_BuildValue("IOOO", (unsigned int) r->flags, location, parents, metadata); out: Py_XDECREF(location); + Py_XDECREF(parents); Py_XDECREF(metadata); return ret; } @@ -361,21 +369,27 @@ make_individual_object(const tsk_individual_t *r) PyObject *ret = NULL; PyObject *metadata = make_metadata(r->metadata, (Py_ssize_t) r->metadata_length); PyArrayObject *location = NULL; + PyArrayObject *parents = NULL; PyArrayObject *nodes = NULL; npy_intp dims; dims = (npy_intp) r->location_length; location = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_FLOAT64); + dims = (npy_intp) r->parents_length; + parents = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32); dims = (npy_intp) r->nodes_length; nodes = (PyArrayObject *) PyArray_SimpleNew(1, &dims, NPY_INT32); - if (metadata == NULL || location == NULL || nodes == NULL) { + if (metadata == NULL || location == NULL || parents == NULL || nodes == NULL) { goto out; } memcpy(PyArray_DATA(location), r->location, r->location_length * sizeof(double)); + memcpy(PyArray_DATA(parents), r->parents, r->parents_length * sizeof(tsk_id_t)); memcpy(PyArray_DATA(nodes), r->nodes, r->nodes_length * sizeof(tsk_id_t)); - ret = Py_BuildValue("IOOO", (unsigned int) r->flags, location, metadata, nodes); + ret = Py_BuildValue( + "IOOOO", (unsigned int) r->flags, location, parents, metadata, nodes); out: Py_XDECREF(location); + Py_XDECREF(parents); Py_XDECREF(metadata); Py_XDECREF(nodes); return ret; @@ -837,19 +851,23 @@ IndividualTable_add_row(IndividualTable *self, PyObject *args, PyObject *kwds) unsigned int flags = 0; PyObject *py_metadata = Py_None; PyObject *py_location = Py_None; + PyObject *py_parents = Py_None; PyArrayObject *location_array = NULL; double *location_data = NULL; tsk_size_t location_length = 0; + PyArrayObject *parents_array = NULL; + tsk_id_t *parents_data = NULL; + tsk_size_t parents_length = 0; char *metadata = ""; Py_ssize_t metadata_length = 0; npy_intp *shape; - static char *kwlist[] = { "flags", "location", "metadata", NULL }; + static char *kwlist[] = { "flags", "location", "parents", "metadata", NULL }; if (IndividualTable_check_state(self) != 0) { goto out; } - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&OO", kwlist, &uint32_converter, - &flags, &py_location, &py_metadata)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&OOO", kwlist, &uint32_converter, + &flags, &py_location, &py_parents, &py_metadata)) { goto out; } if (py_metadata != Py_None) { @@ -868,8 +886,19 @@ IndividualTable_add_row(IndividualTable *self, PyObject *args, PyObject *kwds) location_length = (tsk_size_t) shape[0]; location_data = PyArray_DATA(location_array); } + if (py_parents != Py_None) { + /* This ensures that only 1D arrays are accepted. */ + parents_array = (PyArrayObject *) PyArray_FromAny(py_parents, + PyArray_DescrFromType(NPY_INT32), 1, 1, NPY_ARRAY_IN_ARRAY, NULL); + if (parents_array == NULL) { + goto out; + } + shape = PyArray_DIMS(parents_array); + parents_length = (tsk_size_t) shape[0]; + parents_data = PyArray_DATA(parents_array); + } err = tsk_individual_table_add_row(self->table, (tsk_flags_t) flags, location_data, - location_length, metadata, metadata_length); + location_length, parents_data, parents_length, metadata, metadata_length); if (err < 0) { handle_library_error(err); goto out; @@ -877,6 +906,7 @@ IndividualTable_add_row(IndividualTable *self, PyObject *args, PyObject *kwds) ret = Py_BuildValue("i", err); out: Py_XDECREF(location_array); + Py_XDECREF(parents_array); return ret; } @@ -1093,6 +1123,34 @@ IndividualTable_get_location_offset(IndividualTable *self, void *closure) return ret; } +static PyObject * +IndividualTable_get_parents(IndividualTable *self, void *closure) +{ + PyObject *ret = NULL; + + if (IndividualTable_check_state(self) != 0) { + goto out; + } + ret = table_get_column_array( + self->table->parents_length, self->table->parents, NPY_INT32, sizeof(tsk_id_t)); +out: + return ret; +} + +static PyObject * +IndividualTable_get_parents_offset(IndividualTable *self, void *closure) +{ + PyObject *ret = NULL; + + if (IndividualTable_check_state(self) != 0) { + goto out; + } + ret = table_get_column_array(self->table->num_rows + 1, self->table->parents_offset, + NPY_UINT32, sizeof(uint32_t)); +out: + return ret; +} + static PyObject * IndividualTable_get_metadata(IndividualTable *self, void *closure) { @@ -1180,6 +1238,12 @@ static PyGetSetDef IndividualTable_getsetters[] = { { .name = "location_offset", .get = (getter) IndividualTable_get_location_offset, .doc = "The location offset array" }, + { .name = "parents", + .get = (getter) IndividualTable_get_parents, + .doc = "The parents array" }, + { .name = "parents_offset", + .get = (getter) IndividualTable_get_parents_offset, + .doc = "The parents offset array" }, { .name = "metadata", .get = (getter) IndividualTable_get_metadata, .doc = "The metadata array" }, diff --git a/python/lwt_interface/dict_encoding_testlib.py b/python/lwt_interface/dict_encoding_testlib.py index 87326afe5a..44d92efdbc 100644 --- a/python/lwt_interface/dict_encoding_testlib.py +++ b/python/lwt_interface/dict_encoding_testlib.py @@ -70,7 +70,7 @@ def full_ts(): # TODO replace this with properly linked up individuals using sim_ancestry # once 1.0 is released. for j in range(n): - tables.individuals.add_row(flags=j, location=(j, j)) + tables.individuals.add_row(flags=j, location=(j, j), parents=(j, j)) for name, table in tables.name_map.items(): if name != "provenances": diff --git a/python/lwt_interface/tskit_lwt_interface.h b/python/lwt_interface/tskit_lwt_interface.h index c278674e3a..a40b0675bf 100644 --- a/python/lwt_interface/tskit_lwt_interface.h +++ b/python/lwt_interface/tskit_lwt_interface.h @@ -173,17 +173,23 @@ parse_individual_table_dict( { int err; int ret = -1; - size_t num_rows, metadata_length, location_length; + size_t num_rows, metadata_length, location_length, parents_length; char *metadata_data = NULL; double *location_data = NULL; + tsk_id_t *parents_data = NULL; uint32_t *metadata_offset_data = NULL; uint32_t *location_offset_data = NULL; + uint32_t *parents_offset_data = NULL; PyObject *flags_input = NULL; PyArrayObject *flags_array = NULL; PyObject *location_input = NULL; PyArrayObject *location_array = NULL; PyObject *location_offset_input = NULL; PyArrayObject *location_offset_array = NULL; + PyObject *parents_input = NULL; + PyArrayObject *parents_array = NULL; + PyObject *parents_offset_input = NULL; + PyArrayObject *parents_offset_array = NULL; PyObject *metadata_input = NULL; PyArrayObject *metadata_array = NULL; PyObject *metadata_offset_input = NULL; @@ -205,6 +211,14 @@ parse_individual_table_dict( if (location_offset_input == NULL) { goto out; } + parents_input = get_table_dict_value(dict, "parents", false); + if (parents_input == NULL) { + goto out; + } + parents_offset_input = get_table_dict_value(dict, "parents_offset", false); + if (parents_offset_input == NULL) { + goto out; + } metadata_input = get_table_dict_value(dict, "metadata", false); if (metadata_input == NULL) { goto out; @@ -242,6 +256,25 @@ parse_individual_table_dict( } location_offset_data = PyArray_DATA(location_offset_array); } + if ((parents_input == Py_None) != (parents_offset_input == Py_None)) { + PyErr_SetString( + PyExc_TypeError, "parents and parents_offset must be specified together"); + goto out; + } + if (parents_input != Py_None) { + parents_array + = table_read_column_array(parents_input, NPY_INT32, &parents_length, false); + if (parents_array == NULL) { + goto out; + } + parents_data = PyArray_DATA(parents_array); + parents_offset_array = table_read_offset_array( + parents_offset_input, &num_rows, parents_length, true); + if (parents_offset_array == NULL) { + goto out; + } + parents_offset_data = PyArray_DATA(parents_offset_array); + } if ((metadata_input == Py_None) != (metadata_offset_input == Py_None)) { PyErr_SetString( PyExc_TypeError, "metadata and metadata_offset must be specified together"); @@ -284,7 +317,8 @@ parse_individual_table_dict( } } err = tsk_individual_table_append_columns(table, num_rows, PyArray_DATA(flags_array), - location_data, location_offset_data, metadata_data, metadata_offset_data); + location_data, location_offset_data, parents_data, parents_offset_data, + metadata_data, metadata_offset_data); if (err != 0) { handle_tskit_error(err); goto out; @@ -294,6 +328,8 @@ parse_individual_table_dict( Py_XDECREF(flags_array); Py_XDECREF(location_array); Py_XDECREF(location_offset_array); + Py_XDECREF(parents_array); + Py_XDECREF(parents_offset_array); Py_XDECREF(metadata_array); Py_XDECREF(metadata_offset_array); return ret; @@ -1450,6 +1486,10 @@ write_table_arrays(tsk_table_collection_t *tables, PyObject *dict) tables->individuals.location_length, NPY_FLOAT64 }, { "location_offset", (void *) tables->individuals.location_offset, tables->individuals.num_rows + 1, NPY_UINT32 }, + { "parents", (void *) tables->individuals.parents, + tables->individuals.parents_length, NPY_INT32 }, + { "parents_offset", (void *) tables->individuals.parents_offset, + tables->individuals.num_rows + 1, NPY_UINT32 }, { "metadata", (void *) tables->individuals.metadata, tables->individuals.metadata_length, NPY_INT8 }, { "metadata_offset", (void *) tables->individuals.metadata_offset, diff --git a/python/tests/conftest.py b/python/tests/conftest.py index d5d12f2aad..1c04d75c38 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -101,7 +101,7 @@ def ts_fixture(): # TODO replace this with properly linked up individuals using sim_ancestry # once 1.0 is released. for j in range(n): - tables.individuals.add_row(flags=j, location=(j, j)) + tables.individuals.add_row(flags=j, location=(j, j), parents=(j, j)) for name, table in tables.name_map.items(): if name != "provenances": diff --git a/python/tests/test_file_format.py b/python/tests/test_file_format.py index 92204c05ae..2a213162ad 100644 --- a/python/tests/test_file_format.py +++ b/python/tests/test_file_format.py @@ -501,6 +501,8 @@ def verify_keys(self, ts): "individuals/metadata", "individuals/metadata_offset", "individuals/metadata_schema", + "individuals/parents", + "individuals/parents_offset", "metadata", "metadata_schema", "migrations/dest", diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index 2543b17708..a7995fdeb7 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -855,6 +855,7 @@ class TestIndividualTable(CommonTestsMixin, MetadataTestsMixin): columns = [UInt32Column("flags")] ragged_list_columns = [ (DoubleColumn("location"), UInt32Column("location_offset")), + (Int32Column("parents"), UInt32Column("parents_offset")), (CharColumn("metadata"), UInt32Column("metadata_offset")), ] string_colnames = [] @@ -867,17 +868,19 @@ def test_simple_example(self): t = tskit.IndividualTable() t.add_row(flags=0, location=[], metadata=b"123") t.add_row(flags=1, location=(0, 1, 2, 3), metadata=b"\xf0") + t.add_row(flags=2, parents=[0, 1]) s = str(t) assert len(s) > 0 - assert len(t) == 2 + assert len(t) == 3 assert t[0].flags == 0 assert list(t[0].location) == [] assert t[0].metadata == b"123" assert t[1].flags == 1 assert list(t[1].location) == [0, 1, 2, 3] assert t[1].metadata == b"\xf0" + assert list(t[2].parents) == [0, 1] with pytest.raises(IndexError): - t.__getitem__(-3) + t.__getitem__(-4) def test_add_row_defaults(self): t = tskit.IndividualTable() @@ -885,6 +888,8 @@ def test_add_row_defaults(self): assert t.flags[0] == 0 assert len(t.location) == 0 assert t.location_offset[0] == 0 + assert len(t.parents) == 0 + assert t.parents_offset[0] == 0 assert len(t.metadata) == 0 assert t.metadata_offset[0] == 0 @@ -896,6 +901,8 @@ def test_add_row_bad_data(self): t.add_row(metadata=123) with pytest.raises(ValueError): t.add_row(location="1234") + with pytest.raises(ValueError): + t.add_row(parents="forty-two") def test_packset_location(self): t = tskit.IndividualTable() diff --git a/python/tests/tsutil.py b/python/tests/tsutil.py index fabebdd499..1554b2bc23 100644 --- a/python/tests/tsutil.py +++ b/python/tests/tsutil.py @@ -591,7 +591,7 @@ def py_subset(tables, nodes, record_provenance=True): if node.individual not in ind_map and node.individual != tskit.NULL: ind = full.individuals[node.individual] new_ind_id = tables.individuals.add_row( - ind.flags, ind.location, ind.metadata + ind.flags, ind.location, ind.parents, ind.metadata ) ind_map[node.individual] = new_ind_id if node.population not in pop_map and node.population != tskit.NULL: diff --git a/python/tskit/tables.py b/python/tskit/tables.py index 4df965c188..5e2c53b12f 100644 --- a/python/tskit/tables.py +++ b/python/tskit/tables.py @@ -49,6 +49,7 @@ class IndividualTableRow: flags: int location: np.ndarray + parents: np.ndarray metadata: bytes def __eq__(self, other): @@ -59,6 +60,7 @@ def __eq__(self, other): ( self.flags == other.flags, np.array_equal(self.location, other.location), + np.array_equal(self.parents, other.parents), self.metadata == other.metadata, ) ) @@ -469,6 +471,8 @@ class IndividualTable(BaseTable, MetadataMixin): "flags", "location", "location_offset", + "parents", + "parents_offset", "metadata", "metadata_offset", ] @@ -481,8 +485,9 @@ def __init__(self, max_rows_increment=0, ll_table=None): def _text_header_and_rows(self, limit=None): flags = self.flags location = util.unpack_arrays(self.location, self.location_offset) + parents = util.unpack_arrays(self.parents, self.parents_offset) metadata = util.unpack_bytes(self.metadata, self.metadata_offset) - headers = ("id", "flags", "location", "metadata") + headers = ("id", "flags", "location", "parents", "metadata") rows = [] if limit is None or self.num_rows <= limit: indexes = range(self.num_rows) @@ -498,12 +503,15 @@ def _text_header_and_rows(self, limit=None): else: md = base64.b64encode(metadata[j]).decode("utf8") location_str = ",".join(map(str, location[j])) + parents_str = ",".join(map(str, parents[j])) rows.append( - "{}\t{}\t{}\t{}".format(j, flags[j], location_str, md).split("\t") + "{}\t{}\t{}\t{}\t{}".format( + j, flags[j], location_str, parents_str, md + ).split("\t") ) return headers, rows - def add_row(self, flags=0, location=None, metadata=None): + def add_row(self, flags=0, location=None, parents=None, metadata=None): """ Adds a new row to this :class:`IndividualTable` and returns the ID of the corresponding individual. Metadata, if specified, will be validated and encoded @@ -523,13 +531,17 @@ def add_row(self, flags=0, location=None, metadata=None): if metadata is None: metadata = self.metadata_schema.empty_value metadata = self.metadata_schema.validate_and_encode_row(metadata) - return self.ll_table.add_row(flags=flags, location=location, metadata=metadata) + return self.ll_table.add_row( + flags=flags, location=location, parents=parents, metadata=metadata + ) def set_columns( self, flags=None, location=None, location_offset=None, + parents=None, + parents_offset=None, metadata=None, metadata_offset=None, metadata_schema=None, @@ -570,6 +582,8 @@ def set_columns( flags=flags, location=location, location_offset=location_offset, + parents=parents, + parents_offset=parents_offset, metadata=metadata, metadata_offset=metadata_offset, metadata_schema=metadata_schema, @@ -581,6 +595,8 @@ def append_columns( flags=None, location=None, location_offset=None, + parents=None, + parents_offset=None, metadata=None, metadata_offset=None, ): @@ -618,6 +634,8 @@ def append_columns( flags=flags, location=location, location_offset=location_offset, + parents=parents, + parents_offset=parents_offset, metadata=metadata, metadata_offset=metadata_offset, ) diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 92e93b79f0..320386fefe 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -161,6 +161,7 @@ def __init__( id_=None, flags=0, location=None, + parents=None, nodes=None, encoded_metadata=b"", metadata_decoder=lambda metadata: metadata, @@ -168,6 +169,7 @@ def __init__( self.id = id_ self.flags = flags self.location = location + self.parents = parents self._encoded_metadata = encoded_metadata self._metadata_decoder = metadata_decoder self.nodes = nodes @@ -179,6 +181,7 @@ def __eq__(self, other): and self._encoded_metadata == other._encoded_metadata and np.array_equal(self.nodes, other.nodes) and np.array_equal(self.location, other.location) + and np.array_equal(self.parents, other.parents) ) @@ -4423,11 +4426,18 @@ def individual(self, id_): :rtype: :class:`Individual` """ - flags, location, metadata, nodes = self._ll_tree_sequence.get_individual(id_) + ( + flags, + location, + parents, + metadata, + nodes, + ) = self._ll_tree_sequence.get_individual(id_) return Individual( id_=id_, flags=flags, location=location, + parents=parents, encoded_metadata=metadata, metadata_decoder=self.table_metadata_schemas.individual.decode_row, nodes=nodes,