diff --git a/c/tests/test_file_format.c b/c/tests/test_file_format.c index 255b7a3b5f..24a4ab536c 100644 --- a/c/tests/test_file_format.c +++ b/c/tests/test_file_format.c @@ -319,6 +319,7 @@ test_missing_required_column_pairs(void) tsk_table_collection_t t; const char *required_cols[][2] = { { "individuals/location", "individuals/location_offset" }, + { "individuals/parents", "individuals/parents_offset" }, { "individuals/metadata", "individuals/metadata_offset" }, { "mutations/derived_state", "mutations/derived_state_offset" }, { "mutations/metadata", "mutations/metadata_offset" }, @@ -438,6 +439,7 @@ test_bad_offset_columns(void) "edges/metadata_offset", "migrations/metadata_offset", "individuals/location_offset", + "individuals/parents_offset", "individuals/metadata_offset", "mutations/derived_state_offset", "mutations/metadata_offset", diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index c0e3a157a5..6a75a92818 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -2069,14 +2069,17 @@ test_individual_table(void) tsk_size_t k; uint32_t *flags; double *location; + tsk_id_t *parents; char *metadata; tsk_size_t *metadata_offset; + tsk_size_t *parents_offset; tsk_size_t *location_offset; tsk_individual_t individual; const char *test_metadata = "test"; tsk_size_t test_metadata_length = 4; char metadata_copy[test_metadata_length + 1]; tsk_size_t spatial_dimension = 2; + tsk_size_t num_parents = 2; double test_location[spatial_dimension]; for (k = 0; k < spatial_dimension; k++) { @@ -2165,6 +2168,14 @@ test_individual_table(void) for (j = 0; j < (tsk_id_t) num_rows + 1; j++) { location_offset[j] = (tsk_size_t) j * spatial_dimension; } + parents = malloc(num_parents * num_rows * sizeof(tsk_id_t)); + CU_ASSERT_FATAL(parents != NULL); + memset(parents, 0, num_parents * num_rows * sizeof(tsk_id_t)); + parents_offset = malloc((num_rows + 1) * sizeof(tsk_size_t)); + CU_ASSERT_FATAL(parents_offset != NULL); + for (j = 0; j < (tsk_id_t) num_rows + 1; 
j++) { + parents_offset[j] = (tsk_size_t) j * num_parents; + } metadata = malloc(num_rows * sizeof(char)); memset(metadata, 'a', num_rows * sizeof(char)); CU_ASSERT_FATAL(metadata != NULL); @@ -2174,7 +2185,7 @@ test_individual_table(void) metadata_offset[j] = (tsk_size_t) j; } ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, - location_offset, NULL, NULL, metadata, metadata_offset); + location_offset, parents, parents_offset, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.flags, flags, num_rows * sizeof(uint32_t)), 0); CU_ASSERT_EQUAL( @@ -2183,18 +2194,24 @@ test_individual_table(void) CU_ASSERT_EQUAL(memcmp(table.location_offset, location_offset, (num_rows + 1) * sizeof(tsk_size_t)), 0); + CU_ASSERT_EQUAL( + memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)), 0); + CU_ASSERT_EQUAL(memcmp(table.parents_offset, parents_offset, + (num_rows + 1) * sizeof(tsk_size_t)), + 0); CU_ASSERT_EQUAL(memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0); CU_ASSERT_EQUAL(memcmp(table.metadata_offset, metadata_offset, (num_rows + 1) * sizeof(tsk_size_t)), 0); CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.location_length, spatial_dimension * num_rows); + CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows); CU_ASSERT_EQUAL(table.metadata_length, num_rows); tsk_individual_table_print_state(&table, _devnull); /* Append another num_rows onto the end */ ret = tsk_individual_table_append_columns(&table, num_rows, flags, location, - location_offset, NULL, NULL, metadata, metadata_offset); + location_offset, parents, parents_offset, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(memcmp(table.flags, flags, num_rows * sizeof(uint32_t)), 0); CU_ASSERT_EQUAL( @@ -2208,8 +2225,15 @@ test_individual_table(void) CU_ASSERT_EQUAL(memcmp(table.location + spatial_dimension * num_rows, location, spatial_dimension * num_rows * sizeof(double)), 0); + 
CU_ASSERT_EQUAL( + memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)), 0); + CU_ASSERT_EQUAL(memcmp(table.parents + num_parents * num_rows, parents, + num_parents * num_rows * sizeof(tsk_id_t)), + 0); CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows); CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows); + CU_ASSERT_EQUAL(table.parents_length, 2 * num_parents * num_rows); + CU_ASSERT_EQUAL(table.location_length, 2 * spatial_dimension * num_rows); tsk_individual_table_print_state(&table, _devnull); ret = tsk_individual_table_dump_text(&table, _devnull); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -2224,36 +2248,48 @@ test_individual_table(void) CU_ASSERT_EQUAL(memcmp(table.location_offset, location_offset, (num_rows + 1) * sizeof(tsk_size_t)), 0); + CU_ASSERT_EQUAL( + memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)), 0); + CU_ASSERT_EQUAL(memcmp(table.parents_offset, parents_offset, + (num_rows + 1) * sizeof(tsk_size_t)), + 0); CU_ASSERT_EQUAL(memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0); CU_ASSERT_EQUAL(memcmp(table.metadata_offset, metadata_offset, (num_rows + 1) * sizeof(tsk_size_t)), 0); CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.location_length, spatial_dimension * num_rows); + CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows); CU_ASSERT_EQUAL(table.metadata_length, num_rows); tsk_individual_table_print_state(&table, _devnull); ret = tsk_individual_table_truncate(&table, num_rows + 1); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION); - // TODO: add tests for parent spec /* flags can't be NULL */ ret = tsk_individual_table_set_columns(&table, num_rows, NULL, location, - location_offset, NULL, NULL, metadata, metadata_offset); + location_offset, parents, parents_offset, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* location and location offset must be simultaneously NULL or not */ - ret = tsk_individual_table_set_columns( - &table, num_rows, 
flags, location, NULL, NULL, NULL, metadata, metadata_offset); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, NULL, + parents, parents_offset, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); ret = tsk_individual_table_set_columns(&table, num_rows, flags, NULL, location_offset, NULL, NULL, metadata, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); + /* parents and parents offset must be simultaneously NULL or not */ + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, + location_offset, parents, NULL, metadata, metadata_offset); + CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, + location_offset, NULL, parents_offset, metadata, metadata_offset); + CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* metadata and metadata offset must be simultaneously NULL or not */ ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, - location_offset, NULL, NULL, NULL, metadata_offset); + location_offset, parents, parents_offset, NULL, metadata_offset); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); - ret = tsk_individual_table_set_columns( - &table, num_rows, flags, location, location_offset, NULL, NULL, metadata, NULL); + ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, + location_offset, parents, parents_offset, metadata, NULL); CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE); /* if location and location_offset are both null, all locations are zero length */ @@ -2282,6 +2318,32 @@ test_individual_table(void) ret = tsk_individual_table_dump_text(&table, _devnull); CU_ASSERT_EQUAL_FATAL(ret, 0); + /* if parents and parents_offset are both null, all parents are zero length */ + num_rows = 10; + memset(parents_offset, 0, (num_rows + 1) * sizeof(tsk_size_t)); + ret = tsk_individual_table_set_columns( + &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL); + 
CU_ASSERT_EQUAL(ret, 0); + CU_ASSERT_EQUAL(memcmp(table.parents_offset, parents_offset, + (num_rows + 1) * sizeof(tsk_size_t)), + 0); + CU_ASSERT_EQUAL(table.num_rows, num_rows); + CU_ASSERT_EQUAL(table.parents_length, 0); + ret = tsk_individual_table_append_columns( + &table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL); + CU_ASSERT_EQUAL(ret, 0); + CU_ASSERT_EQUAL(memcmp(table.parents_offset, parents_offset, + (num_rows + 1) * sizeof(tsk_size_t)), + 0); + CU_ASSERT_EQUAL(memcmp(table.parents_offset + num_rows, parents_offset, + num_rows * sizeof(uint32_t)), + 0); + CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows); + CU_ASSERT_EQUAL(table.parents_length, 0); + tsk_individual_table_print_state(&table, _devnull); + ret = tsk_individual_table_dump_text(&table, _devnull); + CU_ASSERT_EQUAL_FATAL(ret, 0); + /* if metadata and metadata_offset are both null, all metadatas are zero length */ num_rows = 10; memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t)); @@ -2297,8 +2359,8 @@ test_individual_table(void) 0); CU_ASSERT_EQUAL(table.num_rows, num_rows); CU_ASSERT_EQUAL(table.metadata_length, 0); - ret = tsk_individual_table_append_columns( - &table, num_rows, flags, location, location_offset, NULL, NULL, NULL, NULL); + ret = tsk_individual_table_append_columns(&table, num_rows, flags, location, + location_offset, parents, parents_offset, NULL, NULL); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL( memcmp(table.location, location, spatial_dimension * num_rows * sizeof(double)), @@ -2306,6 +2368,11 @@ test_individual_table(void) CU_ASSERT_EQUAL(memcmp(table.location + spatial_dimension * num_rows, location, spatial_dimension * num_rows * sizeof(double)), 0); + CU_ASSERT_EQUAL( + memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)), 0); + CU_ASSERT_EQUAL(memcmp(table.parents + num_parents * num_rows, parents, + num_parents * num_rows * sizeof(tsk_id_t)), + 0); CU_ASSERT_EQUAL(memcmp(table.metadata_offset, metadata_offset, (num_rows + 1) * 
sizeof(tsk_size_t)), 0); @@ -2352,6 +2419,8 @@ test_individual_table(void) free(flags); free(location); free(location_offset); + free(parents); + free(parents_offset); free(metadata); free(metadata_offset); } @@ -2766,6 +2835,10 @@ test_table_size_increments(void) CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_EQUAL_FATAL( tables.individuals.max_location_length_increment, default_size); + ret = tsk_individual_table_set_max_parents_length_increment( + &tables.individuals, new_size); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL_FATAL(tables.individuals.max_parents_length_increment, default_size); ret = tsk_node_table_set_max_rows_increment(&tables.nodes, new_size); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4362,6 +4435,7 @@ test_column_overflow(void) tsk_size_t too_big = ((tsk_size_t) UINT32_MAX); double zero = 0; char zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + tsk_id_t id_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4380,6 +4454,12 @@ test_column_overflow(void) ret = tsk_individual_table_add_row( &tables.individuals, 0, NULL, 0, NULL, 0, NULL, too_big); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, id_zeros, 1, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, 2); + ret = tsk_individual_table_add_row( + &tables.individuals, 0, NULL, 0, NULL, too_big, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); ret = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, zeros, 1); CU_ASSERT_EQUAL_FATAL(ret, 0); diff --git a/c/tests/testlib.c b/c/tests/testlib.c index fccdc23536..199c97ce26 100644 --- a/c/tests/testlib.c +++ b/c/tests/testlib.c @@ -730,6 +730,7 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations) tsk_id_t j, k, last_node, u; int state, m; double position[2]; + tsk_id_t parents[2] = { -1, -1 }; const char *states[] = { "0", "1" }; const char *metadata[] = { "This", "is", "some", "metadata" }; 
const int num_metadatas = sizeof(metadata) / sizeof(*metadata); @@ -772,8 +773,8 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations) ret = tsk_population_table_add_row( &tables.populations, metadata[m], strlen(metadata[m])); CU_ASSERT_EQUAL_FATAL(ret, j); - ret = tsk_individual_table_add_row(&tables.individuals, 0, position, 2, NULL, 0, - metadata[m], strlen(metadata[m])); + ret = tsk_individual_table_add_row(&tables.individuals, 0, position, 2, parents, + 2, metadata[m], strlen(metadata[m])); CU_ASSERT_EQUAL_FATAL(ret, j); ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, j, j, metadata[m], strlen(metadata[m])); diff --git a/python/tests/test_file_format.py b/python/tests/test_file_format.py index 2a213162ad..454f1896de 100644 --- a/python/tests/test_file_format.py +++ b/python/tests/test_file_format.py @@ -589,6 +589,10 @@ def verify_dump_format(self, ts): assert np.array_equal( tables.individuals.location_offset, store["individuals/location_offset"] ) + assert np.array_equal(tables.individuals.parents, store["individuals/parents"]) + assert np.array_equal( + tables.individuals.parents_offset, store["individuals/parents_offset"] + ) assert np.array_equal( tables.individuals.metadata, store["individuals/metadata"] ) diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index b27c590c57..aeec61d6a2 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -948,6 +948,7 @@ def test_individuals(self): assert inds[j] == ts.individual(j) assert inds[j].id == j assert isinstance(inds[j].metadata, bytes) + assert isinstance(inds[j].parents, np.ndarray) assert isinstance(inds[j].location, np.ndarray) assert isinstance(inds[j].nodes, np.ndarray) assert ind_node_map[j] == list(inds[j].nodes) @@ -2969,6 +2970,7 @@ def get_instances(self, n): id_=j, flags=j, location=[j], + parents=[j], nodes=[j], encoded_metadata=b"x" * j, metadata_decoder=lambda m: m.decode() + "decoded", diff 
--git a/python/tests/test_metadata.py b/python/tests/test_metadata.py index b2b95ef369..314a4d9214 100644 --- a/python/tests/test_metadata.py +++ b/python/tests/test_metadata.py @@ -209,26 +209,29 @@ class TestLoadTextMetadata: def test_individuals(self): individuals = io.StringIO( """\ - id flags location metadata - 0 1 0.0,1.0,0.0 abc - 1 1 1.0,2.0 XYZ+ - 2 0 2.0,3.0,0.0 !@#$%^&*() + id flags location parents metadata + 0 1 0.0,1.0,0.0 -1,-1 abc + 1 1 1.0,2.0 0,0 XYZ+ + 2 0 2.0,3.0,0.0 0,1 !@#$%^&*() """ ) i = tskit.parse_individuals( individuals, strict=False, encoding="utf8", base64_metadata=False ) expected = [ - (1, [0.0, 1.0, 0.0], "abc"), - (1, [1.0, 2.0], "XYZ+"), - (0, [2.0, 3.0, 0.0], "!@#$%^&*()"), + (1, [0.0, 1.0, 0.0], [-1, -1], "abc"), + (1, [1.0, 2.0], [0, 0], "XYZ+"), + (0, [2.0, 3.0, 0.0], [0, 1], "!@#$%^&*()"), ] for a, b in zip(expected, i): assert a[0] == b.flags assert len(a[1]) == len(b.location) for x, y in zip(a[1], b.location): assert x == y - assert a[2].encode("utf8") == b.metadata + assert len(a[2]) == len(b.parents) + for x, y in zip(a[2], b.parents): + assert x == y + assert a[3].encode("utf8") == b.metadata def test_nodes(self): nodes = io.StringIO( diff --git a/python/tests/test_tables.py b/python/tests/test_tables.py index a7995fdeb7..46999e0154 100644 --- a/python/tests/test_tables.py +++ b/python/tests/test_tables.py @@ -866,19 +866,21 @@ class TestIndividualTable(CommonTestsMixin, MetadataTestsMixin): def test_simple_example(self): t = tskit.IndividualTable() - t.add_row(flags=0, location=[], metadata=b"123") - t.add_row(flags=1, location=(0, 1, 2, 3), metadata=b"\xf0") - t.add_row(flags=2, parents=[0, 1]) + t.add_row(flags=0, location=[], parents=[], metadata=b"123") + t.add_row( + flags=1, location=(0, 1, 2, 3), parents=(4, 5, 6, 7), metadata=b"\xf0" + ) s = str(t) assert len(s) > 0 - assert len(t) == 3 + assert len(t) == 2 assert t[0].flags == 0 assert list(t[0].location) == [] + assert list(t[0].parents) == [] assert 
def test_packset_parents(self):
    """
    Checks that ``packset_parents`` replaces the parents of every row: first
    on a one-row table, then again after a second row has been appended.
    """
    table = tskit.IndividualTable()

    # One row, given two parents.
    table.add_row(flags=0)
    table.packset_parents([[0, 2]])
    assert list(table[0].parents) == [0, 2]

    # A row added without parents reads back as an empty list.
    table.add_row(flags=1)
    assert list(table[1].parents) == []

    # Repacking overwrites the parents of both rows.
    replacement = [[0], [1, 2, 3]]
    table.packset_parents(replacement)
    for row_id, expected in enumerate(replacement):
        assert list(table[row_id].parents) == expected
def packset_parents(self, parents):
    """
    Packs the specified list of parent values and updates the ``parents``
    and ``parents_offset`` columns. The length of the parents array
    must be equal to the number of rows in the table.

    :param list parents: A list of lists of parent ids, one inner list per
        row, packed as a numpy int32 array.
    """
    packed_parents, parents_offset = util.pack_arrays(parents, np.int32)
    # Start from the table's current columns so that everything except the
    # two parents columns is handed back to set_columns unchanged.
    columns = self.asdict()
    columns.update(parents=packed_parents, parents_offset=parents_offset)
    self.set_columns(**columns)
def pack_arrays(list_of_lists, dtype=np.float64):
    """
    Packs the specified list of numeric lists into a flattened numpy array
    of the specified dtype and the corresponding array of offsets. The values
    of ``list_of_lists[j]`` are stored in ``packed[offset[j]:offset[j + 1]]``,
    so ``offset`` has one more element than ``list_of_lists``.

    :param list[list] list_of_lists: The list of numeric lists to encode.
    :param dtype: The dtype for the packed array, defaults to float64.
    :return: The tuple (packed, offset) of numpy arrays representing the
        flattened input data and offsets.
    :rtype: numpy.array (dtype=dtype), numpy.array (dtype=np.uint32)
    """
    n = len(list_of_lists)
    # offset[j] is the total length of the first j lists: offset[0] stays 0
    # and offset[n] is the number of packed values.
    offset = np.zeros(n + 1, dtype=np.uint32)
    offset[1:] = np.cumsum([len(row) for row in list_of_lists], dtype=np.uint32)
    data = np.empty(offset[-1], dtype=dtype)
    # TODO the per-row copy could probably also be done more efficiently with
    # numpy; it is kept as a slice assignment so that each row is converted
    # to ``dtype`` exactly as before.
    for j in range(n):
        data[offset[j] : offset[j + 1]] = list_of_lists[j]
    return data, offset