diff --git a/docs/data-model.md b/docs/data-model.md index 6d545232ac..3b7edd3327 100644 --- a/docs/data-model.md +++ b/docs/data-model.md @@ -906,7 +906,7 @@ from IPython.display import HTML def html_quintuple_table(ts, show_virtual_root=False, show_convenience_arrays=False): tree = ts.first() columns = ["node", "parent", "left_child", "right_child", "left_sib", "right_sib"] - convenience_arrays = ["num_children"] + convenience_arrays = ["num_children", "edge"] if show_convenience_arrays: columns += convenience_arrays data = {k:[] for k in columns} @@ -965,7 +965,7 @@ information on each node in the tree. These arrays are not essential to represent the trees within a treesequence. However, they can be useful for specific algorithms (e.g. when computing tree (im)balance metrics). The convience arrays that have been implemented are: -{attr}`Tree.num_children_array`. +{attr}`Tree.num_children_array`, {attr}`Tree.edge_array`. Adding convenience arrays to the example above results in this table: diff --git a/docs/python-api.md b/docs/python-api.md index 7aad6f155a..b254a8af89 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -445,6 +445,7 @@ Node information Tree.right_child Tree.left_child Tree.children + Tree.edge Descendant nodes .. autosummary:: @@ -479,6 +480,7 @@ high performance interface which can be used in conjunction with the equivalent Tree.left_sib_array Tree.right_sib_array Tree.num_children_array + Tree.edge_array ``` diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index d21ceb730b..efe129e04f 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -72,6 +72,10 @@ and maintainability. Please open an issue if this affects your application. (:user:`jeromekelleher`, :user:`benjeffery`, :pr:`2120`). +- Add ``Tree.edge_array`` and ``Tree.edge``. Returns the edge id of the edge encoding + the relationship of each node with its parent. + (:user:`GertjanBisschop`, :issue:`2361`, :pr:`2357`) + **Breaking Changes** - The JSON metadata codec now interprets the empty string as an empty object. This means diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index b00de231c9..7d40bc0b8f 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -10311,6 +10311,22 @@ Tree_get_right_sib(Tree *self, PyObject *args) return ret; } +static PyObject * +Tree_get_edge(Tree *self, PyObject *args) +{ + PyObject *ret = NULL; + tsk_id_t edge_id; + int node; + + if (Tree_get_node_argument(self, args, &node) != 0) { + goto out; + } + edge_id = self->tree->edge[node]; + ret = Py_BuildValue("i", (int) edge_id); +out: + return ret; +} + static PyObject * Tree_get_children(Tree *self, PyObject *args) { @@ -11052,6 +11068,19 @@ Tree_get_num_children_array(Tree *self, void *closure) return ret; } +static PyObject * +Tree_get_edge_array(Tree *self, void *closure) +{ + PyObject *ret = NULL; + + if (Tree_check_state(self) != 0) { + goto out; + } + ret = Tree_make_array(self, NPY_INT32, self->tree->edge); +out: + return ret; +} + static PyGetSetDef Tree_getsetters[] = { { .name = "parent_array", .get = (getter) Tree_get_parent_array, @@ -11071,6 +11100,9 @@ static PyGetSetDef Tree_getsetters[] { .name = "num_children_array", .get = (getter) Tree_get_num_children_array, .doc = "The num_children array in the quintuply linked tree." }, + { .name = "edge_array", + .get = (getter) Tree_get_edge_array, + .doc = "The edge array in the quintuply linked tree." }, { NULL } }; static PyMethodDef Tree_methods[] = { @@ -11182,6 +11214,10 @@ static PyMethodDef Tree_methods[] = { .ml_meth = (PyCFunction) Tree_get_right_sib, .ml_flags = METH_VARARGS, .ml_doc = "Returns the right-most sib of node u" }, + { .ml_name = "get_edge", + .ml_meth = (PyCFunction) Tree_get_edge, + .ml_flags = METH_VARARGS, + .ml_doc = "Returns the edge id connecting node u to its parent" }, { .ml_name = "get_children", .ml_meth = (PyCFunction) Tree_get_children, .ml_flags = METH_VARARGS, diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index ec4b8a79c1..3dd48b2cd0 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -2364,7 +2364,8 @@ def modify(ts, func): def test_tree_node_edges(self): for ts in get_example_tree_sequences(): edge_visited = np.zeros(ts.num_edges, dtype=bool) - for mapping, tree in zip(ts._tree_node_edges(), ts.trees()): + for tree in ts.trees(): + mapping = tree.edge_array node_mapped = mapping >= 0 edge_visited[mapping[node_mapped]] = True # Note that tree.nodes() does not necessarily list all the nodes @@ -3711,6 +3712,7 @@ def verify_tree_arrays(self, tree): assert tree.left_sib_array.shape == (N,) assert tree.right_sib_array.shape == (N,) assert tree.num_children_array.shape == (N,) + assert tree.edge_array.shape == (N,) for u in range(N): assert tree.parent(u) == tree.parent_array[u] assert tree.left_child(u) == tree.left_child_array[u] @@ -3718,6 +3720,7 @@ def verify_tree_arrays(self, tree): assert tree.left_sib(u) == tree.left_sib_array[u] assert tree.right_sib(u) == tree.right_sib_array[u] assert tree.num_children(u) == tree.num_children_array[u] + assert tree.edge(u) == tree.edge_array[u] def verify_tree_arrays_python_ts(self, ts): pts = tests.PythonTreeSequence(ts) @@ -3730,6 +3733,7 @@ def verify_tree_arrays_python_ts(self, ts): assert np.all(st1.left_sib_array == st2.left_sib) assert np.all(st1.right_sib_array == st2.right_sib) assert np.all(st1.num_children_array == st2.num_children) + assert np.all(st1.edge_array == st2.edge) def test_tree_arrays(self): ts = msprime.simulate(10, recombination_rate=1, random_seed=1) @@ -3747,6 +3751,7 @@ def test_tree_arrays(self): "left_sib", "right_sib", "num_children", + "edge", ], ) def test_tree_array_properties(self, array): @@ -3770,6 +3775,7 @@ def verify_empty_tree(self, tree): assert tree.left_child(u) == tskit.NULL assert tree.right_child(u) == tskit.NULL assert tree.num_children(u) == 0 + assert tree.edge(u) == tskit.NULL if not ts.node(u).is_sample(): assert tree.left_sib(u) == tskit.NULL assert tree.right_sib(u) == tskit.NULL @@ -3817,6 +3823,7 @@ def verify_trees_identical(self, t1, t2): assert np.all(t1.left_sib_array == t2.left_sib_array) assert np.all(t1.right_sib_array == t2.right_sib_array) assert np.all(t1.num_children_array == t2.num_children_array) + assert np.all(t1.edge_array == t2.edge_array) assert list(t1.sites()) == list(t2.sites()) def test_copy_seek(self): @@ -3924,7 +3931,8 @@ def test_node_edges(self): for tree in ts.trees(): nodes = set(tree.nodes()) midpoint = sum(tree.interval) / 2 - mapping = tree._node_edges() + # mapping = tree._node_edges() + mapping = tree.edge_array for node, edge in enumerate(mapping): if node in nodes and tree.parent(node) != tskit.NULL: edge_above_node = np.where( diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 058a9beefc..dbadb0ef82 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -2684,6 +2684,7 @@ class TestTree(LowLevelTestCase): "left_sib", "right_sib", "num_children", + "edge", ] def test_options(self): @@ -3036,6 +3037,7 @@ def check_tree(tree): assert tree.get_left_child(u) == _tskit.NULL assert tree.get_right_child(u) == _tskit.NULL assert tree.get_num_children(u) == 0 + assert tree.get_edge(u) == _tskit.NULL tree = _tskit.Tree(ts) check_tree(tree) diff --git a/python/tskit/drawing.py b/python/tskit/drawing.py index 2d025a22f0..030915627c 100644 --- a/python/tskit/drawing.py +++ b/python/tskit/drawing.py @@ -1174,7 +1174,7 @@ def __init__( tree_right = tree.interval.right edge_left = ts.tables.edges.left edge_right = ts.tables.edges.right - node_edges = tree._node_edges() + node_edges = tree.edge_array # whittle mutations down so we only need look at those above the tree nodes mut_t = ts.tables.mutations focal_mutations = np.isin(mut_t.node, np.fromiter(nodes, mut_t.node.dtype)) diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 3e31aa4db3..6bad76e857 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -689,6 +689,7 @@ def _make_arrays(self): self._left_sib_array = self._ll_tree.left_sib_array self._right_sib_array = self._ll_tree.right_sib_array self._num_children_array = self._ll_tree.num_children_array + self._edge_array = self._ll_tree.edge_array @property def tree_sequence(self): @@ -1199,6 +1200,35 @@ def num_children_array(self): """ return self._num_children_array + def edge(self, u): + """ + Returns the id of the edge encoding the relationship between ``u`` + and its parent, or :data:`tskit.NULL` if ``u`` is a root, virtual root + or is not a node in the current tree. + + :param int u: The node of interest. + :return: Id of edge connecting u to its parent. + :rtype: int + """ + return self._ll_tree.get_edge(u) + + @property + def edge_array(self): + """ + A numpy array (dtype=np.int32) of edge ids encoding the relationship + between the child node ``u`` and its parent, such that + ``tree.edge_array[u] == tree.edge(u)`` for all + ``0 <= u <= ts.num_nodes``. See the :meth:`~.edge` + method for details on the semantics of tree edge and the + :ref:`sec_data_model_tree_structure` section for information on the + quintuply linked tree encoding. + + .. include:: substitutions/virtual_root_array_note.rst + + .. include:: substitutions/tree_array_warning.rst + """ + return self._edge_array + # Sample list. def left_sample(self, u):