diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c
index fc679abb8e..e2bf440194 100644
--- a/c/tests/test_trees.c
+++ b/c/tests/test_trees.c
@@ -6556,6 +6556,7 @@ test_single_tree_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
+ double b1;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
@@ -6569,6 +6570,8 @@ test_single_tree_balance(void)
CU_ASSERT_EQUAL(sackin, 8);
CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, &colless), 0);
CU_ASSERT_EQUAL(colless, 0);
+ CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
+ CU_ASSERT_DOUBLE_EQUAL(b1, 2, 1e-8);
tsk_treeseq_free(&ts);
tsk_tree_free(&t);
@@ -6581,6 +6584,7 @@ test_multiroot_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin;
+ double b1;
tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
@@ -6603,8 +6607,9 @@ test_multiroot_balance(void)
CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);
CU_ASSERT_EQUAL(sackin, 7);
-
CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, NULL), TSK_ERR_UNDEFINED_MULTIROOT);
+ CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
+ CU_ASSERT_DOUBLE_EQUAL(b1, 1.0, 1e-8);
tsk_treeseq_free(&ts);
tsk_tree_free(&t);
@@ -6623,6 +6628,7 @@ test_nonbinary_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
+ double b1;
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
ret = tsk_tree_init(&t, &ts, 0);
@@ -6635,6 +6641,8 @@ test_nonbinary_balance(void)
CU_ASSERT_EQUAL(sackin, 4);
CU_ASSERT_EQUAL_FATAL(
tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_NONBINARY);
+ CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
+ CU_ASSERT_DOUBLE_EQUAL_FATAL(b1, 0, 1e-8);
tsk_treeseq_free(&ts);
tsk_tree_free(&t);
@@ -6648,6 +6656,7 @@ test_empty_tree_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
+ double b1;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -6664,6 +6673,8 @@ test_empty_tree_balance(void)
/* Technically wrong here because we have 0 roots, but not worth worrying about */
CU_ASSERT_EQUAL_FATAL(
tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_MULTIROOT);
+ CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
+ CU_ASSERT_EQUAL(b1, 0);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
diff --git a/c/tskit/trees.c b/c/tskit/trees.c
index a4ad97d02f..68f6ed262d 100644
--- a/c/tskit/trees.c
+++ b/c/tskit/trees.c
@@ -4961,6 +4961,46 @@ tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result)
return ret;
}
+int
+tsk_tree_b1_index(const tsk_tree_t *self, double *result)
+{
+ int ret = 0;
+ const tsk_id_t *restrict parent = self->parent;
+ const tsk_id_t *restrict right_child = self->right_child;
+ const tsk_id_t *restrict left_sib = self->left_sib;
+ tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));
+ tsk_size_t *max_path_length = tsk_calloc(self->num_nodes, sizeof(*max_path_length));
+ tsk_size_t j, num_nodes, mpl;
+ double total = 0.0;
+ tsk_id_t u, v;
+
+ if (nodes == NULL || max_path_length == NULL) {
+ ret = TSK_ERR_NO_MEMORY;
+ goto out;
+ }
+ ret = tsk_tree_postorder(self, nodes, &num_nodes);
+ if (ret != 0) {
+ goto out;
+ }
+
+ for (j = 0; j < num_nodes; j++) {
+ u = nodes[j];
+ if (parent[u] != TSK_NULL && right_child[u] != TSK_NULL) {
+ mpl = 0;
+ for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {
+ mpl = TSK_MAX(mpl, max_path_length[v]);
+ }
+ max_path_length[u] = mpl + 1;
+ total += 1 / (double) max_path_length[u];
+ }
+ }
+ *result = total;
+out:
+ tsk_safe_free(nodes);
+ tsk_safe_free(max_path_length);
+ return ret;
+}
+
/* Parsimony methods */
static inline uint64_t
diff --git a/c/tskit/trees.h b/c/tskit/trees.h
index dcc07d491c..e0e1c8d714 100644
--- a/c/tskit/trees.h
+++ b/c/tskit/trees.h
@@ -1688,6 +1688,7 @@ int tsk_tree_kc_distance(
* C API 1.0, but should be straightforward to document based on Python docs. */
int tsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result);
int tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result);
+int tsk_tree_b1_index(const tsk_tree_t *self, double *result);
/* Things to consider removing: */
diff --git a/docs/python-api.md b/docs/python-api.md
index 73eab4d096..79a44c05cc 100644
--- a/docs/python-api.md
+++ b/docs/python-api.md
@@ -546,6 +546,16 @@ Functions and static methods
Tree.kc_distance
```
+(sec_python_api_trees_balance)=
+
+#### Balance/imbalance indices
+
+```{eval-rst}
+.. autosummary::
+ Tree.colless_index
+ Tree.sackin_index
+ Tree.b1_index
+```
(sec_python_api_trees_sites_mutations)=
diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index 004f516ac9..00a4aaf916 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -47,10 +47,22 @@
- Add support for missing data to ``write_vcf``, and add the ``isolated_as_missing``
argument. (:user:`jeromekelleher`, :pr:`2329`, :issue:`447`).
-- Add ``Tree.num_children_array`` and ``Tree.num_children``. Returns the counts of
- the number of child nodes for each or a single node in the tree respectively.
+- Add ``Tree.num_children_array`` and ``Tree.num_children``. Returns the counts of
+ the number of child nodes for each or a single node in the tree respectively.
(:user:`GertjanBisschop`, :issue:`2318`, :issue:`2319`, :pr:`2332`)
+- Add ``Tree.path_length``.
+ (:user:`jeremyguez`, :issue:`2249`, :pr:`2259`).
+
+- Add B1 tree balance index.
+ (:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2251`, :pr:`2281`, :pr:`2346`).
+
+- Add Sackin tree imbalance index.
+ (:user:`jeremyguez`, :user:`jeromekelleher`, :pr:`2246`, :pr:`2258`).
+
+- Add Colless tree imbalance index.
+ (:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2250`, :pr:`2266`, :pr:`2344`).
+
**Breaking Changes**
- The JSON metadata codec now interprets the empty string as an empty object. This means
diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c
index a70c2ba73d..d46aaecfa1 100644
--- a/python/_tskitmodule.c
+++ b/python/_tskitmodule.c
@@ -10802,6 +10802,27 @@ Tree_get_colless_index(Tree *self)
return ret;
}
+static PyObject *
+Tree_get_b1_index(Tree *self)
+{
+ PyObject *ret = NULL;
+ int err;
+ double result;
+
+ if (Tree_check_state(self) != 0) {
+ goto out;
+ }
+
+ err = tsk_tree_b1_index(self->tree, &result);
+ if (err != 0) {
+ handle_library_error(err);
+ goto out;
+ }
+ ret = Py_BuildValue("d", result);
+out:
+ return ret;
+}
+
static PyObject *
Tree_get_root_threshold(Tree *self)
{
@@ -11221,6 +11242,10 @@ static PyMethodDef Tree_methods[] = {
.ml_meth = (PyCFunction) Tree_get_colless_index,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns the Colless index for this tree." },
+ { .ml_name = "get_b1_index",
+ .ml_meth = (PyCFunction) Tree_get_b1_index,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = "Returns the B1 index for this tree." },
{ NULL } /* Sentinel */
};
diff --git a/python/tskit/trees.py b/python/tskit/trees.py
index e8600d1db8..77571c9a1c 100644
--- a/python/tskit/trees.py
+++ b/python/tskit/trees.py
@@ -2783,9 +2783,10 @@ def path_length(self, u, v):
def b1_index(self):
"""
- Returns the B1 balance index for this tree.
- This is defined as the inverse of the sum of all longest paths
- to leaves for each node besides roots.
+ Returns the
+ `B1 balance index `_
+ for this tree. This is defined as the inverse of the sum of all
+ longest paths to leaves for each node besides roots.
.. seealso:: See `Shao and Sokal (1990)
`_ for details.
@@ -2793,25 +2794,17 @@ def b1_index(self):
:return: The B1 balance index.
:rtype: float
"""
- # TODO implement in C
- max_path_length = np.zeros(self.tree_sequence.num_nodes, dtype=int)
- total = 0.0
- for u in self.postorder():
- if self.parent(u) != tskit.NULL and self.is_internal(u):
- max_path_length[u] = 1 + max(
- max_path_length[v] for v in self.children(u)
- )
- total += 1 / max_path_length[u]
- return total
+ return self._ll_tree.get_b1_index()
def colless_index(self):
"""
- Returns the Colless imbalance index for this tree.
- This is defined as the sum of all differences between number of
- leaves subtended by the left and right child of each node.
- The Colless index is undefined for non-binary trees and trees
- with multiple roots. This method will raise a LibraryError if the
- tree is not singly-rooted and binary.
+ Returns the
+ `Colless imbalance index `_
+ for this tree. This is defined as the sum of all differences between
+ number of leaves subtended by the left and right child of each node.
+ The Colless index is undefined for non-binary trees and trees with
+ multiple roots. This method will raise a LibraryError if the tree is
+ not singly-rooted and binary.
.. seealso:: See `Shao and Sokal (1990)
`_ for details.
@@ -2823,9 +2816,11 @@ def colless_index(self):
def sackin_index(self):
"""
- Returns the Sackin imbalance index for this tree. This is defined
- as the sum of the depths of all leaves in the tree.
- Equivalent to ``sum(tree.depth(u) for u in tree.leaves())``
+ Returns the
+ `Sackin imbalance index `_
+ for this tree. This is defined as the sum of the depths of all leaves
+ in the tree. Equivalent to ``sum(tree.depth(u) for u in
+ tree.leaves())``
.. seealso:: See `Shao and Sokal (1990)
`_ for details.