Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -6556,6 +6556,7 @@ test_single_tree_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
double b1;

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
Expand All @@ -6569,6 +6570,8 @@ test_single_tree_balance(void)
CU_ASSERT_EQUAL(sackin, 8);
CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, &colless), 0);
CU_ASSERT_EQUAL(colless, 0);
CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
CU_ASSERT_DOUBLE_EQUAL(b1, 2, 1e-8);

tsk_treeseq_free(&ts);
tsk_tree_free(&t);
Expand All @@ -6581,6 +6584,7 @@ test_multiroot_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin;
double b1;

tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
Expand All @@ -6603,8 +6607,9 @@ test_multiroot_balance(void)

CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);
CU_ASSERT_EQUAL(sackin, 7);

CU_ASSERT_EQUAL_FATAL(tsk_tree_colless_index(&t, NULL), TSK_ERR_UNDEFINED_MULTIROOT);
CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
CU_ASSERT_DOUBLE_EQUAL(b1, 1.0, 1e-8);

tsk_treeseq_free(&ts);
tsk_tree_free(&t);
Expand All @@ -6623,6 +6628,7 @@ test_nonbinary_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
double b1;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
ret = tsk_tree_init(&t, &ts, 0);
Expand All @@ -6635,6 +6641,8 @@ test_nonbinary_balance(void)
CU_ASSERT_EQUAL(sackin, 4);
CU_ASSERT_EQUAL_FATAL(
tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_NONBINARY);
CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
CU_ASSERT_DOUBLE_EQUAL_FATAL(b1, 0, 1e-8);

tsk_treeseq_free(&ts);
tsk_tree_free(&t);
Expand All @@ -6648,6 +6656,7 @@ test_empty_tree_balance(void)
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t sackin, colless;
double b1;

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
Expand All @@ -6664,6 +6673,8 @@ test_empty_tree_balance(void)
/* Technically wrong here because we have 0 roots, but not worth worrying about */
CU_ASSERT_EQUAL_FATAL(
tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_MULTIROOT);
CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
CU_ASSERT_EQUAL(b1, 0);

tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
Expand Down
40 changes: 40 additions & 0 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4961,6 +4961,46 @@ tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result)
return ret;
}

int
tsk_tree_b1_index(const tsk_tree_t *self, double *result)
{
int ret = 0;
const tsk_id_t *restrict parent = self->parent;
const tsk_id_t *restrict right_child = self->right_child;
const tsk_id_t *restrict left_sib = self->left_sib;
tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));
tsk_size_t *max_path_length = tsk_calloc(self->num_nodes, sizeof(*max_path_length));
tsk_size_t j, num_nodes, mpl;
double total = 0.0;
tsk_id_t u, v;

if (nodes == NULL || max_path_length == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
ret = tsk_tree_postorder(self, nodes, &num_nodes);
if (ret != 0) {
goto out;
}

for (j = 0; j < num_nodes; j++) {
u = nodes[j];
if (parent[u] != TSK_NULL && right_child[u] != TSK_NULL) {
mpl = 0;
for (v = right_child[u]; v != TSK_NULL; v = left_sib[v]) {
mpl = TSK_MAX(mpl, max_path_length[v]);
}
max_path_length[u] = mpl + 1;
total += 1 / (double) max_path_length[u];
}
}
*result = total;
out:
tsk_safe_free(nodes);
tsk_safe_free(max_path_length);
return ret;
}

/* Parsimony methods */

static inline uint64_t
Expand Down
1 change: 1 addition & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,7 @@ int tsk_tree_kc_distance(
* C API 1.0, but should be straightforward to document based on Python docs. */
int tsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result);
int tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result);
int tsk_tree_b1_index(const tsk_tree_t *self, double *result);

/* Things to consider removing: */

Expand Down
10 changes: 10 additions & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,16 @@ Functions and static methods
Tree.kc_distance
```

(sec_python_api_trees_balance)=

#### Balance/imbalance indices

```{eval-rst}
.. autosummary::
Tree.colless_index
Tree.sackin_index
Tree.b1_index
```

(sec_python_api_trees_sites_mutations)=

Expand Down
16 changes: 14 additions & 2 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,22 @@
- Add support for missing data to ``write_vcf``, and add the ``isolated_as_missing``
argument. (:user:`jeromekelleher`, :pr:`2329`, :issue:`447`).

- Add ``Tree.num_children_array`` and ``Tree.num_children``. Returns the counts of
the number of child nodes for each or a single node in the tree respectively.
- Add ``Tree.num_children_array`` and ``Tree.num_children``. Returns the counts of
the number of child nodes for each or a single node in the tree respectively.
(:user:`GertjanBisschop`, :issue:`2318`, :issue:`2319`, :pr:`2332`)

- Add ``Tree.path_length``.
(:user:`jeremyguez`, :issue:`2249`, :pr:`2259`).

- Add B1 tree balance index.
(:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2251`, :pr:`2281`, :pr:`2346`).

- Add Sackin tree imbalance index.
(:user:`jeremyguez`, :user:`jeromekelleher`, :pr:`2246`, :pr:`2258`).

- Add Colless tree imbalance index.
(:user:`jeremyguez`, :user:`jeromekelleher`, :issue:`2250`, :pr:`2266`, :pr:`2344`).

**Breaking Changes**

- The JSON metadata codec now interprets the empty string as an empty object. This means
Expand Down
25 changes: 25 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -10802,6 +10802,27 @@ Tree_get_colless_index(Tree *self)
return ret;
}

static PyObject *
Tree_get_b1_index(Tree *self)
{
PyObject *ret = NULL;
int err;
double result;

if (Tree_check_state(self) != 0) {
goto out;
}

err = tsk_tree_b1_index(self->tree, &result);
if (err != 0) {
handle_library_error(err);
goto out;
}
ret = Py_BuildValue("d", result);
out:
return ret;
}

static PyObject *
Tree_get_root_threshold(Tree *self)
{
Expand Down Expand Up @@ -11221,6 +11242,10 @@ static PyMethodDef Tree_methods[] = {
.ml_meth = (PyCFunction) Tree_get_colless_index,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns the Colless index for this tree." },
{ .ml_name = "get_b1_index",
.ml_meth = (PyCFunction) Tree_get_b1_index,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns the B1 index for this tree." },
{ NULL } /* Sentinel */
};

Expand Down
39 changes: 17 additions & 22 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2783,35 +2783,28 @@ def path_length(self, u, v):

def b1_index(self):
"""
Returns the B1 balance index for this tree.
This is defined as the inverse of the sum of all longest paths
to leaves for each node besides roots.
Returns the
`B1 balance index <https://treebalance.wordpress.com/b%E2%82%81-index/>`_
for this tree. This is defined as the inverse of the sum of all
longest paths to leaves for each node besides roots.

.. seealso:: See `Shao and Sokal (1990)
<https://www.jstor.org/stable/2992186>`_ for details.

:return: The B1 balance index.
:rtype: float
"""
# TODO implement in C
max_path_length = np.zeros(self.tree_sequence.num_nodes, dtype=int)
total = 0.0
for u in self.postorder():
if self.parent(u) != tskit.NULL and self.is_internal(u):
max_path_length[u] = 1 + max(
max_path_length[v] for v in self.children(u)
)
total += 1 / max_path_length[u]
return total
return self._ll_tree.get_b1_index()

def colless_index(self):
"""
Returns the Colless imbalance index for this tree.
This is defined as the sum of all differences between number of
leaves subtended by the left and right child of each node.
The Colless index is undefined for non-binary trees and trees
with multiple roots. This method will raise a LibraryError if the
tree is not singly-rooted and binary.
Returns the
`Colless imbalance index <https://treebalance.wordpress.com/colless-index/>`_
for this tree. This is defined as the sum of all differences between
number of leaves subtended by the left and right child of each node.
The Colless index is undefined for non-binary trees and trees with
multiple roots. This method will raise a LibraryError if the tree is
not singly-rooted and binary.

.. seealso:: See `Shao and Sokal (1990)
<https://www.jstor.org/stable/2992186>`_ for details.
Expand All @@ -2823,9 +2816,11 @@ def colless_index(self):

def sackin_index(self):
"""
Returns the Sackin imbalance index for this tree. This is defined
as the sum of the depths of all leaves in the tree.
Equivalent to ``sum(tree.depth(u) for u in tree.leaves())``
Returns the
`Sackin imbalance index <https://treebalance.wordpress.com/sackin-index/>`_
for this tree. This is defined as the sum of the depths of all leaves
in the tree. Equivalent to ``sum(tree.depth(u) for u in
tree.leaves())``

.. seealso:: See `Shao and Sokal (1990)
<https://www.jstor.org/stable/2992186>`_ for details.
Expand Down