diff --git a/c/tests/test_stats.c b/c/tests/test_stats.c
index 03797a62f1..2d24c4879c 100644
--- a/c/tests/test_stats.c
+++ b/c/tests/test_stats.c
@@ -276,6 +276,7 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
     tsk_id_t index_tuples[2 * n * n];
     double D1[n * n], D2[n * n];
     tsk_size_t i, j, k;
+    tsk_flags_t lib_options = options;
 
     for (j = 0; j < n; j++) {
         sample_set_sizes[j] = 1;
@@ -284,8 +285,13 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
             index_tuples[2 * (j * n + k) + 1] = (tsk_id_t) k;
         }
     }
+    /* Until we have mutation mode for divergence: */
+    if (!!(lib_options & TSK_STAT_MUTATION)) {
+        lib_options &= (tsk_flags_t) ~TSK_STAT_MUTATION;
+        lib_options |= (tsk_flags_t) TSK_STAT_SITE;
+    }
     ret = tsk_treeseq_divergence(
-        ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);
+        ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, lib_options, D1);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
 
     ret = tsk_treeseq_divergence_matrix(
@@ -1315,6 +1321,12 @@ test_general_stat_input_errors(void)
         &ts, 1, &W, 0, general_stat_sum, NULL, 0, NULL, 0, &result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);
 
+    /* Unsupported mode */
+    /* TODO: change when STAT_MUTATION is supported */
+    ret = tsk_treeseq_general_stat(
+        &ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL, TSK_STAT_MUTATION, &result);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+
     /* Multiple stats*/
     ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,
         TSK_STAT_SITE | TSK_STAT_BRANCH, &result);
@@ -1464,18 +1476,26 @@ test_single_tree_divergence_matrix(void)
     tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
         single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
 
+    ret = tsk_treeseq_divergence_matrix(
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+
+    ret = tsk_treeseq_divergence_matrix(
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_NODE, result);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+
     ret = tsk_treeseq_divergence_matrix(
         &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D_branch);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D_site);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
 
     ret = tsk_treeseq_divergence_matrix(
@@ -1488,15 +1508,15 @@ test_single_tree_divergence_matrix(void)
         &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
 
     /* assert_arrays_almost_equal(4, result, D_site); */
 
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);
 
     tsk_treeseq_free(&ts);
 }
@@ -1542,7 +1562,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
 
@@ -1551,7 +1571,7 @@ test_single_tree_divergence_matrix_internal_samples(void)
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
 
@@ -1560,14 +1580,14 @@ test_single_tree_divergence_matrix_internal_samples(void)
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(16, result, D);
 
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);
 
     tsk_treeseq_free(&ts);
     free(result);
@@ -1616,8 +1636,8 @@ test_single_tree_divergence_matrix_multi_root(void)
 
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);
 
     tsk_treeseq_free(&ts);
 }
@@ -2250,6 +2270,9 @@ test_paper_ex_genetic_relatedness_vector_errors(void)
     ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,
         num_samples, ts.samples, result, TSK_STAT_NODE);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2, windows,
+        num_samples, ts.samples, result, TSK_STAT_MUTATION);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
 
     tsk_treeseq_free(&ts);
     free(weights);
@@ -2503,6 +2526,10 @@ test_paper_ex_afs_errors(void)
         &ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
 
+    ret = tsk_treeseq_allele_frequency_spectrum(
+        &ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_MUTATION, result);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+
     ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,
         NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
@@ -2557,8 +2584,8 @@ test_paper_ex_divergence_matrix(void)
 
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);
 
     tsk_treeseq_free(&ts);
 }
@@ -3790,6 +3817,10 @@ test_two_locus_stat_input_errors(void)
         positions, 10, NULL, positions, TSK_STAT_NODE, result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
 
+    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
+        positions, 10, NULL, positions, TSK_STAT_MUTATION, result);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
+
     num_sample_sets = 2;
     num_index_tuples = 0;
     ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
@@ -3856,7 +3887,7 @@ test_simplest_divergence_matrix(void)
     assert_arrays_almost_equal(4, D_site, result);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 2, NULL, sample_ids, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(4, D_site, result);
 
@@ -3866,7 +3897,7 @@ test_simplest_divergence_matrix(void)
     assert_arrays_almost_equal(4, D_branch, result);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(4, D_site, result);
 
@@ -3879,7 +3910,7 @@ test_simplest_divergence_matrix(void)
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, result);
+        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_MUTATION | TSK_STAT_BRANCH, result);
     CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
 
     sample_ids[0] = -1;
@@ -3935,7 +3966,7 @@ test_simplest_divergence_matrix_windows(void)
 
     /* Windows for the second half */
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_SITE, result);
+        &ts, 2, NULL, sample_ids, 1, windows + 1, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(4, D_site, result);
     ret = tsk_treeseq_divergence_matrix(
@@ -3985,7 +4016,7 @@ test_simplest_divergence_matrix_internal_sample(void)
     assert_arrays_almost_equal(9, D_branch, result);
 
     ret = tsk_treeseq_divergence_matrix(
-        &ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_SITE, result);
+        &ts, 3, NULL, sample_ids, 0, NULL, TSK_STAT_MUTATION, result);
     CU_ASSERT_EQUAL_FATAL(ret, 0);
     assert_arrays_almost_equal(9, D_site, result);
 
@@ -4002,8 +4033,8 @@ test_multiroot_divergence_matrix(void)
 
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
     verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE);
-    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION);
+    verify_divergence_matrix(&ts, TSK_STAT_MUTATION | TSK_STAT_SPAN_NORMALISE);
 
     tsk_treeseq_free(&ts);
 }
diff --git a/c/tskit/trees.c b/c/tskit/trees.c
index f00bb83d28..05a73bb5c9 100644
--- a/c/tskit/trees.c
+++ b/c/tskit/trees.c
@@ -2041,13 +2041,18 @@ tsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
     bool stat_site = !!(options & TSK_STAT_SITE);
     bool stat_branch = !!(options & TSK_STAT_BRANCH);
     bool stat_node = !!(options & TSK_STAT_NODE);
+    bool stat_mutation = !!(options & TSK_STAT_MUTATION);
     double default_windows[] = { 0, self->tables->sequence_length };
     tsk_size_t row_size;
 
     /* If no mode is specified, we default to site mode */
-    if (!(stat_site || stat_branch || stat_node)) {
+    if (!(stat_site || stat_branch || stat_node || stat_mutation)) {
         stat_site = true;
     }
+    if (stat_mutation) {
+        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
+        goto out;
+    }
     /* It's an error to specify more than one mode */
     if (stat_site + stat_branch + stat_node > 1) {
         ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
@@ -3402,8 +3407,8 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl
         .sample_set_sizes = sample_set_sizes,
         .set_indexes = set_indexes };
 
-    // We do not support two-locus node stats
-    if (!!(options & TSK_STAT_NODE)) {
+    // We do not support two-locus node or mutation stats
+    if (!!(options & (TSK_STAT_NODE | TSK_STAT_MUTATION))) {
         ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
         goto out;
     }
@@ -3821,7 +3826,6 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
     int ret = 0;
     bool stat_site = !!(options & TSK_STAT_SITE);
     bool stat_branch = !!(options & TSK_STAT_BRANCH);
-    bool stat_node = !!(options & TSK_STAT_NODE);
     const double default_windows[] = { 0, self->tables->sequence_length };
     const double default_time_windows[] = { 0, INFINITY };
     const tsk_size_t num_nodes = self->tables->nodes.num_rows;
@@ -3833,7 +3837,7 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
      * reuse code from the general_stats code paths. */
     double *counts = NULL;
     double *count_row;
-    if (stat_node) {
+    if (!!(options & (TSK_STAT_NODE | TSK_STAT_MUTATION))) {
         ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
         goto out;
     }
@@ -8751,11 +8755,11 @@ remap_to_sample_sets(const tsk_size_t num_samples, const tsk_id_t *restrict samp
 }
 
 static int
-tsk_treeseq_divergence_matrix_site(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
-    const tsk_id_t *restrict sample_set_index_map, const tsk_size_t num_samples,
-    const tsk_id_t *restrict samples, tsk_size_t num_windows,
-    const double *restrict windows, tsk_flags_t TSK_UNUSED(options),
-    double *restrict result)
+tsk_treeseq_divergence_matrix_mutation(const tsk_treeseq_t *self,
+    tsk_size_t num_sample_sets, const tsk_id_t *restrict sample_set_index_map,
+    const tsk_size_t num_samples, const tsk_id_t *restrict samples,
+    tsk_size_t num_windows, const double *restrict windows,
+    tsk_flags_t TSK_UNUSED(options), double *restrict result)
 {
     int ret = 0;
     tsk_size_t i;
@@ -8913,20 +8917,21 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_s
     bool stat_site = !!(options & TSK_STAT_SITE);
     bool stat_branch = !!(options & TSK_STAT_BRANCH);
     bool stat_node = !!(options & TSK_STAT_NODE);
+    bool stat_mutation = !!(options & TSK_STAT_MUTATION);
     tsk_id_t *sample_set_index_map
         = tsk_malloc(num_nodes * sizeof(*sample_set_index_map));
     tsk_size_t j;
 
-    if (stat_node) {
+    if (stat_node || stat_site) {
         ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
         goto out;
     }
-    /* If no mode is specified, we default to site mode */
-    if (!(stat_site || stat_branch)) {
-        stat_site = true;
+    /* If no mode is specified, we default to mutation mode */
+    if (!(stat_mutation || stat_branch)) {
+        stat_mutation = true;
     }
     /* It's an error to specify more than one mode */
-    if (stat_site + stat_branch > 1) {
+    if (stat_mutation + stat_branch > 1) {
         ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
         goto out;
     }
@@ -8982,8 +8987,8 @@ tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_s
         ret = tsk_treeseq_divergence_matrix_branch(self, N, sample_set_sizes,
             sample_sets, num_windows, windows, options, result);
     } else {
-        tsk_bug_assert(stat_site);
-        ret = tsk_treeseq_divergence_matrix_site(self, N, sample_set_index_map,
+        tsk_bug_assert(stat_mutation);
+        ret = tsk_treeseq_divergence_matrix_mutation(self, N, sample_set_index_map,
             total_samples, sample_sets, num_windows, windows, options, result);
     }
     if (ret != 0) {
@@ -10788,11 +10793,12 @@ tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self, tsk_size_t num
     int ret = 0;
     bool stat_site = !!(options & TSK_STAT_SITE);
     bool stat_node = !!(options & TSK_STAT_NODE);
+    bool stat_mutation = !!(options & TSK_STAT_MUTATION);
     tsk_matvec_calculator_t calc;
 
     memset(&calc, 0, sizeof(calc));
 
-    if (stat_node || stat_site) {
+    if (stat_node || stat_site || stat_mutation) {
         ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
         goto out;
     }
diff --git a/c/tskit/trees.h b/c/tskit/trees.h
index 21495edbf7..7838cc4628 100644
--- a/c/tskit/trees.h
+++ b/c/tskit/trees.h
@@ -47,6 +47,7 @@ extern "C" {
 #define TSK_STAT_SITE               (1 << 0)
 #define TSK_STAT_BRANCH             (1 << 1)
 #define TSK_STAT_NODE               (1 << 2)
+#define TSK_STAT_MUTATION           (1 << 3)
 
 /* Leave room for other stat types */
 #define TSK_STAT_POLARISED               (1 << 10)
diff --git a/docs/python-api.md b/docs/python-api.md
index 6ed59f981c..44a2979bd7 100644
--- a/docs/python-api.md
+++ b/docs/python-api.md
@@ -323,6 +323,7 @@ Single site
       TreeSequence.genetic_relatedness_weighted
       TreeSequence.genetic_relatedness_vector
       TreeSequence.genetic_relatedness_matrix
+      TreeSequence.divergence_matrix
       TreeSequence.general_stat
       TreeSequence.segregating_sites
       TreeSequence.sample_count_stat
diff --git a/docs/stats.md b/docs/stats.md
index 693c363e08..e7a4a83183 100644
--- a/docs/stats.md
+++ b/docs/stats.md
@@ -204,29 +204,43 @@ statistics API in use.
 
 #### Mode
 
-There are three **modes** of statistic: `site`, `branch`, and `node`,
+There are four **modes** of statistic: `site`, `branch`, `mutation`, and `node`,
 that each summarize aspects of the tree sequence in different but related ways.
 Roughly speaking, these answer the following sorts of question:
 
 site
-: How many mutations differentiate these two genomes?
+: At how many sites do these two genomes differ?
 
 branch
 : How long since these genomes' common ancestor?
 
+mutation
+: How many mutations have occurred since these genomes' common ancestor?
+
 node
 : On how much of the genome is each node an ancestor of only one of these genomes, but not both?
 
-These three examples can all be answered in the same way with the tree sequence:
+These examples can all be answered in the same way with the tree sequence:
 first, draw all the paths from one genome to the other through the tree sequence
 (back up to their common ancestor and back down in each marginal tree).
 Then,
-(`site`) count the number of mutations falling on the paths,
+(`site`) check if the allelic state at the endpoints is the same, or
 (`branch`) measure the length of the paths, or
+(`mutation`) count the number of mutations falling on the paths, or
 (`node`) count how often the path goes through each node.
 There is more discussion of this correspondence in the paper describing these statistics,
 and precise definitions are given in each statistic.
 
+Furthermore, since mutations are rare, if two genomes differ in state at a site,
+then it's probably because there was a single mutation.
+This means that `site` and `mutation` are *almost* the same:
+and, they will be identical if there is at most one mutation per site
+and no mutations are silent.
+So, we tend to think of `site` in the same way, as counting numbers of mutations;
+with a small correction due to multiple mutations.
+Furthermore, `mutation` mode is not implemented for many statistics at this point,
+so use `site` to answer the same questions.
+
 Here's an example of using the {meth}`~TreeSequence.diversity` statistic to return the
 average branch length between all pairs of samples:
 
@@ -235,7 +249,7 @@ ts.diversity(mode="branch")
 ```
 
 One important thing to know is that `node` statistics have somewhat different output.
-While `site` and `branch` statistics naturally return one number
+While `site`, `branch`, and `mutation` statistics naturally return one number
 for each portion of the genome (and thus incorporates information about many nodes: see below),
 the `node` statistics return one number **for each node** in the tree sequence (and for each window).
 There can be a lot of nodes in the tree sequence, so beware.
@@ -795,8 +809,9 @@ pairwise) and Patterson's f statistics (which are four-way).
 ### Derived statistics
 
 Most statistics have the property that `mode="branch"` and
-`mode="site"` are "dual" in the sense that they are equal, on average, under
-a high neutral mutation rate. {meth}`~TreeSequence.Fst` and {meth}`~TreeSequence.Tajimas_D`
+`mode="mutation"` are "dual" in the sense that they are equal, on average, 
+and so is `mode="site"` under the infinite sites model of mutation.
+{meth}`~TreeSequence.Fst` and {meth}`~TreeSequence.Tajimas_D`
 do not have this property (since both are ratios of statistics that do have this property).
 
 
diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index fb15196ac8..1b7f1f1363 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -14,6 +14,10 @@
   is not the case.
   (:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).
 
+- The previous behavior of ``genetic_relatedness_matrix`` with ``mode="site"`` is
+  now provided by ``mode="branch"``, and ``mode="site"`` is not supported.
+  (:user:`petrelharp`, :pr:`3314`)
+
 - Drop Python 3.9 support, require Python >= 3.10 (:pr:`3267`, :user:`benjeffery`)
 
 
@@ -59,6 +63,12 @@
   allowing greater flexibility in "disjoint union" situations.
   (:user:`hyanwong`, :user:`petrelharp`, :issue:`3181`)
 
+- Add a new statistics mode, ``mode="mutation"``, that counts numbers of mutations
+  rather than branch length (``mode="branch"``) or allelic state (``mode="site"``)
+  (:user:`petrelharp`, :pr:`3314`).
+
+- Add ``TreeSequence.divergence_matrix``, which was previously undocumented.
+
 **Bugfixes**
 
 - In some tables with mutations out-of-order ``TableCollection.sort`` did not re-order
@@ -102,6 +112,7 @@
 - ``ltrim``, ``rtrim``, ``trim`` and ``shift`` raise an error if used on a tree sequence
   containing a reference sequence (:user:`hyanwong`, :pr:`3210`, :issue:`2091`)
 
+
 --------------------
 [0.6.4] - 2025-05-21
 --------------------
diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c
index 78cb9f7c8e..54b38d0839 100644
--- a/python/_tskitmodule.c
+++ b/python/_tskitmodule.c
@@ -6442,6 +6442,8 @@ parse_stats_mode(char *mode, tsk_flags_t *ret)
         value = TSK_STAT_BRANCH;
     } else if (strcmp(mode, "node") == 0) {
         value = TSK_STAT_NODE;
+    } else if (strcmp(mode, "mutation") == 0) {
+        value = TSK_STAT_MUTATION;
     } else {
         PyErr_SetString(PyExc_ValueError, "Unrecognised stats mode");
         return -1;
diff --git a/python/tests/test_divmat.py b/python/tests/test_divmat.py
index 892c48c347..61c98156d1 100644
--- a/python/tests/test_divmat.py
+++ b/python/tests/test_divmat.py
@@ -37,7 +37,7 @@
 # ↑ See https://github.com/tskit-dev/tskit/issues/1804 for when
 # we can remove this.
 
-DIVMAT_MODES = ["branch", "site"]
+DIVMAT_MODES = ["branch", "mutation"]
 
 # NOTE: this implementation of Schieber-Vishkin algorithm is done like
 # this so it's easy to run with numba. It would be more naturally
@@ -237,9 +237,14 @@ def branch_divergence_matrix(ts, sample_sets=None, windows=None, span_normalise=
 
 
 def divergence_matrix(
-    ts, windows=None, sample_sets=None, samples=None, mode="site", span_normalise=True
+    ts,
+    windows=None,
+    sample_sets=None,
+    samples=None,
+    mode="mutation",
+    span_normalise=True,
 ):
-    assert mode in ["site", "branch"]
+    assert mode in ["mutation", "branch"]
     if samples is not None and sample_sets is not None:
         raise ValueError("Cannot specify both")
     if samples is None and sample_sets is None:
@@ -249,8 +254,8 @@ def divergence_matrix(
     else:
         assert sample_sets is not None
 
-    if mode == "site":
-        return site_divergence_matrix(
+    if mode == "mutation":
+        return mutation_divergence_matrix(
             ts, sample_sets, windows=windows, span_normalise=span_normalise
         )
     else:
@@ -274,9 +279,12 @@ def stats_api_matrix_method(
     windows=None,
     samples=None,
     sample_sets=None,
-    mode="site",
+    mode="mutation",
     span_normalise=True,
 ):
+    # Until we have mutation mode for general stats:
+    if mode == "mutation":
+        mode = "site"
     if samples is not None and sample_sets is not None:
         raise ValueError("Cannot specify both")
     if samples is None and sample_sets is None:
@@ -344,7 +352,7 @@ def group_alleles(genotypes, num_alleles):
     return A, offsets
 
 
-def site_divergence_matrix(ts, sample_sets, *, windows=None, span_normalise=True):
+def mutation_divergence_matrix(ts, sample_sets, *, windows=None, span_normalise=True):
     windows_specified = windows is not None
     windows = ts.parse_windows(windows)
     num_windows = len(windows) - 1
@@ -403,7 +411,7 @@ def check_divmat(
     verbosity=0,
     compare_stats_api=True,
     compare_lib=True,
-    mode="site",
+    mode="mutation",
 ):
     # print("samples = ", samples, sample_sets)
     # print(ts.draw_text())
@@ -455,6 +463,7 @@ def check_divmat(
 
 
 class TestExamplesWithAnswer:
+
     @pytest.mark.parametrize("mode", DIVMAT_MODES)
     def test_single_tree_zero_samples(self, mode):
         ts = tskit.Tree.generate_balanced(2).tree_sequence
@@ -479,7 +488,7 @@ def test_single_tree_sites_per_branch(self, m):
         #     0         1
         ts = tskit.Tree.generate_balanced(4).tree_sequence
         ts = tsutil.insert_branch_sites(ts, m)
-        D1 = check_divmat(ts, mode="site")
+        D1 = check_divmat(ts, mode="mutation")
         D2 = np.array(
             [
                 [0.0, 2.0, 4.0, 4.0],
@@ -497,7 +506,7 @@ def test_single_tree_unique_sample_alleles(self, n):
         for j in range(n):
             tables.mutations.add_row(site=0, node=j, derived_state=f"{j + 1}")
         ts = tables.tree_sequence()
-        D1 = check_divmat(ts, mode="site")
+        D1 = check_divmat(ts, mode="mutation")
         D2 = np.ones((n, n))
         np.fill_diagonal(D2, 0)
         np.testing.assert_array_equal(D1, D2)
@@ -708,7 +717,7 @@ def test_single_tree_equal_windows(self, num_windows):
     @pytest.mark.parametrize("n", [2, 3, 5])
     def test_single_tree_no_sites(self, n):
         ts = tskit.Tree.generate_balanced(n, span=10).tree_sequence
-        D = check_divmat(ts, mode="site")
+        D = check_divmat(ts, mode="mutation")
         np.testing.assert_array_equal(D, np.zeros((n, n)))
 
 
@@ -954,7 +963,7 @@ def check(
             atol = 1e-11 if has_missing_data else 0
             np.testing.assert_allclose(D1, D2, atol=atol)
         else:
-            assert mode == "site"
+            assert mode == "mutation"
             np.testing.assert_allclose(D1, D2)
 
     @pytest.mark.parametrize("ts", get_example_tree_sequences())
@@ -1350,7 +1359,7 @@ class TestGeneticRelatednessMatrix:
     def check(self, ts, mode, *, sample_sets=None, windows=None, span_normalise=True):
         # These are *only* expected to be the same
         # under infinite-sites mutations
-        if mode == "site" and np.any([len(s.mutations) > 1 for s in ts.sites()]):
+        if mode == "mutation" and np.any([len(s.mutations) > 1 for s in ts.sites()]):
             ts = msprime.sim_mutations(
                 ts,
                 rate=100 / ts.segregating_sites(mode="branch", span_normalise=False),
@@ -1373,6 +1382,16 @@ def check(self, ts, mode, *, sample_sets=None, windows=None, span_normalise=True
         )
         np.testing.assert_array_almost_equal(G1, G2)
 
+    def test_bad_modes(self):
+        ts = tskit.Tree.generate_balanced(4).tree_sequence
+        with pytest.raises(tskit.LibraryError, match="TSK_ERR_UNSUPPORTED_STAT_MODE"):
+            ts.genetic_relatedness_matrix([0, 5], mode="node")
+        with pytest.warns(UserWarning, match="mutation' instead"):
+            with pytest.raises(
+                tskit.LibraryError, match="TSK_ERR_UNSUPPORTED_STAT_MODE"
+            ):
+                ts.genetic_relatedness_matrix([0, 5], mode="site")
+
     @pytest.mark.parametrize("mode", DIVMAT_MODES)
     def test_single_tree(self, mode):
         # 2.00┊    6    ┊
diff --git a/python/tests/test_ld_matrix.py b/python/tests/test_ld_matrix.py
index fa992adec8..9a894e6e26 100644
--- a/python/tests/test_ld_matrix.py
+++ b/python/tests/test_ld_matrix.py
@@ -40,6 +40,7 @@
 import numpy as np
 import pytest
 
+import _tskit
 import tskit
 import tskit.util as util
 from tests import tsutil
@@ -1461,7 +1462,8 @@ def ld_matrix(
     )
 
 
-def get_paper_ex_ts():
+@pytest.fixture(scope="session")
+def paper_ex_ts():
     """Generate the tree sequence example from the tskit paper
 
     Data taken from the tests:
@@ -1567,7 +1569,7 @@ def get_matrix_partitions(n):
 
 # Generate all partitions of the LD matrix, then pass into test_subset
 @pytest.mark.parametrize("partition", get_matrix_partitions(len(PAPER_EX_TRUTH_MATRIX)))
-def test_subset_sites(partition):
+def test_subset_sites(paper_ex_ts, partition):
     """Given a partition of the truth matrix, check that we can successfully
     compute the LD matrix for that given partition, effectively ensuring that
     our handling of site subsets is correct.
@@ -1575,7 +1577,7 @@ def test_subset_sites(partition):
     :param partition: length 2 list of [row_sites, column_sites].
     """
     a, b = partition
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     np.testing.assert_allclose(
         ld_matrix(ts, sites=partition),
         PAPER_EX_TRUTH_MATRIX[a[0] : a[-1] + 1, b[0] : b[-1] + 1],
@@ -1588,7 +1590,7 @@ def test_subset_sites(partition):
 @pytest.mark.parametrize(
     "partition", get_matrix_partitions(len(PAPER_EX_BRANCH_TRUTH_MATRIX))
 )
-def test_subset_positions(partition):
+def test_subset_positions(paper_ex_ts, partition):
     """Given a partition of the truth matrix, check that we can successfully
     compute the LD matrix for that given partition, effectively ensuring that
     our handling of positions is correct. We use the midpoint inside of the
@@ -1597,7 +1599,7 @@ def test_subset_positions(partition):
     :param partition: length 2 list of [row_positions, column_positions].
     """
     a, b = partition
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     bp = ts.breakpoints(as_array=True)
     mid = (bp[1:] + bp[:-1]) / 2
     np.testing.assert_allclose(
@@ -1633,20 +1635,20 @@ def test_bad_positions():
 
 
 @pytest.mark.parametrize("sites", [[0, 1, 2], [1, 2], [0, 1], [0], [1]])
-def test_subset_sites_one_list(sites):
+def test_subset_sites_one_list(paper_ex_ts, sites):
     """Test the case where we only pass only one list of sites to compute. This
     should return a square matrix comparing the sites to themselves.
     """
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     np.testing.assert_equal(ld_matrix(ts, sites=[sites]), ts.ld_matrix(sites=[sites]))
 
 
 @pytest.mark.parametrize("tree_index", [[0, 1, 2], [1, 2], [0, 1], [0], [1]])
-def test_subset_positions_one_list(tree_index):
+def test_subset_positions_one_list(paper_ex_ts, tree_index):
     """Test the case where we only pass only one list of positions to compute. This
     should return a square matrix comparing the positions to themselves.
     """
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     bp = ts.breakpoints(as_array=True)
     mid = (bp[1:] + bp[:-1]) / 2
     np.testing.assert_allclose(
@@ -1674,11 +1676,11 @@ def test_subset_positions_one_list(tree_index):
         ([0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2]),
     ],
 )
-def test_repeated_position_elements(tree_index):
+def test_repeated_position_elements(paper_ex_ts, tree_index):
     """Test that we repeat positions in the LD matrix when we have multiple positions
     that overlap the same tree when specifying positions in branch mode.
     """
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     l, r = tree_index
     bp = ts.breakpoints(as_array=True)
     val, count = np.unique(l, return_counts=True)
@@ -1707,16 +1709,14 @@ def test_repeated_position_elements(tree_index):
 
 
 # Generate all partitions of the samples, producing pairs of sample sets
-@pytest.mark.parametrize(
-    "partition", get_matrix_partitions(get_paper_ex_ts().num_samples)
-)
-def test_sample_sets(partition):
+@pytest.mark.parametrize("partition", get_matrix_partitions(4))
+def test_sample_sets(paper_ex_ts, partition):
     """Test all partitions of sample sets, ensuring that we are correctly
     computing stats for various subsets of the samples in a given tree.
 
     :param partition: length 2 list of [ss_1, ss_2].
     """
-    ts = get_paper_ex_ts()
+    ts = paper_ex_ts
     np.testing.assert_allclose(
         ld_matrix(ts, sample_sets=partition), ts.ld_matrix(sample_sets=partition)
     )
@@ -1776,8 +1776,16 @@ def test_ld_empty_examples(ts):
         ts.ld_matrix(mode="branch")
 
 
-def test_input_validation():
-    ts = get_paper_ex_ts()
+def test_bad_modes(paper_ex_ts):
+    ts = paper_ex_ts
+    with pytest.raises(_tskit.LibraryError, match="UNSUPPORTED_STAT_MODE"):
+        ts.ld_matrix(mode="node")
+    with pytest.raises(_tskit.LibraryError, match="UNSUPPORTED_STAT_MODE"):
+        ts.ld_matrix(mode="mutation")
+
+
+def test_input_validation(paper_ex_ts):
+    ts = paper_ex_ts
     with pytest.raises(ValueError, match="Unknown two-locus statistic"):
         ts.ld_matrix(stat="bad_stat")
 
diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py
index 53d073b6e8..0053cc7be9 100644
--- a/python/tests/test_lowlevel.py
+++ b/python/tests/test_lowlevel.py
@@ -1997,39 +1997,50 @@ def test_divergence_matrix(self):
         windows = [0, ts.get_sequence_length()]
         ids = np.arange(n, dtype=np.int32)
         sizes = np.ones(n, dtype=np.uint64)
-        D = ts.divergence_matrix(windows, sizes, ids)
+        D = ts.divergence_matrix(windows, sizes, ids, mode="mutation")
         assert D.shape == (1, n, n)
-        D = ts.divergence_matrix(windows, sample_set_sizes=[1, 1], sample_sets=[0, 1])
+        D = ts.divergence_matrix(
+            windows, sample_set_sizes=[1, 1], sample_sets=[0, 1], mode="mutation"
+        )
         assert D.shape == (1, 2, 2)
         D = ts.divergence_matrix(
-            windows, sample_set_sizes=[1, 1], sample_sets=[0, 1], span_normalise=True
+            windows,
+            sample_set_sizes=[1, 1],
+            sample_sets=[0, 1],
+            span_normalise=True,
+            mode="mutation",
         )
         assert D.shape == (1, 2, 2)
 
         for bad_node in [-1, -2, 1000]:
             with pytest.raises(_tskit.LibraryError, match="TSK_ERR_NODE_OUT_OF_BOUNDS"):
-                ts.divergence_matrix(windows, [1, 1], [0, bad_node])
+                ts.divergence_matrix(windows, [1, 1], [0, bad_node], mode="mutation")
         with pytest.raises(ValueError, match="Sum of sample_set_sizes"):
-            ts.divergence_matrix(windows, [1, 2], [0, 1])
+            ts.divergence_matrix(windows, [1, 2], [0, 1], mode="mutation")
         with pytest.raises((ValueError, OverflowError), match="Overflow|out of bounds"):
-            ts.divergence_matrix(windows, [-1, 2], [0])
+            ts.divergence_matrix(windows, [-1, 2], [0], mode="mutation")
 
         with pytest.raises(TypeError, match="str"):
-            ts.divergence_matrix(windows, sizes, ids, span_normalise="xdf")
+            ts.divergence_matrix(
+                windows, sizes, ids, span_normalise="xdf", mode="mutation"
+            )
         with pytest.raises(TypeError):
-            ts.divergence_matrix(windoze=[0, 1])
+            ts.divergence_matrix(windoze=[0, 1], mode="mutation")
         with pytest.raises(ValueError, match="at least 2"):
             ts.divergence_matrix(
                 [0],
                 sizes,
                 ids,
+                mode="mutation",
             )
         with pytest.raises(_tskit.LibraryError, match="BAD_WINDOWS"):
-            ts.divergence_matrix([-1, 0, 1], sizes, ids)
+            ts.divergence_matrix([-1, 0, 1], sizes, ids, mode="mutation")
         with pytest.raises(ValueError, match="Unrecognised stats mode"):
             ts.divergence_matrix([0, 1], sizes, ids, mode="sdf")
         with pytest.raises(_tskit.LibraryError, match="UNSUPPORTED_STAT_MODE"):
             ts.divergence_matrix([0, 1], sizes, ids, mode="node")
+        with pytest.raises(_tskit.LibraryError, match="UNSUPPORTED_STAT_MODE"):
+            ts.divergence_matrix([0, 1], sizes, ids, mode="site")
 
     def test_load_tables_build_indexes(self):
         for ts in self.get_example_tree_sequences():
diff --git a/python/tests/test_relatedness_vector.py b/python/tests/test_relatedness_vector.py
index ebb74cf3a5..cd902b2afa 100644
--- a/python/tests/test_relatedness_vector.py
+++ b/python/tests/test_relatedness_vector.py
@@ -1179,7 +1179,7 @@ def test_modes(self):
             sequence_length=10,
             random_seed=123,
         )
-        for bad_mode in ("site", "node"):
+        for bad_mode in ("site", "node", "mutation"):
             with pytest.raises(
                 tskit.LibraryError, match="TSK_ERR_UNSUPPORTED_STAT_MODE"
             ):
diff --git a/python/tests/test_tree_stats.py b/python/tests/test_tree_stats.py
index 39b6d70e34..6a07a3de84 100644
--- a/python/tests/test_tree_stats.py
+++ b/python/tests/test_tree_stats.py
@@ -4566,6 +4566,20 @@ def verify(self, ts):
                 self.assertArrayAlmostEqual(afs_sum, tbl)
 
 
+class TestAlleleFrequencySpectrumErrors:
+
+    def test_errors(self):
+        ts = msprime.simulate(10, recombination_rate=2, random_seed=1)
+        with pytest.raises(
+            exceptions.LibraryError, match="TSK_ERR_UNSUPPORTED_STAT_MODE"
+        ):
+            _ = ts.allele_frequency_spectrum(mode="node")
+        with pytest.raises(
+            exceptions.LibraryError, match="TSK_ERR_UNSUPPORTED_STAT_MODE"
+        ):
+            _ = ts.allele_frequency_spectrum(mode="mutation")
+
+
 ############################################
 # End of specific stats tests.
 ############################################
diff --git a/python/tskit/trees.py b/python/tskit/trees.py
index 1e31048075..914ee13a3b 100644
--- a/python/tskit/trees.py
+++ b/python/tskit/trees.py
@@ -8659,52 +8659,6 @@ def _parse_stat_matrix_sample_sets(ids):
             sizes = np.array(sizes, dtype=size_dtype)
         return flat, sizes
 
-    # def divergence_matrix(self, sample_sets, windows=None, mode="site"):
-    #     """
-    #     Finds the mean divergence  between pairs of samples from each set of
-    #     samples and in each window. Returns a numpy array indexed by (window,
-    #     sample_set, sample_set).  Diagonal entries are corrected so that the
-    #     value gives the mean divergence for *distinct* samples, but it is not
-    #     checked whether the sample_sets are disjoint (so offdiagonals are not
-    #     corrected).  For this reason, if an element of `sample_sets` has only
-    #     one element, the corresponding diagonal will be NaN.
-
-    #     The mean divergence between two samples is defined to be the mean: (as
-    #     a TreeStat) length of all edges separating them in the tree, or (as a
-    #     SiteStat) density of segregating sites, at a uniformly chosen position
-    #     on the genome.
-
-    #     :param list sample_sets: A list of sets of IDs of samples.
-    #     :param iterable windows: The breakpoints of the windows (including start
-    #         and end, so has one more entry than number of windows).
-    #     :return: A list of the upper triangle of mean TMRCA values in row-major
-    #         order, including the diagonal.
-    #     """
-    #     ns = len(sample_sets)
-    #     indexes = [(i, j) for i in range(ns) for j in range(i, ns)]
-    #     x = self.divergence(sample_sets, indexes, windows, mode=mode)
-    #     nw = len(windows) - 1
-    #     A = np.ones((nw, ns, ns), dtype=float)
-    #     for w in range(nw):
-    #         k = 0
-    #         for i in range(ns):
-    #             for j in range(i, ns):
-    #                 A[w, i, j] = A[w, j, i] = x[w][k]
-    #                 k += 1
-    #     return A
-    # NOTE: see older definition of divmat here, which may be useful when documenting
-    # this function. See https://github.com/tskit-dev/tskit/issues/2781
-
-    # NOTE for documentation of sample_sets. We *must* use samples currently because
-    # the normalisation for non-sample nodes is tricky. Do we normalise by the
-    # total span of the ts where the node is 'present' in the tree? We avoid this
-    # by insisting on sample nodes.
-
-    # NOTE for documentation of num_threads. Need to explain that the
-    # its best to think of as the number of background *worker* threads.
-    # default is to run without any worker threads. If you want to run
-    # with all the cores on the machine, use num_threads=os.cpu_count().
-
     def divergence_matrix(
         self,
         sample_sets=None,
@@ -8714,9 +8668,46 @@ def divergence_matrix(
         mode=None,
         span_normalise=True,
     ):
+        """
+        Finds the matrix of pairwise mean divergence values between groups
+        of sample nodes. Returns a numpy array indexed by (window,
+        sample_set, sample_set): the [k,i,j]th value of the result gives the
+        mean divergence between pairs of samples from the i-th and j-th
+        sample sets in the k-th window. Diagonal entries are corrected so that the
+        value gives the mean divergence for *distinct* samples, but it is not
+        checked whether the sample_sets are disjoint (so offdiagonals are not
+        corrected).  For this reason, if an element of `sample_sets` has only
+        one element, the corresponding diagonal will be NaN.
+
+        The mean divergence between two samples is defined to be the mean
+        length of all edges separating them in the tree at a uniformly chosen position
+        on the genome, where "length" is measured either in terms of time
+        (with `mode="branch"`) or in terms of number of mutations
+        (with `mode="mutation"`).
+
+        :param list sample_sets: A list of sets of IDs of samples.
+        :param list windows: The breakpoints of the windows (including start
+            and end, so has one more entry than number of windows).
+        :param str mode: A string giving the "type" of the statistic to be computed
+            (defaults to "mutation"; "site" and "node" not supported).
+        :return: A list of the upper triangle of mean TMRCA values in row-major
+            order, including the diagonal.
+        """
+        # NOTE: see older definition of divmat here, which may be useful when documenting
+        # this function. See https://github.com/tskit-dev/tskit/issues/2781
+
+        # NOTE for documentation of sample_sets. We *must* use samples currently because
+        # the normalisation for non-sample nodes is tricky. Do we normalise by the
+        # total span of the ts where the node is 'present' in the tree? We avoid this
+        # by insisting on sample nodes.
+
+        # NOTE for documentation of num_threads. Need to explain that the
+        # its best to think of as the number of background *worker* threads.
+        # default is to run without any worker threads. If you want to run
+        # with all the cores on the machine, use num_threads=os.cpu_count().
         windows_specified = windows is not None
         windows = self.parse_windows(windows)
-        mode = "site" if mode is None else mode
+        mode = "mutation" if mode is None else mode
 
         if sample_sets is None:
             sample_sets = self.samples()
@@ -8921,50 +8912,51 @@ def genetic_relatedness_matrix(
         """
         Computes the full matrix of pairwise genetic relatedness values
         between (and within) pairs of sets of nodes from ``sample_sets``.
-        *Warning:* this does not compute exactly the same thing as
-        :meth:`.genetic_relatedness`: see below for more details.
-
-        If `mode="branch"`, then the value obtained is the same as that from
-        :meth:`.genetic_relatedness`, using the options `centre=True` and
-        `proportion=False`. The same is true if `mode="site"` and all sites have
-        at most one mutation.
-
-        However, if some sites have more than one mutation, the value may differ.
-        The reason is that this function (for efficiency) computes relatedness
-        using :meth:`.divergence` and the following relationship.
-        "Relatedness" measures the number of *shared* alleles (or branches),
-        while "divergence" measures the number of *non-shared* alleles (or branches).
-        Let :math:`T_i` be the total distance from sample :math:`i` up to the root;
-        then if :math:`D_{ij}` is the divergence between :math:`i` and :math:`j`
-        and :math:`R_{ij}` is the relatedness between :math:`i` and :math:`j`, then
-        :math:`T_i + T_j = D_{ij} + 2 R_{ij}.`
-        So, for any samples :math:`I`, :math:`J`, :math:`S`, :math:`T`
-        (that may now be random choices),
+        Values are the same as produced by :meth:`.genetic_relatedness`,
+        using the options `centre=True` and `proportion=False`.
+
+        This function, computes relatedness using :meth:`.divergence_matrix`
+        and the following relationship.  "Relatedness" measures the number of
+        *shared* mutations (or branches), while "divergence" measures the
+        number of *non-shared* alleles (or branches).  Let :math:`T_i` be the
+        total distance from sample :math:`i` up to the root; then if
+        :math:`D_{ij}` is the divergence between :math:`i` and :math:`j` and
+        :math:`R_{ij}` is the relatedness between :math:`i` and :math:`j`, then
+        :math:`T_i + T_j = D_{ij} + 2 R_{ij}.` So, for any samples :math:`I`,
+        :math:`J`, :math:`S`, :math:`T` (that may now be random choices),
         :math:`R_{IJ}-R_{IS}-R_{JT}+R_{ST} = (D_{IJ}-D_{IS}-D_{JT}+D_{ST})/ (-2)`.
-        Note, however, that this relationship only holds for `mode="site"`
+        Note, however, that this relationship only holds for genotypes
         if we can treat "number of differing alleles" as distances on the tree;
         this is not necessarily the case in the presence of multiple mutations.
+        By using `mode="mutation"`, we count distances as "number of mutations",
+        and using `mode="branch"`, we count distances using time.
+        On the other hand, `mode="site"` looks at differences in allelic state;
+        this is not compatible with the underlying efficient algorithm,
+        so is not supported.
 
-        Another caveat in the above relationship between :math:`R` and :math:`D`
-        is that :meth:`.divergence` of a sample set to itself does not include
-        the "self" comparisons (so as to provide an unbiased estimator of a
-        population quantity), while the usual definition of genetic relatedness
-        *does* include such comparisons (to provide, for instance, an appropriate
-        value for prospective results beginning with only a given set of
-        individuals).
+        Note that this function defaults to ``mode="mutation"``, unlike
+        :meth:`.genetic_relatedness`, which defaults to ``mode="site"``,
+        so there will be minor differences in default values in the presence
+        of multiple mutations per site.
 
         :param list sample_sets: A list of lists of Node IDs, specifying the
             groups of nodes to compute the statistic with.
         :param list windows: An increasing list of breakpoints between the windows
             to compute the statistic in.
         :param str mode: A string giving the "type" of the statistic to be computed
-            (defaults to "site").
+            (defaults to "mutation"; "site" and "node" are not supported).
         :param bool span_normalise: Whether to divide the result by the span of the
             window (defaults to True). Has no effect if ``proportion`` is True.
         :return: A ndarray with shape equal to (num windows, num statistics).
             If there is one pair of sample sets and windows=None, a numpy scalar is
             returned.
         """
+        if mode == "site":
+            warnings.warn(
+                "Passing mode='site' to genetic_relatedness_matrix is "
+                "no longer allowed; please pass mode='mutation' instead.",
+                stacklevel=4,
+            )
         D = self.divergence_matrix(
             sample_sets,
             windows=windows,