diff --git a/docs/python-api.md b/docs/python-api.md index 1713c1344d..7129be92f0 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -597,6 +597,7 @@ Functions and static methods ```{eval-rst} .. autosummary:: Tree.kc_distance + Tree.rf_distance ``` (sec_python_api_trees_balance)= diff --git a/python/tests/test_distance_metrics.py b/python/tests/test_distance_metrics.py index d4ca30a85c..c08351962f 100644 --- a/python/tests/test_distance_metrics.py +++ b/python/tests/test_distance_metrics.py @@ -1420,6 +1420,10 @@ def test_ignores_subtrees_with_no_samples(self): assert t1.kc_distance(t2, 1) == 0 +# Test the RF distance metrics: +# TODO: integrate with the KC tests + + class TestTreeSameSamples: # Tree1 # 2.00┊ 6 ┊ @@ -1569,7 +1573,7 @@ def tree(self): return tables.tree_sequence().first() def test_rf_distance(self): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="single root"): self.tree().rf_distance(self.tree()) diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 79ca182d67..c4c165529a 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -2965,15 +2965,30 @@ def _get_sample_sets(self): def rf_distance(self, other): """ - Returns the Robinson-Foulds distance between the specified pair of trees. - - .. seealso:: - See `Robinson & Foulds (1981) - <https://doi.org/10.1016/0025-5564(81)90043-2>`_ for more details. - - :param Tree other: The other tree to compare to. - :return: The computed Robinson-Foulds distance between this tree and other. + Returns the (unweighted) Robinson-Foulds distance between the specified pair + of trees, where corresponding samples between the two trees are identified by + node ID. The Robinson-Foulds distance (also known as the symmetric difference) + is defined as the number of bipartitions that are present in one tree but + not the other (see + `Robinson & Foulds (1981) <https://doi.org/10.1016/0025-5564(81)90043-2>`_). + This method returns the unnormalised RF distance: if the + trees are strictly bifurcating, i.e. 
binary, the value can be + normalised by dividing by the maximum, which is $2n-4$ for two rooted + trees of $n$ samples (however, if the trees contain polytomies, the maximum + RF distance is less easily defined). + + .. note:: + The RF distance can be sensitive to small changes in topology: in some + cases, changing the position of a single leaf can result in the maximum + RF distance. Therefore even if adjacent trees in a tree sequence differ + by a single subtree-prune-and-regraft operation, the RF distance + between them can be large. + + :param Tree other: The other tree to compare to. Trees are treated as rooted. + :return: The unweighted Robinson-Foulds distance between this tree and ``other``. :rtype: int + :raises ValueError: If either tree has multiple roots, or the trees have + different sample nodes. """ if self.num_roots != 1 or other.num_roots != 1: raise ValueError("Trees must have a single root")