Skip to content

Commit

Permalink
Add documentation to rf_distance
Browse files Browse the repository at this point in the history
Fixes #2992
  • Loading branch information
hyanwong committed Oct 16, 2024
1 parent bc22730 commit 2f54259
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,7 @@ Functions and static methods
```{eval-rst}
.. autosummary::
Tree.kc_distance
Tree.rf_distance
```

(sec_python_api_trees_balance)=
Expand Down
6 changes: 5 additions & 1 deletion python/tests/test_distance_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,10 @@ def test_ignores_subtrees_with_no_samples(self):
assert t1.kc_distance(t2, 1) == 0


# Test the RF distance metrics:
# TODO: integrate with the KC tests


class TestTreeSameSamples:
# Tree1
# 2.00┊ 6 ┊
Expand Down Expand Up @@ -1569,7 +1573,7 @@ def tree(self):
return tables.tree_sequence().first()

def test_rf_distance(self):
with pytest.raises(ValueError):
with pytest.raises(ValueError, match="single root"):
self.tree().rf_distance(self.tree())


Expand Down
31 changes: 23 additions & 8 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2965,15 +2965,30 @@ def _get_sample_sets(self):

def rf_distance(self, other):
"""
Returns the Robinson-Foulds distance between the specified pair of trees.
.. seealso::
See `Robinson & Foulds (1981)
<https://doi.org/10.1016/0025-5564(81)90043-2>`_ for more details.
:param Tree other: The other tree to compare to.
:return: The computed Robinson-Foulds distance between this tree and other.
Returns the (unweighted) Robinson-Foulds distance between the specified pair
of trees, where corresponding samples between the two trees are identified by
node ID. The Robinson-Foulds distance (also known as the symmetric difference)
is defined as the number of bipartitions that are present in one tree but
not the other (see
`Robinson & Foulds (1981) <https://doi.org/10.1016/0025-5564(81)90043-2>`_).
This method returns the unnormalised RF distance: if the
trees are strictly bifurcating, i.e. binary, the value can be
normalised by dividing by the maximum, which is :math:`2n-4` for two rooted
trees of :math:`n` samples (however, if the trees contain polytomies, the
maximum RF distance is less easily defined).
.. note::
The RF distance can be sensitive to small changes in topology: in some
cases, changing the position of a single leaf can result in the maximum
RF distance. Therefore even if adjacent trees in a tree sequence differ
by a single subtree-prune-and-regraft operation, the RF distance
between them can be large.
:param Tree other: The other tree to compare to. Trees are treated as rooted.
:return: The unweighted Robinson-Foulds distance between this tree and ``other``.
:rtype: int
:raises ValueError: If either tree has multiple roots, or the trees have
different sample nodes.
"""
if self.num_roots != 1 or other.num_roots != 1:
raise ValueError("Trees must have a single root")
Expand Down

0 comments on commit 2f54259

Please sign in to comment.