Skip to content

Commit 00967cb

Browse files
Convert nexus tests to use dendropy
Closes #1785. Add dendropy to various requirements lists. Also refactor the tests to use pytest a little better.
1 parent d7514b8 commit 00967cb

File tree

6 files changed

+61
-45
lines changed

6 files changed

+61
-45
lines changed

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@
3737
- For ``TreeSequence.samples`` all arguments after ``population`` are now keyword only
3838
(:user:`benjeffery`, :issue:`1715`, :pr:`1831`).
3939

40+
- Fix bugs in the format produced by ``TreeSequence.to_nexus`` to make
41+
it standards-compliant. (:user:`jeetsukumaran`, :user:`jeromekelleher`,
42+
:issue:`1785`, :pr:`1835`, :pr:`1836`)
43+
[FIXME MORE UPDATES HERE AS WE CHANGE THE LABELS ETC]
44+
4045
**Features**
4146

4247
- Allow skipping of site and mutation tables in ``TableCollection.sort``

python/requirements/CI-complete/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
biopython==1.79
22
black==21.9b0
33
coverage==6.0.2
4+
dendropy==4.5.2
45
flake8==4.0.1
56
h5py==3.4.0
67
jsonschema==3.2.0

python/requirements/CI-tests-pip/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ svgwrite==1.4.1
66
portion==2.1.6
77
xmlunittest==0.5.0
88
biopython==1.79
9+
dendropy==4.5.2
910
kastore==0.3.1
1011
networkx==2.6.2
1112
msgpack==1.0.2

python/requirements/development.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ black
44
breathe
55
codecov
66
coverage
7+
dendropy
78
flake8
89
h5py>=2.6.0
910
jsonschema>=3.0.0

python/requirements/development.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- codecov
1111
- coverage
1212
- cunit
13+
- dendropy
1314
- doxygen
1415
- flake8
1516
- h5py>=2.6.0

python/tests/test_phylo_formats.py

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2018-2019 Tskit Developers
3+
# Copyright (c) 2018-2021 Tskit Developers
44
# Copyright (c) 2017 University of Oxford
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -23,19 +23,16 @@
2323
"""
2424
Tests for the newick output feature.
2525
"""
26-
import itertools
27-
import unittest
28-
26+
import dendropy
2927
import msprime
3028
import newick
3129
import pytest
32-
from Bio.Nexus import Nexus
3330

3431
import tskit
3532
from tests import tsutil
3633

3734

38-
class TreeExamples(unittest.TestCase):
35+
class TreeExamples:
3936
"""
4037
Generates trees for testing the phylo format outputs.
4138
"""
@@ -154,8 +151,10 @@ def verify_newick_topology(
154151
name = leaf_labels[u]
155152
node = newick_tree.get_node(name)
156153
while u != root:
157-
branch_len = tree.branch_length(u) if include_branch_lengths else None
158-
self.assertAlmostEqual(node.length, branch_len)
154+
if include_branch_lengths:
155+
assert node.length == pytest.approx(tree.branch_length(u))
156+
else:
157+
assert node.length is None
159158
node = node.ancestor
160159
u = tree.parent(u)
161160
assert node.ancestor is None
@@ -245,48 +244,56 @@ def test_no_lengths_equiv(self):
245244
class TestNexus(TreeExamples):
246245
"""
247246
Tests that the nexus output has the properties that we need using
248-
external Nexus parser.
247+
an external Nexus parser.
249248
"""
250249

251-
def verify_tree(self, nexus_tree, tsk_tree):
252-
assert len(nexus_tree.get_terminals()) == tsk_tree.num_samples()
253-
254-
bio_node_map = {}
255-
for node_id in nexus_tree.all_ids():
256-
bio_node = nexus_tree.node(node_id)
257-
bio_node_map[bio_node.data.taxon] = bio_node
250+
def verify_tree(self, tsk_tree, dpy_tree):
251+
"""
252+
Checks that the specified Dendropy tree is equal to the specified
253+
tskit tree, up to the limits imposed by newick.
254+
"""
255+
label_map = {}
256+
for node in dpy_tree:
257+
label_map[str(node.taxon.label)] = node
258258

259-
for u in tsk_tree.nodes():
259+
def get_label(u):
260260
node = tsk_tree.tree_sequence.node(u)
261-
label = f"tsk_{node.id}_{node.flags}"
262-
bio_node = bio_node_map.pop(label)
263-
self.assertAlmostEqual(
264-
bio_node.data.branchlength, tsk_tree.branch_length(u)
265-
)
266-
if tsk_tree.parent(u) == tskit.NULL:
267-
assert bio_node.prev is None
261+
return f"tsk_{node.id}_{node.flags}"
262+
263+
for u in tsk_tree.nodes(order="postorder"):
264+
# Consume the nodes in the dendropy node map one-by-one
265+
dpy_node = label_map.pop(get_label(u))
266+
parent = tsk_tree.parent(u)
267+
if parent == tskit.NULL:
268+
assert dpy_node.edge_length is None
269+
assert dpy_node.parent_node is None
268270
else:
269-
bio_node_parent = nexus_tree.node(bio_node.prev)
270-
parent = tsk_tree.tree_sequence.node(tsk_tree.parent(u))
271-
assert bio_node_parent.data.taxon == f"tsk_{parent.id}_{parent.flags}"
272-
assert len(bio_node_map) == 0
273-
274-
def verify_nexus_topology(self, treeseq):
275-
nexus = treeseq.to_nexus(precision=16)
276-
nexus_treeseq = Nexus.Nexus(nexus)
277-
assert treeseq.num_trees == len(nexus_treeseq.trees)
278-
for tree, nexus_tree in itertools.zip_longest(
279-
treeseq.trees(), nexus_treeseq.trees
280-
):
281-
name = nexus_tree.name
282-
split_name = name.split("_")
283-
assert len(split_name) == 2
284-
start = float(split_name[0][4:])
285-
end = float(split_name[1])
286-
self.assertAlmostEqual(tree.interval.left, start)
287-
self.assertAlmostEqual(tree.interval.right, end)
288-
289-
self.verify_tree(nexus_tree, tree)
271+
assert tsk_tree.branch_length(u) == pytest.approx(dpy_node.edge_length)
272+
assert dpy_node.parent_node is label_map[get_label(parent)]
273+
274+
assert len(label_map) == 0
275+
276+
def verify_nexus_topology(self, ts):
277+
nexus = ts.to_nexus(precision=16)
278+
tree_list = dendropy.TreeList()
279+
tree_list.read(
280+
data=nexus,
281+
schema="nexus",
282+
preserve_underscores=True, # TODO remove this when we update labels
283+
rooting="default-rooted", # Remove this when we have root marking, #1815
284+
suppress_internal_node_taxa=False,
285+
)
286+
assert ts.num_trees == len(tree_list)
287+
for tsk_tree, dpy_tree in zip(ts.trees(), tree_list):
288+
# https://github.com/tskit-dev/tskit/issues/1815
289+
# FIXME this label should probably start with "[&R]" and
290+
# use some other separator than "_" to delimit the
291+
# left and right coords. Should we use a "weight" instead?
292+
assert dpy_tree.label.startswith("tree")
293+
left, right = map(float, dpy_tree.label[4:].split("_"))
294+
assert tsk_tree.interval.left == pytest.approx(left)
295+
assert tsk_tree.interval.right == pytest.approx(right)
296+
self.verify_tree(tsk_tree, dpy_tree)
290297

291298
def test_binary_tree(self):
292299
ts = self.get_binary_example()

0 commit comments

Comments
 (0)