Skip to content

Add dict_of_dicts to tree sequence #1296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

**Features**

- New `TreeSequence.as_dict_of_dicts` method for exporting the edges of an entire tree
sequence as a dict of dicts that can be read into a NetworkX multigraph
(:user:`hyanwong`, :issue:`1294`, :pr:`1296`).

- Add `Table.append` method for adding rows from classes such as `SiteTableRow` and
`Site` (:user:`benjeffery`, :issue:`1111`, :pr:`1254`).

Expand Down
47 changes: 47 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,53 @@ def test_tree_node_edges(self):
assert edge.right >= tree.interval.right
assert np.all(edge_visited)

def test_as_dict_of_dicts(self):
    """
    Convert every example tree sequence to a NetworkX ``MultiDiGraph`` via
    ``as_dict_of_dicts`` and check the resulting graph topology.
    """
    for example_ts in get_example_tree_sequences():
        # Simplifying (while keeping unary nodes) removes unreferenced nodes
        simplified = example_ts.simplify(keep_unary=True)
        graph = nx.from_dict_of_dicts(
            simplified.as_dict_of_dicts(),
            create_using=nx.MultiDiGraph,
            multigraph_input=True,
        )
        self.verify_nx_graph_topology(simplified, graph)

def verify_nx_graph_topology(self, ts, g):
    """
    Assert that the NetworkX graph ``g`` has the same topology as the edge
    table of the tree sequence ``ts``: it is acyclic, has one graph edge per
    tree sequence edge, covers every node ID, and agrees on which nodes
    have parents and which have children.
    """
    assert nx.is_directed_acyclic_graph(g)
    assert ts.num_edges == g.number_of_edges()

    # Isolated nodes may be absent from the graph, so the sample nodes are
    # added to the graph's node set before comparing against all node IDs.
    graph_nodes = set(g.nodes)
    every_node_id = set(range(ts.num_nodes))
    assert graph_nodes | set(ts.samples()) == every_node_id

    # Every non-root node appears as a child in the edge table.
    nodes_with_incoming = {u for u in g.nodes if g.in_degree(u) != 0}
    assert set(ts.tables.edges.child) == nodes_with_incoming

    # Every non-leaf node appears as a parent in the edge table.
    nodes_with_outgoing = {u for u in g.nodes if g.out_degree(u) != 0}
    assert set(ts.tables.edges.parent) == nodes_with_outgoing

def test_nx_tree_ts_equiv(self):
    """
    Check that restricting the whole-sequence multigraph to the edges that
    start at position 0 (or end at the sequence length) gives the same
    parent-child pairs as the first (or last) tree's own dict of dicts.
    """
    ts = msprime.simulate(10, recombination_rate=1, random_seed=123)
    g_first = nx.from_dict_of_dicts(
        ts.as_dict_of_dicts(),
        create_using=nx.MultiDiGraph,
        multigraph_input=True,
    )
    # Take the copy before any edges are removed from the original graph.
    g_last = g_first.copy()

    # Drop every edge that does not start at the left end of the sequence.
    not_leftmost = [
        (parent, child, key)
        for parent, child, key, attrs in g_first.edges(keys=True, data=True)
        if d_left_differs(attrs)
    ] if False else [
        (parent, child, key)
        for parent, child, key, attrs in g_first.edges(keys=True, data=True)
        if attrs["left"] != 0
    ]
    g_first.remove_edges_from(not_leftmost)

    # Drop every edge that does not extend to the right end of the sequence.
    not_rightmost = [
        (parent, child, key)
        for parent, child, key, attrs in g_last.edges(keys=True, data=True)
        if attrs["right"] != ts.sequence_length
    ]
    g_last.remove_edges_from(not_rightmost)

    t_first = nx.DiGraph(ts.first().as_dict_of_dicts())
    t_last = nx.DiGraph(ts.last().as_dict_of_dicts())

    first_pairs = set(nx.DiGraph(g_first).edges)
    last_pairs = set(nx.DiGraph(g_last).edges)
    assert first_pairs == set(t_first.edges)
    assert last_pairs == set(t_last.edges)


class TestTreeSequenceMethodSignatures:
ts = msprime.simulate(10, random_seed=1234)
Expand Down
48 changes: 48 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -4992,6 +4992,54 @@ def to_macs(self):
)
return "\n".join(output) + "\n"

def as_dict_of_dicts(self):
    """
    Export every edge of this tree sequence as nested dictionaries suitable
    for conversion to a `networkx graph <https://networkx.github.io/
    documentation/stable/reference/classes/digraph.html>`_.

    The same parent and child can be joined by several edges (each spanning
    a disjoint genomic region), so the result must be read into a
    MultiDiGraph (or MultiGraph) by passing
    ``create_using=nx.MultiDiGraph, multigraph_input=True``.

    For example::

        >>> import tskit
        >>> import networkx as nx
        >>> ts = tskit.Tree.generate_star(10).tree_sequence
        >>> graph = nx.from_dict_of_dicts(
                ts.as_dict_of_dicts(),
                create_using=nx.MultiDiGraph,
                multigraph_input=True)
        >>> graph.edges(keys=True, data=True)  # List the edges
        >>> root_node_id = 10
        >>> leaf_node_id = 0
        >>> edge_id = 0
        >>> graph.edges[root_node_id, leaf_node_id, edge_id]  # get a graph edge

    :return: A three-level nested dictionary. The first key is the edge
        parent, the second key is the edge child, and the third key is the
        edge ID (used as the NetworkX "key" that distinguishes this edge
        among the possibly multiple edges per parent-child pair). Each
        innermost value is an edge annotation dictionary, currently holding
        the keys "left", "right", and "branch_length" (the time between the
        parent and child nodes), which are imported as annotations on each
        graph edge.
    """
    adjacency = {}
    for e in self.edges():
        u, v = e.parent, e.child
        # Create the parent and parent->child levels on first sight; the
        # innermost level is keyed by edge ID.
        annotations_by_edge_id = adjacency.setdefault(u, {}).setdefault(v, {})
        annotations_by_edge_id[e.id] = {
            "left": e.left,
            "right": e.right,
            "branch_length": self.node(u).time - self.node(v).time,
        }
    return adjacency

def simplify(
self,
samples=None,
Expand Down