Skip to content

Added parallel kruskal algorithm #184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pydatastructs/graphs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from .algorithms import (
breadth_first_search,
breadth_first_search_parallel,
minimum_spanning_tree
minimum_spanning_tree,
minimum_spanning_tree_parallel
)

# Add every name listed in algorithms.__all__ to this package's __all__.
__all__.extend(algorithms.__all__)
107 changes: 92 additions & 15 deletions pydatastructs/graphs/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from pydatastructs.utils import GraphEdge
from pydatastructs.miscellaneous_data_structures import DisjointSetForest
from pydatastructs.graphs.graph import Graph
from pydatastructs.linear_data_structures.algorithms import merge_sort_parallel

# Public names of this module. Each entry must be followed by a comma:
# two adjacent string literals are silently concatenated by Python,
# which would turn two entries into one bogus export name.
__all__ = [
    'breadth_first_search',
    'breadth_first_search_parallel',
    'minimum_spanning_tree',
    'minimum_spanning_tree_parallel'
]

def breadth_first_search(
Expand Down Expand Up @@ -190,36 +192,109 @@ def _breadth_first_search_parallel_adjacency_list(

# The adjacency-matrix name is bound to the very same function object as
# the adjacency-list one, so both implementations run identical code.
_breadth_first_search_parallel_adjacency_matrix = _breadth_first_search_parallel_adjacency_list

def _generate_mst_object(graph):
    """
    Builds a new ``Graph`` out of the vertex objects of ``graph``.

    Parameters
    ==========

    graph: Graph
        The graph whose vertices are looked up, one attribute
        per entry of ``graph.vertices``.

    Returns
    =======

    mst: Graph
        A graph constructed from the collected vertex objects.
        Callers add the spanning tree edges to it afterwards.
    """
    nodes = []
    for name in graph.vertices:
        # Attribute names must be strings; presumably vertex names
        # may be of other types (e.g. ints) - hence the str().
        nodes.append(getattr(graph, str(name)))
    return Graph(*nodes)

def _sort_edges(graph, num_threads=None):
    """
    Returns the ``(key, edge)`` pairs of ``graph.edge_weights``
    ordered by non-decreasing ``edge.value``.

    Parameters
    ==========

    graph: Graph
        The graph whose edges are to be sorted. Only its
        ``edge_weights`` mapping is read.
    num_threads: int
        Optional, by default, None. When None the built-in
        ``sorted`` is used, otherwise the pairs are sorted
        with ``merge_sort_parallel`` using this many threads.

    Returns
    =======

    edges: list
        A new list of ``(key, edge)`` tuples; the mapping
        stored on the graph is left untouched.
    """
    edges = list(graph.edge_weights.items())
    # Zero or one edge is already in order. Returning early also keeps
    # an empty list away from merge_sort_parallel, which computes
    # log(number of elements) and therefore fails on zero elements.
    if len(edges) < 2:
        return edges
    if num_threads is None:
        return sorted(edges, key=lambda item: item[1].value)

    # Sorts ``edges`` in place; ``<=`` keeps equal-weight edges in
    # their original relative order.
    merge_sort_parallel(edges, num_threads,
                        comp=lambda u, v: u[1].value <= v[1].value)
    return edges

def _minimum_spanning_tree_kruskal_adjacency_list(graph):
    """
    Kruskal's algorithm: visits the edges of ``graph`` in
    non-decreasing order of weight and keeps every edge whose
    end points are not yet connected.

    Parameters
    ==========

    graph: Graph
        The graph whose minimum spanning tree is computed.

    Returns
    =======

    mst: Graph
        A new graph holding the vertices of ``graph`` and
        the selected edges.
    """
    mst = _generate_mst_object(graph)
    dsf = DisjointSetForest()
    # Every vertex starts out as its own component.
    for v in graph.vertices:
        dsf.make_set(v)
    for _, edge in _sort_edges(graph):
        u, v = edge.source.name, edge.target.name
        # Different roots mean the edge joins two separate components,
        # so adding it cannot close a cycle.
        if dsf.find_root(u) is not dsf.find_root(v):
            mst.add_edge(u, v, edge.value)
            dsf.union(u, v)
    return mst

def _minimum_spanning_tree_kruskal_adjacency_matrix(graph):
mst = Graph(*[getattr(graph, str(v)) for v in graph.vertices])
sort_key = lambda item: item[1].value
# Bind the adjacency-matrix name to the adjacency-list routine so that
# a lookup by "<algorithm>_<implementation>" name finds it for both.
_minimum_spanning_tree_kruskal_adjacency_matrix = (
    _minimum_spanning_tree_kruskal_adjacency_list
)

def minimum_spanning_tree(graph, algorithm):
    """
    Computes a minimum spanning tree for the given
    graph and algorithm.

    Parameters
    ==========

    graph: Graph
        The graph whose minimum spanning tree
        has to be computed.
    algorithm: str
        The algorithm which should be used for
        computing a minimum spanning tree.
        Currently the following algorithms are
        supported,
        'kruskal' -> Kruskal's algorithm as given in
                     [1].

    Returns
    =======

    mst: Graph
        A minimum spanning tree using the implementation
        same as the graph provided in the input.

    Raises
    ======

    NotImplementedError
        If this module has no routine for the given
        combination of algorithm and graph implementation.

    Examples
    ========

    >>> from pydatastructs import Graph, AdjacencyListGraphNode
    >>> from pydatastructs import minimum_spanning_tree
    >>> u = AdjacencyListGraphNode('u')
    >>> v = AdjacencyListGraphNode('v')
    >>> G = Graph(u, v)
    >>> G.add_edge(u.name, v.name, 3)
    >>> mst = minimum_spanning_tree(G, 'kruskal')
    >>> u_n = mst.neighbors(u.name)
    >>> mst.get_edge(u.name, u_n[0].name).value
    3

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm
    """
    import pydatastructs.graphs.algorithms as algorithms
    # Routines are resolved by name:
    # _minimum_spanning_tree_<algorithm>_<graph implementation>
    func = "_minimum_spanning_tree_" + algorithm + "_" + graph._impl
    routine = getattr(algorithms, func, None)
    if routine is None:
        raise NotImplementedError(
            "Currently %s algorithm for %s implementation of graphs "
            "isn't implemented for finding minimum spanning trees."
            % (algorithm, graph._impl))
    return routine(graph)

def _minimum_spanning_tree_parallel_kruskal_adjacency_list(graph, num_threads):
    """
    Kruskal's algorithm in which the edges are sorted with
    ``num_threads`` threads; the selection of edges that follows
    is a plain sequential loop.

    Parameters
    ==========

    graph: Graph
        The graph whose minimum spanning tree is computed.
    num_threads: int
        The number of threads handed to the edge sort.

    Returns
    =======

    mst: Graph
        A new graph holding the vertices of ``graph`` and
        the selected edges.
    """
    mst = _generate_mst_object(graph)
    dsf = DisjointSetForest()
    # Every vertex starts out as its own component.
    for v in graph.vertices:
        dsf.make_set(v)
    edges = _sort_edges(graph, num_threads)
    for _, edge in edges:
        u, v = edge.source.name, edge.target.name
        # Different roots mean the edge joins two separate components,
        # so adding it cannot close a cycle.
        if dsf.find_root(u) is not dsf.find_root(v):
            mst.add_edge(u, v, edge.value)
            dsf.union(u, v)
    return mst

def minimum_spanning_tree(graph, algorithm):
# Bind the adjacency-matrix name to the adjacency-list routine so that
# a lookup by "<algorithm>_<implementation>" name finds it for both.
_minimum_spanning_tree_parallel_kruskal_adjacency_matrix = (
    _minimum_spanning_tree_parallel_kruskal_adjacency_list
)

def minimum_spanning_tree_parallel(graph, algorithm, num_threads):
"""
Computes a minimum spanning tree for the given
graph and algorithm.
graph and algorithm using the given number of threads.

Parameters
==========
Expand All @@ -234,6 +309,8 @@ def minimum_spanning_tree(graph, algorithm):
supported,
'kruskal' -> Kruskal's algorithm as given in
[1].
num_threads: int
The number of threads to be used.

Returns
=======
Expand All @@ -246,26 +323,26 @@ def minimum_spanning_tree(graph, algorithm):
========

>>> from pydatastructs import Graph, AdjacencyListGraphNode
>>> from pydatastructs import minimum_spanning_tree
>>> from pydatastructs import minimum_spanning_tree_parallel
>>> u = AdjacencyListGraphNode('u')
>>> v = AdjacencyListGraphNode('v')
>>> G = Graph(u, v)
>>> G.add_edge(u.name, v.name, 3)
>>> mst = minimum_spanning_tree(G, 'kruskal')
>>> mst = minimum_spanning_tree_parallel(G, 'kruskal', 3)
>>> u_n = mst.neighbors(u.name)
>>> mst.get_edge(u.name, u_n[0].name).value
3

References
==========

.. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm
.. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm#Parallel_algorithm
"""
import pydatastructs.graphs.algorithms as algorithms
func = "_minimum_spanning_tree_" + algorithm + "_" + graph._impl
func = "_minimum_spanning_tree_parallel_" + algorithm + "_" + graph._impl
if not hasattr(algorithms, func):
raise NotImplementedError(
"Currently %s algoithm for %s implementation of graphs "
"isn't implemented for finding minimum spanning trees."
%(algorithm, graph._impl))
return getattr(algorithms, func)(graph)
return getattr(algorithms, func)(graph, num_threads)
30 changes: 29 additions & 1 deletion pydatastructs/graphs/tests/test_algorithms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pydatastructs import (breadth_first_search, Graph,
breadth_first_search_parallel, minimum_spanning_tree)
breadth_first_search_parallel, minimum_spanning_tree,
minimum_spanning_tree_parallel)


def test_breadth_first_search():
Expand Down Expand Up @@ -148,3 +149,30 @@ def _test_minimum_spanning_tree(ds, algorithm):

_test_minimum_spanning_tree("List", "kruskal")
_test_minimum_spanning_tree("Matrix", "kruskal")

def test_minimum_spanning_tree_parallel():
    # Runs the same five-vertex scenario once per node flavour.

    def _check(ds, algorithm):
        import pydatastructs.utils.misc_util as utils
        node_cls = getattr(utils, "Adjacency" + ds + "GraphNode")
        a, b, c, d, e = (node_cls(label) for label in range(5))
        graph = Graph(a, b, c, d, e)

        # Each connection is inserted in both directions, forward first.
        connections = (
            (a, c, 10),
            (a, d, 7),
            (c, d, 9),
            (d, b, 32),
            (d, e, 23),
        )
        for src, dst, weight in connections:
            graph.add_edge(src.name, dst.name, weight)
            graph.add_edge(dst.name, src.name, weight)

        mst = minimum_spanning_tree_parallel(graph, algorithm, 3)

        # Either orientation of a tree edge is acceptable, so both
        # are listed; the tree itself must hold half as many entries.
        expected_mst = {
            ('0_3', 7), ('2_3', 9), ('3_4', 23), ('3_1', 32),
            ('3_0', 7), ('3_2', 9), ('4_3', 23), ('1_3', 32),
        }
        found = mst.edge_weights.items()
        assert len(expected_mst) == 2*len(found)
        for key, edge in found:
            assert (key, edge.value) in expected_mst

    for flavour in ("List", "Matrix"):
        _check(flavour, "kruskal")
19 changes: 15 additions & 4 deletions pydatastructs/linear_data_structures/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
'merge_sort_parallel'
]

def _merge(array, sl, el, sr, er, end):
def _merge(array, sl, el, sr, er, end, comp):
l, r = [], []
for i in range(sl, el + 1):
if (i <= end and
Expand All @@ -22,7 +22,7 @@ def _merge(array, sl, el, sr, er, end):
array[i] = None
i, j, k = 0, 0, sl
while i < len(l) and j < len(r):
if l[i] <= r[j]:
if comp(l[i], r[j]):
array[k] = l[i]
i += 1
else:
Expand Down Expand Up @@ -61,6 +61,13 @@ def merge_sort_parallel(array, num_threads, **kwargs):
is to be sorted.
Optional, by default the index
of the last position filled.
comp: lambda/function
The comparator which is to be used
for sorting. If the function returns
False then only swapping is performed.
Optional, by default, less than or
equal to is used for comparing two
values.

Examples
========
Expand All @@ -70,14 +77,18 @@ def merge_sort_parallel(array, num_threads, **kwargs):
>>> merge_sort_parallel(arr, 3)
>>> [arr[0], arr[1], arr[2]]
[1, 2, 3]
>>> merge_sort_parallel(arr, 3, comp=lambda u, v: u > v)
>>> [arr[0], arr[1], arr[2]]
[3, 2, 1]

References
==========

.. [1] https://en.wikipedia.org/wiki/Merge_sort
"""
start = kwargs.get('start', 0)
end = kwargs.get('end', array._size - 1)
end = kwargs.get('end', len(array) - 1)
comp = kwargs.get("comp", lambda u, v: u <= v)
for size in range(floor(log(end - start + 1, 2)) + 1):
pow_2 = 2**size
with ThreadPoolExecutor(max_workers=num_threads) as Executor:
Expand All @@ -88,7 +99,7 @@ def merge_sort_parallel(array, num_threads, **kwargs):
array,
i, i + pow_2 - 1,
i + pow_2, i + 2*pow_2 - 1,
end).result()
end, comp).result()
i = i + 2*pow_2

if _check_type(array, DynamicArray):
Expand Down
3 changes: 3 additions & 0 deletions pydatastructs/linear_data_structures/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ def fill(self, elem):
for i in range(self._size):
self._data[i] = elem

def __len__(self):
    """
    Returns the value stored in ``self._size`` so that the
    built-in ``len`` can be applied to an instance.
    """
    size = self._size
    return size


class DynamicArray(Array):
"""
Expand Down
2 changes: 1 addition & 1 deletion pydatastructs/trees/binary_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class BinaryTree(object):
key
Required if tree is to be instantiated with
root otherwise not needed.
comp: lambda
comp: lambda/function
Optional, A lambda function which will be used
for comparison of keys. Should return a
bool value. By default it implements less
Expand Down