diff --git a/pydatastructs/graphs/__init__.py b/pydatastructs/graphs/__init__.py
index fb8a518db..751c645bc 100644
--- a/pydatastructs/graphs/__init__.py
+++ b/pydatastructs/graphs/__init__.py
@@ -10,7 +10,8 @@
 from .algorithms import (
     breadth_first_search,
     breadth_first_search_parallel,
-    minimum_spanning_tree
+    minimum_spanning_tree,
+    minimum_spanning_tree_parallel
 )
 
 __all__.extend(algorithms.__all__)
diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py
index f38540312..3d6765f48 100644
--- a/pydatastructs/graphs/algorithms.py
+++ b/pydatastructs/graphs/algorithms.py
@@ -7,11 +7,13 @@
 from pydatastructs.utils import GraphEdge
 from pydatastructs.miscellaneous_data_structures import DisjointSetForest
 from pydatastructs.graphs.graph import Graph
+from pydatastructs.linear_data_structures.algorithms import merge_sort_parallel
 
 __all__ = [
     'breadth_first_search',
     'breadth_first_search_parallel',
-    'minimum_spanning_tree'
+    'minimum_spanning_tree',
+    'minimum_spanning_tree_parallel'
 ]
 
 def breadth_first_search(
@@ -190,36 +192,109 @@ def _breadth_first_search_parallel_adjacency_list(
 
 _breadth_first_search_parallel_adjacency_matrix = _breadth_first_search_parallel_adjacency_list
 
+def _generate_mst_object(graph):
+    mst = Graph(*[getattr(graph, str(v)) for v in graph.vertices])
+    return mst
+
+def _sort_edges(graph, num_threads=None):
+    edges = list(graph.edge_weights.items())
+    if num_threads is None:
+        sort_key = lambda item: item[1].value
+        return sorted(edges, key=sort_key)
+
+    merge_sort_parallel(edges, num_threads,
+                        comp=lambda u,v: u[1].value <= v[1].value)
+    return edges
+
 def _minimum_spanning_tree_kruskal_adjacency_list(graph):
-    mst = Graph(*[getattr(graph, v) for v in graph.vertices])
-    sort_key = lambda item: item[1].value
+    mst = _generate_mst_object(graph)
     dsf = DisjointSetForest()
     for v in graph.vertices:
         dsf.make_set(v)
-    for _, edge in sorted(graph.edge_weights.items(), key=sort_key):
+    for _, edge in _sort_edges(graph):
         u, v = edge.source.name, edge.target.name
         if dsf.find_root(u) is not dsf.find_root(v):
             mst.add_edge(u, v, edge.value)
             dsf.union(u, v)
     return mst
 
-def _minimum_spanning_tree_kruskal_adjacency_matrix(graph):
-    mst = Graph(*[getattr(graph, str(v)) for v in graph.vertices])
-    sort_key = lambda item: item[1].value
+_minimum_spanning_tree_kruskal_adjacency_matrix = \
+    _minimum_spanning_tree_kruskal_adjacency_list
+
+def minimum_spanning_tree(graph, algorithm):
+    """
+    Computes a minimum spanning tree for the given
+    graph and algorithm.
+
+    Parameters
+    ==========
+
+    graph: Graph
+        The graph whose minimum spanning tree
+        has to be computed.
+    algorithm: str
+        The algorithm which should be used for
+        computing a minimum spanning tree.
+        Currently the following algorithms are
+        supported,
+        'kruskal' -> Kruskal's algorithm as given in
+        [1].
+
+    Returns
+    =======
+
+    mst: Graph
+        A minimum spanning tree using the implementation
+        same as the graph provided in the input.
+
+    Examples
+    ========
+
+    >>> from pydatastructs import Graph, AdjacencyListGraphNode
+    >>> from pydatastructs import minimum_spanning_tree
+    >>> u = AdjacencyListGraphNode('u')
+    >>> v = AdjacencyListGraphNode('v')
+    >>> G = Graph(u, v)
+    >>> G.add_edge(u.name, v.name, 3)
+    >>> mst = minimum_spanning_tree(G, 'kruskal')
+    >>> u_n = mst.neighbors(u.name)
+    >>> mst.get_edge(u.name, u_n[0].name).value
+    3
+
+    References
+    ==========
+
+    .. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm
+    """
+    import pydatastructs.graphs.algorithms as algorithms
+    func = "_minimum_spanning_tree_" + algorithm + "_" + graph._impl
+    if not hasattr(algorithms, func):
+        raise NotImplementedError(
+        "Currently %s algoithm for %s implementation of graphs "
+        "isn't implemented for finding minimum spanning trees."
+        %(algorithm, graph._impl))
+    return getattr(algorithms, func)(graph)
+
+def _minimum_spanning_tree_parallel_kruskal_adjacency_list(graph, num_threads):
+    mst = _generate_mst_object(graph)
     dsf = DisjointSetForest()
     for v in graph.vertices:
         dsf.make_set(v)
-    for _, edge in sorted(graph.edge_weights.items(), key=sort_key):
+    edges = _sort_edges(graph, num_threads)
+    for _, edge in edges:
         u, v = edge.source.name, edge.target.name
         if dsf.find_root(u) is not dsf.find_root(v):
             mst.add_edge(u, v, edge.value)
             dsf.union(u, v)
     return mst
 
-def minimum_spanning_tree(graph, algorithm):
+_minimum_spanning_tree_parallel_kruskal_adjacency_matrix = \
+    _minimum_spanning_tree_parallel_kruskal_adjacency_list
+
+def minimum_spanning_tree_parallel(graph, algorithm, num_threads):
     """
     Computes a minimum spanning tree for the given
-    graph and algorithm.
+    graph and algorithm using the given number of threads.
 
     Parameters
     ==========
@@ -234,6 +309,8 @@ def minimum_spanning_tree(graph, algorithm):
         supported,
         'kruskal' -> Kruskal's algorithm as given in
         [1].
+    num_threads: int
+        The number of threads to be used.
 
     Returns
     =======
@@ -246,12 +323,12 @@ def minimum_spanning_tree(graph, algorithm):
     ========
 
     >>> from pydatastructs import Graph, AdjacencyListGraphNode
-    >>> from pydatastructs import minimum_spanning_tree
+    >>> from pydatastructs import minimum_spanning_tree_parallel
     >>> u = AdjacencyListGraphNode('u')
     >>> v = AdjacencyListGraphNode('v')
     >>> G = Graph(u, v)
     >>> G.add_edge(u.name, v.name, 3)
-    >>> mst = minimum_spanning_tree(G, 'kruskal')
+    >>> mst = minimum_spanning_tree_parallel(G, 'kruskal', 3)
     >>> u_n = mst.neighbors(u.name)
     >>> mst.get_edge(u.name, u_n[0].name).value
     3
@@ -259,13 +336,13 @@ def minimum_spanning_tree(graph, algorithm):
     References
     ==========
 
-    .. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm
+    .. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm#Parallel_algorithm
     """
     import pydatastructs.graphs.algorithms as algorithms
-    func = "_minimum_spanning_tree_" + algorithm + "_" + graph._impl
+    func = "_minimum_spanning_tree_parallel_" + algorithm + "_" + graph._impl
     if not hasattr(algorithms, func):
         raise NotImplementedError(
         "Currently %s algoithm for %s implementation of graphs "
         "isn't implemented for finding minimum spanning trees."
         %(algorithm, graph._impl))
-    return getattr(algorithms, func)(graph)
+    return getattr(algorithms, func)(graph, num_threads)
diff --git a/pydatastructs/graphs/tests/test_algorithms.py b/pydatastructs/graphs/tests/test_algorithms.py
index a5c28ea85..5bce06b19 100644
--- a/pydatastructs/graphs/tests/test_algorithms.py
+++ b/pydatastructs/graphs/tests/test_algorithms.py
@@ -1,5 +1,6 @@
 from pydatastructs import (breadth_first_search, Graph,
-breadth_first_search_parallel, minimum_spanning_tree)
+breadth_first_search_parallel, minimum_spanning_tree,
+minimum_spanning_tree_parallel)
 
 
 def test_breadth_first_search():
@@ -148,3 +149,30 @@ def _test_minimum_spanning_tree(ds, algorithm):
 
     _test_minimum_spanning_tree("List", "kruskal")
     _test_minimum_spanning_tree("Matrix", "kruskal")
+
+def test_minimum_spanning_tree_parallel():
+
+    def _test_minimum_spanning_tree_parallel(ds, algorithm):
+        import pydatastructs.utils.misc_util as utils
+        GraphNode = getattr(utils, "Adjacency" + ds + "GraphNode")
+        a, b, c, d, e = [GraphNode(x) for x in [0, 1, 2, 3, 4]]
+        graph = Graph(a, b, c, d, e)
+        graph.add_edge(a.name, c.name, 10)
+        graph.add_edge(c.name, a.name, 10)
+        graph.add_edge(a.name, d.name, 7)
+        graph.add_edge(d.name, a.name, 7)
+        graph.add_edge(c.name, d.name, 9)
+        graph.add_edge(d.name, c.name, 9)
+        graph.add_edge(d.name, b.name, 32)
+        graph.add_edge(b.name, d.name, 32)
+        graph.add_edge(d.name, e.name, 23)
+        graph.add_edge(e.name, d.name, 23)
+        mst = minimum_spanning_tree_parallel(graph, algorithm, 3)
+        expected_mst = [('0_3', 7), ('2_3', 9), ('3_4', 23), ('3_1', 32),
+                        ('3_0', 7), ('3_2', 9), ('4_3', 23), ('1_3', 32)]
+        assert len(expected_mst) == 2*len(mst.edge_weights.items())
+        for k, v in mst.edge_weights.items():
+            assert (k, v.value) in expected_mst
+
+    _test_minimum_spanning_tree_parallel("List", "kruskal")
+    _test_minimum_spanning_tree_parallel("Matrix", "kruskal")
diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
index fa830dfc4..8bca46feb 100644
--- a/pydatastructs/linear_data_structures/algorithms.py
+++ b/pydatastructs/linear_data_structures/algorithms.py
@@ -8,7 +8,7 @@
     'merge_sort_parallel'
 ]
 
-def _merge(array, sl, el, sr, er, end):
+def _merge(array, sl, el, sr, er, end, comp):
     l, r = [], []
     for i in range(sl, el + 1):
         if (i <= end and
@@ -22,7 +22,7 @@ def _merge(array, sl, el, sr, er, end):
             array[i] = None
     i, j, k = 0, 0, sl
     while i < len(l) and j < len(r):
-        if l[i] <= r[j]:
+        if comp(l[i], r[j]):
             array[k] = l[i]
             i += 1
         else:
@@ -61,6 +61,13 @@ def merge_sort_parallel(array, num_threads, **kwargs):
         is to be sorted.
         Optional, by default the index
         of the last position filled.
+    comp: lambda/function
+        The comparator which is to be used
+        for sorting. If the function returns
+        False then only swapping is performed.
+        Optional, by default, less than or
+        equal to is used for comparing two
+        values.
 
     Examples
     ========
@@ -70,6 +77,9 @@ def merge_sort_parallel(array, num_threads, **kwargs):
     >>> merge_sort_parallel(arr, 3)
     >>> [arr[0], arr[1], arr[2]]
     [1, 2, 3]
+    >>> merge_sort_parallel(arr, 3, comp=lambda u, v: u > v)
+    >>> [arr[0], arr[1], arr[2]]
+    [3, 2, 1]
 
     References
     ==========
@@ -77,7 +87,8 @@ def merge_sort_parallel(array, num_threads, **kwargs):
     .. [1] https://en.wikipedia.org/wiki/Merge_sort
     """
     start = kwargs.get('start', 0)
-    end = kwargs.get('end', array._size - 1)
+    end = kwargs.get('end', len(array) - 1)
+    comp = kwargs.get("comp", lambda u, v: u <= v)
     for size in range(floor(log(end - start + 1, 2)) + 1):
         pow_2 = 2**size
         with ThreadPoolExecutor(max_workers=num_threads) as Executor:
@@ -88,7 +99,7 @@ def merge_sort_parallel(array, num_threads, **kwargs):
                     array,
                     i, i + pow_2 - 1,
                     i + pow_2, i + 2*pow_2 - 1,
-                    end).result()
+                    end, comp).result()
                 i = i + 2*pow_2
 
     if _check_type(array, DynamicArray):
diff --git a/pydatastructs/linear_data_structures/arrays.py b/pydatastructs/linear_data_structures/arrays.py
index 130098b87..49a419c67 100644
--- a/pydatastructs/linear_data_structures/arrays.py
+++ b/pydatastructs/linear_data_structures/arrays.py
@@ -127,6 +127,9 @@ def fill(self, elem):
         for i in range(self._size):
             self._data[i] = elem
 
+    def __len__(self):
+        return self._size
+
 
 class DynamicArray(Array):
     """
diff --git a/pydatastructs/trees/binary_trees.py b/pydatastructs/trees/binary_trees.py
index d10fd4380..a249029c3 100644
--- a/pydatastructs/trees/binary_trees.py
+++ b/pydatastructs/trees/binary_trees.py
@@ -28,7 +28,7 @@ class BinaryTree(object):
     key
         Required if tree is to be instantiated with
         root otherwise not needed.
-    comp: lambda
+    comp: lambda/function
         Optional, A lambda function which will be used
         for comparison of keys. Should return a
         bool value. By default it implements less