From 541f8192c92e3f935c725660f70e0e4699748c20 Mon Sep 17 00:00:00 2001 From: czgdp1807 Date: Thu, 19 Mar 2020 20:41:46 +0530 Subject: [PATCH 1/4] minor rename --- .../tests/{test_algorithm.py => test_algorithms.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pydatastructs/linear_data_structures/tests/{test_algorithm.py => test_algorithms.py} (100%) diff --git a/pydatastructs/linear_data_structures/tests/test_algorithm.py b/pydatastructs/linear_data_structures/tests/test_algorithms.py similarity index 100% rename from pydatastructs/linear_data_structures/tests/test_algorithm.py rename to pydatastructs/linear_data_structures/tests/test_algorithms.py From 82d85ddcfcf1fa2d1bdd18eac26f13d218df870b Mon Sep 17 00:00:00 2001 From: czgdp1807 Date: Sat, 21 Mar 2020 15:06:40 +0530 Subject: [PATCH 2/4] allowed custom comparator in sorting --- .../linear_data_structures/algorithms.py | 17 ++++++++++++++--- pydatastructs/trees/binary_trees.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py index fa830dfc4..312c1d1b1 100644 --- a/pydatastructs/linear_data_structures/algorithms.py +++ b/pydatastructs/linear_data_structures/algorithms.py @@ -8,7 +8,7 @@ 'merge_sort_parallel' ] -def _merge(array, sl, el, sr, er, end): +def _merge(array, sl, el, sr, er, end, comp): l, r = [], [] for i in range(sl, el + 1): if (i <= end and @@ -22,7 +22,7 @@ def _merge(array, sl, el, sr, er, end): array[i] = None i, j, k = 0, 0, sl while i < len(l) and j < len(r): - if l[i] <= r[j]: + if comp(l[i], r[j]): array[k] = l[i] i += 1 else: @@ -61,6 +61,13 @@ def merge_sort_parallel(array, num_threads, **kwargs): is to be sorted. Optional, by default the index of the last position filled. + comp: lambda/function + The comparator which is to be used + for sorting. If the function returns + False then only swapping is performed. + Optional, by default, less than or + equal to is used for comparing two + values. Examples ======== @@ -70,6 +77,9 @@ def merge_sort_parallel(array, num_threads, **kwargs): >>> merge_sort_parallel(arr, 3) >>> [arr[0], arr[1], arr[2]] [1, 2, 3] + >>> merge_sort_parallel(arr, 3, comp=lambda u, v: u > v) + >>> [arr[0], arr[1], arr[2]] + [3, 2, 1] References ========== @@ -78,6 +88,7 @@ def merge_sort_parallel(array, num_threads, **kwargs): """ start = kwargs.get('start', 0) end = kwargs.get('end', array._size - 1) + comp = kwargs.get("comp", lambda u, v: u <= v) for size in range(floor(log(end - start + 1, 2)) + 1): pow_2 = 2**size with ThreadPoolExecutor(max_workers=num_threads) as Executor: @@ -88,7 +99,7 @@ def merge_sort_parallel(array, num_threads, **kwargs): array, i, i + pow_2 - 1, i + pow_2, i + 2*pow_2 - 1, - end).result() + end, comp).result() i = i + 2*pow_2 if _check_type(array, DynamicArray): diff --git a/pydatastructs/trees/binary_trees.py b/pydatastructs/trees/binary_trees.py index d10fd4380..a249029c3 100644 --- a/pydatastructs/trees/binary_trees.py +++ b/pydatastructs/trees/binary_trees.py @@ -28,7 +28,7 @@ class BinaryTree(object): key Required if tree is to be instantiated with root otherwise not needed. - comp: lambda + comp: lambda/function Optional, A lambda function which will be used for comparison of keys. Should return a bool value. By default it implements less From 75f2e5ad7acdf336da037b6ca919fab7e237452b Mon Sep 17 00:00:00 2001 From: czgdp1807 Date: Sat, 21 Mar 2020 19:23:54 +0530 Subject: [PATCH 3/4] ready for optimisation --- pydatastructs/graphs/__init__.py | 3 +- pydatastructs/graphs/algorithms.py | 63 ++++++++++++++----- pydatastructs/graphs/tests/test_algorithms.py | 30 ++++++++- .../linear_data_structures/algorithms.py | 2 +- .../linear_data_structures/arrays.py | 3 + 5 files changed, 82 insertions(+), 19 deletions(-) diff --git a/pydatastructs/graphs/__init__.py b/pydatastructs/graphs/__init__.py index fb8a518db..751c645bc 100644 --- a/pydatastructs/graphs/__init__.py +++ b/pydatastructs/graphs/__init__.py @@ -10,7 +10,8 @@ from .algorithms import ( breadth_first_search, breadth_first_search_parallel, - minimum_spanning_tree + minimum_spanning_tree, + minimum_spanning_tree_parallel ) __all__.extend(algorithms.__all__) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index f38540312..2ececd3bf 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -7,11 +7,13 @@ from pydatastructs.utils import GraphEdge from pydatastructs.miscellaneous_data_structures import DisjointSetForest from pydatastructs.graphs.graph import Graph +from pydatastructs.linear_data_structures.algorithms import merge_sort_parallel __all__ = [ 'breadth_first_search', 'breadth_first_search_parallel', - 'minimum_spanning_tree' + 'minimum_spanning_tree', + 'minimum_spanning_tree_parallel' ] def breadth_first_search( @@ -190,32 +192,35 @@ def _breadth_first_search_parallel_adjacency_list( _breadth_first_search_parallel_adjacency_matrix = _breadth_first_search_parallel_adjacency_list -def _minimum_spanning_tree_kruskal_adjacency_list(graph): - mst = Graph(*[getattr(graph, v) for v in graph.vertices]) - sort_key = lambda item: item[1].value - dsf = DisjointSetForest() - for v in graph.vertices: - dsf.make_set(v) - for _, edge in sorted(graph.edge_weights.items(), key=sort_key): - u, v = edge.source.name, edge.target.name - if dsf.find_root(u) is not dsf.find_root(v): - mst.add_edge(u, v, edge.value) - dsf.union(u, v) +def _generate_mst_object(graph): + mst = Graph(*[getattr(graph, str(v)) for v in graph.vertices]) return mst -def _minimum_spanning_tree_kruskal_adjacency_matrix(graph): - mst = Graph(*[getattr(graph, str(v)) for v in graph.vertices]) - sort_key = lambda item: item[1].value +def _sort_edges(graph, num_threads=None): + edges = list(graph.edge_weights.items()) + if num_threads is None: + sort_key = lambda item: item[1].value + return sorted(edges, key=sort_key) + + merge_sort_parallel(edges, num_threads, + comp=lambda u,v: u[1].value <= v[1].value) + return edges + +def _minimum_spanning_tree_kruskal_adjacency_list(graph): + mst = _generate_mst_object(graph) dsf = DisjointSetForest() for v in graph.vertices: dsf.make_set(v) - for _, edge in sorted(graph.edge_weights.items(), key=sort_key): + for _, edge in _sort_edges(graph): u, v = edge.source.name, edge.target.name if dsf.find_root(u) is not dsf.find_root(v): mst.add_edge(u, v, edge.value) dsf.union(u, v) return mst +_minimum_spanning_tree_kruskal_adjacency_matrix = \ + _minimum_spanning_tree_kruskal_adjacency_list + def minimum_spanning_tree(graph, algorithm): """ Computes a minimum spanning tree for the given @@ -269,3 +274,29 @@ def minimum_spanning_tree(graph, algorithm): "isn't implemented for finding minimum spanning trees." %(algorithm, graph._impl)) return getattr(algorithms, func)(graph) + +def _minimum_spanning_tree_parallel_kruskal_adjacency_list(graph, num_threads): + mst = _generate_mst_object(graph) + dsf = DisjointSetForest() + for v in graph.vertices: + dsf.make_set(v) + edges = _sort_edges(graph, num_threads) + for _, edge in edges: + u, v = edge.source.name, edge.target.name + if dsf.find_root(u) is not dsf.find_root(v): + mst.add_edge(u, v, edge.value) + dsf.union(u, v) + return mst + +_minimum_spanning_tree_parallel_kruskal_adjacency_matrix = \ + _minimum_spanning_tree_parallel_kruskal_adjacency_list + +def minimum_spanning_tree_parallel(graph, algorithm, num_threads): + import pydatastructs.graphs.algorithms as algorithms + func = "_minimum_spanning_tree_parallel_" + algorithm + "_" + graph._impl + if not hasattr(algorithms, func): + raise NotImplementedError( + "Currently %s algoithm for %s implementation of graphs " + "isn't implemented for finding minimum spanning trees." + %(algorithm, graph._impl)) + return getattr(algorithms, func)(graph, num_threads) diff --git a/pydatastructs/graphs/tests/test_algorithms.py b/pydatastructs/graphs/tests/test_algorithms.py index a5c28ea85..5bce06b19 100644 --- a/pydatastructs/graphs/tests/test_algorithms.py +++ b/pydatastructs/graphs/tests/test_algorithms.py @@ -1,5 +1,6 @@ from pydatastructs import (breadth_first_search, Graph, -breadth_first_search_parallel, minimum_spanning_tree) +breadth_first_search_parallel, minimum_spanning_tree, +minimum_spanning_tree_parallel) def test_breadth_first_search(): @@ -148,3 +149,30 @@ def _test_minimum_spanning_tree(ds, algorithm): _test_minimum_spanning_tree("List", "kruskal") _test_minimum_spanning_tree("Matrix", "kruskal") + +def test_minimum_spanning_tree_parallel(): + + def _test_minimum_spanning_tree_parallel(ds, algorithm): + import pydatastructs.utils.misc_util as utils + GraphNode = getattr(utils, "Adjacency" + ds + "GraphNode") + a, b, c, d, e = [GraphNode(x) for x in [0, 1, 2, 3, 4]] + graph = Graph(a, b, c, d, e) + graph.add_edge(a.name, c.name, 10) + graph.add_edge(c.name, a.name, 10) + graph.add_edge(a.name, d.name, 7) + graph.add_edge(d.name, a.name, 7) + graph.add_edge(c.name, d.name, 9) + graph.add_edge(d.name, c.name, 9) + graph.add_edge(d.name, b.name, 32) + graph.add_edge(b.name, d.name, 32) + graph.add_edge(d.name, e.name, 23) + graph.add_edge(e.name, d.name, 23) + mst = minimum_spanning_tree_parallel(graph, algorithm, 3) + expected_mst = [('0_3', 7), ('2_3', 9), ('3_4', 23), ('3_1', 32), + ('3_0', 7), ('3_2', 9), ('4_3', 23), ('1_3', 32)] + assert len(expected_mst) == 2*len(mst.edge_weights.items()) + for k, v in mst.edge_weights.items(): + assert (k, v.value) in expected_mst + + _test_minimum_spanning_tree_parallel("List", "kruskal") + _test_minimum_spanning_tree_parallel("Matrix", "kruskal") diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py index 312c1d1b1..8bca46feb 100644 --- a/pydatastructs/linear_data_structures/algorithms.py +++ b/pydatastructs/linear_data_structures/algorithms.py @@ -87,7 +87,7 @@ def merge_sort_parallel(array, num_threads, **kwargs): .. [1] https://en.wikipedia.org/wiki/Merge_sort """ start = kwargs.get('start', 0) - end = kwargs.get('end', array._size - 1) + end = kwargs.get('end', len(array) - 1) comp = kwargs.get("comp", lambda u, v: u <= v) for size in range(floor(log(end - start + 1, 2)) + 1): pow_2 = 2**size diff --git a/pydatastructs/linear_data_structures/arrays.py b/pydatastructs/linear_data_structures/arrays.py index 130098b87..49a419c67 100644 --- a/pydatastructs/linear_data_structures/arrays.py +++ b/pydatastructs/linear_data_structures/arrays.py @@ -127,6 +127,9 @@ def fill(self, elem): for i in range(self._size): self._data[i] = elem + def __len__(self): + return self._size + class DynamicArray(Array): """ From 9dd107517fe1d2f8c811e1bc58043c428b4d43a9 Mon Sep 17 00:00:00 2001 From: czgdp1807 Date: Sat, 21 Mar 2020 19:55:58 +0530 Subject: [PATCH 4/4] docs done --- pydatastructs/graphs/algorithms.py | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 2ececd3bf..3d6765f48 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -292,6 +292,52 @@ def _minimum_spanning_tree_parallel_kruskal_adjacency_list(graph, num_threads): _minimum_spanning_tree_parallel_kruskal_adjacency_list def minimum_spanning_tree_parallel(graph, algorithm, num_threads): + """ + Computes a minimum spanning tree for the given + graph and algorithm using the given number of threads. + + Parameters + ========== + + graph: Graph + The graph whose minimum spanning tree + has to be computed. + algorithm: str + The algorithm which should be used for + computing a minimum spanning tree. + Currently the following algorithms are + supported, + 'kruskal' -> Kruskal's algorithm as given in + [1]. + num_threads: int + The number of threads to be used. + + Returns + ======= + + mst: Graph + A minimum spanning tree using the implementation + same as the graph provided in the input. + + Examples + ======== + + >>> from pydatastructs import Graph, AdjacencyListGraphNode + >>> from pydatastructs import minimum_spanning_tree_parallel + >>> u = AdjacencyListGraphNode('u') + >>> v = AdjacencyListGraphNode('v') + >>> G = Graph(u, v) + >>> G.add_edge(u.name, v.name, 3) + >>> mst = minimum_spanning_tree_parallel(G, 'kruskal', 3) + >>> u_n = mst.neighbors(u.name) + >>> mst.get_edge(u.name, u_n[0].name).value + 3 + + References + ========== + + .. [1] https://en.wikipedia.org/wiki/Kruskal%27s_algorithm#Parallel_algorithm + """ import pydatastructs.graphs.algorithms as algorithms func = "_minimum_spanning_tree_parallel_" + algorithm + "_" + graph._impl if not hasattr(algorithms, func):