Skip to content

Commit ea39ebf

Browse files
author
Man Wang
committed
[Graph Partitioning] Add optimization to minimize communication cost and number of partitions.
1 parent b1c130c commit ea39ebf

File tree

6 files changed

+371
-16
lines changed

6 files changed

+371
-16
lines changed

include/glow/Partitioner/Partitioner.h

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#define GLOW_PARTITIONER_PARTITIONER_H
1818

1919
#include "glow/Graph/Graph.h"
20+
#include "glow/Partitioner/PartitionerUtils.h"
2021
#include "glow/Runtime/RuntimeTypes.h"
2122

2223
#include "llvm/ADT/DenseMap.h"
@@ -29,19 +30,27 @@ namespace glow {
2930

3031
using namespace runtime;
3132

32-
using MemUsageMap = std::unordered_map<Node *, unsigned>;
33+
using MemUsageMapTy = std::unordered_map<Node *, unsigned>;
34+
using NodesSetTy = std::set<Node *>;
35+
using PartitionCostMapTy = llvm::DenseMap<Function *, GraphMemInfo>;
36+
37+
/// Helper structure for building a partition. Records 1) a mapping of nodes in
38+
/// the original function to destination partitions, along with a list of the
39+
/// newly-created functions; 2) a mapping of newly-created functions along with
40+
/// their sets of nodes.
41+
using NodeToFunctionMapTy = llvm::DenseMap<Node *, Function *>;
42+
using FunctionToNodesMapTy = llvm::DenseMap<Function *, NodesSetTy>;
3343

34-
/// Helper structure for building a partition. Records a mapping of nodes in the
35-
/// original function to destination partitions, along with a list of the
36-
/// newly-created functions.
3744
class NodeToFunctionMap {
38-
using Map = llvm::DenseMap<Node *, Function *>;
3945

4046
/// Newly-created partitions.
4147
FunctionList functions_;
4248

4349
/// Map of nodes in the original function to their target partition.
44-
Map nodeToFunction_;
50+
NodeToFunctionMapTy nodeToFunction_;
51+
52+
/// Map of sub-functions to their memory consumption.
53+
PartitionCostMapTy partitionCost_;
4554

4655
public:
4756
/// Create a new partition \p F.
@@ -54,10 +63,22 @@ class NodeToFunctionMap {
5463
const FunctionList &getPartitions() const { return functions_; }
5564

5665
/// Map API.
57-
Map::iterator find(Node *N) { return nodeToFunction_.find(N); }
58-
Map::iterator begin() { return nodeToFunction_.begin(); }
59-
Map::iterator end() { return nodeToFunction_.end(); }
66+
NodeToFunctionMapTy::iterator find(Node *N) {
67+
return nodeToFunction_.find(N);
68+
}
69+
NodeToFunctionMapTy::iterator begin() { return nodeToFunction_.begin(); }
70+
NodeToFunctionMapTy::iterator end() { return nodeToFunction_.end(); }
71+
6072
Function *operator[](Node *n) { return nodeToFunction_[n]; }
73+
void deletePartition(Function *func) { functions_.remove(func); }
74+
75+
/// Set the memory consumption \p cost for a partition \p func.
76+
void setGraphMemInfo(Function *func, GraphMemInfo cost) {
77+
partitionCost_[func] = cost;
78+
}
79+
80+
/// Get the memory consumption for a partition \p func.
81+
GraphMemInfo getGraphMemInfo(Function *func) { return partitionCost_[func]; }
6182
};
6283

6384
/// The struct contains all the created DAGNodes. This DAGNodeList owns all the
@@ -92,7 +113,7 @@ class Partitioner {
92113
size_t memSize_;
93114

94115
/// The map of each operator and the corresponding memory size.
95-
MemUsageMap memUsage_;
116+
MemUsageMapTy memUsage_;
96117

97118
/// Get the representative function (the one with the largest input) and
98119
/// update the memSize.
@@ -102,6 +123,19 @@ class Partitioner {
102123
/// function.
103124
void initOpMemUsage();
104125

126+
/// Combine the partitions if necessary : if all outside uses of the nodes in
127+
/// partition1 are in partition2, and the sum of memory consumption of
128+
/// partition1 and partition2 is less than availableMemory, combine partition1
129+
/// and partition2.
130+
void partitionsCombine(NodeToFunctionMap &partitions,
131+
FunctionToNodesMapTy &nodesSet,
132+
uint64_t availableMemory);
133+
134+
/// After getting the initial partitions, adjust the partitions to minimize
135+
/// communication and computation cost.
136+
void partitionsAdjust(NodeToFunctionMap &partitions,
137+
uint64_t availableMemory);
138+
105139
/// Assign nodes to partitions and return the mapping.
106140
NodeToFunctionMap selectPartitions(Function *F, unsigned availableMemory);
107141

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* Copyright (c) 2017-present, Facebook, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#ifndef GLOW_PARTITIONER_PARTITIONUTILS_H
17+
#define GLOW_PARTITIONER_PARTITIONUTILS_H
18+
19+
#include "glow/Graph/Graph.h"
20+
21+
namespace glow {
22+
23+
/// Memory footprint summary of a subgraph, i.e. a list of nodes taken from a
/// function. Every counter starts at zero.
struct GraphMemInfo {
  /// Memory used by the input nodes of this subgraph (nodes whose predecessors
  /// are not part of the subgraph).
  uint64_t inMemSize{0};
  /// Memory used by the output nodes of this subgraph (nodes whose successors
  /// are not part of the subgraph).
  uint64_t outMemSize{0};
  /// Memory used by all constants referenced in this subgraph.
  uint64_t constMemSize{0};

  /// Default construction yields an all-zero record via the member
  /// initializers above.
  GraphMemInfo() = default;
};
36+
37+
/// Given \p nodes, return a list of nodes who use any node in this set.
38+
std::vector<Node *> getOutUsers(const std::set<Node *> &nodes);
39+
40+
/// Given \p nodes, return a list of nodes who use only the nodes in this set or
41+
/// constant.
42+
std::vector<Node *>
43+
getOutUsersWithOnePredecessor(const std::set<Node *> &nodes);
44+
45+
/// Return the memory usage of a given nodes set.
46+
GraphMemInfo getGraphMemInfo(const std::set<Node *> &nodes);
47+
} // namespace glow
48+
#endif // GLOW_PARTITIONER_PARTITIONUTILS_H

lib/Partitioner/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
add_library(Partitioner
2-
Partitioner.cpp)
2+
PartitionerUtils.cpp
3+
Partitioner.cpp)
34

45
target_link_libraries(Partitioner
56
PRIVATE

lib/Partitioner/Partitioner.cpp

Lines changed: 146 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,148 @@ static BFSLevel getBFSLevel(Function *F) {
139139
return bfs;
140140
}
141141

142+
// Combine the partitions if necessary : if all outside uses of the nodes in
143+
// partition1 is in partition2, and the sum of memory consumption of partition1
144+
// and partition2 is less than availableMemory, combine partition1 and
145+
// partition2.
146+
void Partitioner::partitionsCombine(NodeToFunctionMap &partitions,
147+
FunctionToNodesMapTy &nodesSet,
148+
uint64_t availableMemory) {
149+
150+
for (FunctionToNodesMapTy::iterator it = nodesSet.begin();
151+
it != nodesSet.end(); ++it) {
152+
std::vector<Node *> outUsers = getOutUsers((*it).second);
153+
if (outUsers.empty()) {
154+
continue;
155+
}
156+
157+
bool flag = true;
158+
for (int i = 1, e = outUsers.size(); i < e; i++) {
159+
if (partitions[outUsers[i]] != partitions[outUsers[i - 1]]) {
160+
flag = false;
161+
break;
162+
}
163+
}
164+
if (flag) {
165+
// This partition only has one successor.
166+
Function *cur = (*it).first;
167+
Function *suc = partitions[outUsers[0]];
168+
NodesSetTy tmp = nodesSet.lookup(suc);
169+
GraphMemInfo cost1 = partitions.getGraphMemInfo(cur);
170+
GraphMemInfo cost2 = partitions.getGraphMemInfo(suc);
171+
if (cost1.constMemSize + cost1.inMemSize + cost2.constMemSize +
172+
cost2.inMemSize - cost1.outMemSize <
173+
availableMemory) {
174+
// We can combine the two partitions to fit one device.
175+
for (NodesSetTy::iterator it2 = tmp.begin(); it2 != tmp.end(); ++it2) {
176+
partitions.add(*it2, cur);
177+
}
178+
(*it).second.insert(tmp.begin(), tmp.end());
179+
partitions.deletePartition(suc);
180+
nodesSet.erase(suc);
181+
module_->eraseFunction(suc);
182+
}
183+
}
184+
}
185+
}
186+
187+
/// Adjust the initial partitions to reduce communication cost, then combine
/// partitions where possible.
///
/// Steps:
///  1. Build a node set for every partition found in \p partitions and record
///     each partition's GraphMemInfo.
///  2. Move: repeatedly try to pull nodes returned by
///     getOutUsersWithOnePredecessor() into the partition they depend on, as
///     long as the three rules below hold. Repeat until a full sweep moves no
///     node.
///  3. Call partitionsCombine() on the result.
///
/// \param partitions node-to-partition mapping; updated in place.
/// \param availableMemory memory budget for one partition.
void Partitioner::partitionsAdjust(NodeToFunctionMap &partitions,
                                   uint64_t availableMemory) {
  // For each partition, create a node set.
  FunctionToNodesMapTy nodesSet;
  for (auto &nodeAndFunc : partitions) {
    nodesSet[nodeAndFunc.second].insert(nodeAndFunc.first);
  }

  // Initialize the memory cost of each partition. For now the output size
  // represents the communication cost.
  for (auto &funcAndNodes : nodesSet) {
    partitions.setGraphMemInfo(funcAndNodes.first,
                               getGraphMemInfo(funcAndNodes.second));
  }

  // Step 1 Move: assume Partition1 -> Partition2. Try to move nodes from
  // Partition2 to Partition1 if those nodes only use nodes in Partition1
  // (recursively), the move does not make Partition1 exceed the memory
  // constraint, and the communication cost does not grow.
  bool gain = true;
  while (gain) {
    // Set back to true as soon as at least one node is moved in this sweep.
    gain = false;
    for (FunctionToNodesMapTy::iterator it = nodesSet.begin();
         it != nodesSet.end(); ++it) {
      // Scratch copy used to evaluate the cost of tentative moves.
      NodesSetTy nSet = (*it).second;
      std::vector<Node *> outUsers = getOutUsersWithOnePredecessor(nSet);
      if (outUsers.empty()) {
        continue;
      }
      Function *cur = (*it).first;
      NodesSetTy &curNodes = (*it).second;
      GraphMemInfo curInfo = partitions.getGraphMemInfo(cur);
      uint64_t memSize = curInfo.constMemSize + curInfo.inMemSize;
      uint64_t communicationCost = curInfo.outMemSize;

      // Check whether each candidate can be moved into the current node set.
      for (Node *candidate : outUsers) {
        // Rule 1: the move must not break the memory constraint.
        if (memUsage_[candidate] + memSize > availableMemory) {
          continue;
        }

        // Rule 2: the move must not cause constant duplication, i.e. the
        // candidate must not read a Storage value that has other uses.
        bool sharesStorage = false;
        for (unsigned j = 0, e1 = candidate->getNumInputs(); j < e1; j++) {
          auto in = candidate->getNthInput(j);
          if (isa<Storage>(in.getNode()) && !in.hasOneUse()) {
            sharesStorage = true;
            break;
          }
        }
        if (sharesStorage) {
          continue;
        }

        // Locate the partition the candidate currently belongs to. Using
        // find() instead of operator[] avoids inserting into the map that is
        // being iterated when the candidate is not mapped.
        Function *suc = partitions[candidate];
        FunctionToNodesMapTy::iterator sucIt = nodesSet.find(suc);
        if (suc == cur || sucIt == nodesSet.end()) {
          continue;
        }

        // Rule 3: the move must not increase the communication cost. Even a
        // move that leaves the cost unchanged reduces the memory consumption
        // of the donor partition and may enable later moves or a
        // partitionsCombine.
        nSet.insert(candidate);
        GraphMemInfo curCost = getGraphMemInfo(nSet);
        if (curCost.outMemSize > communicationCost) {
          // Rejected: take the candidate back out so it does not distort the
          // cost evaluation of the remaining candidates.
          nSet.erase(candidate);
          continue;
        }

        // Commit the move.
        NodesSetTy &sucNodes = (*sucIt).second;
        curNodes.insert(candidate);
        sucNodes.erase(candidate);
        partitions.add(candidate, cur);
        partitions.setGraphMemInfo(cur, curCost);
        if (sucNodes.empty()) {
          // The donor partition became empty: remove it everywhere, including
          // its node-set entry, so no key refers to the erased function.
          partitions.deletePartition(suc);
          nodesSet.erase(suc);
          module_->eraseFunction(suc);
        } else {
          partitions.setGraphMemInfo(suc, getGraphMemInfo(sucNodes));
        }
        gain = true;
        // Track the cost of the *current* partition, not the donor's.
        communicationCost = curCost.outMemSize;
        memSize += memUsage_[candidate];
      }
    }
  }

  // TODO... :Step 2: exchange two nodes from two partitions to minimize
  // communication cost.

  // Combine the current partitions if necessary.
  partitionsCombine(partitions, nodesSet, availableMemory);
}
283+
142284
/// Assign nodes to partitions and return the mapping.
143285
NodeToFunctionMap Partitioner::selectPartitions(Function *F,
144286
unsigned availableMemory) {
@@ -149,7 +291,7 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
149291
// (cut[1], cut[0] - 1], ..., (-1, cut[n] - 1].
150292
std::vector<int> cut;
151293

152-
// Step 1 : get the initial cut based on BFS levels and avaiableMemory.
294+
// Step 1 : get the initial cut based on BFS levels and availableMemory.
153295
// TODO .. need to remove the duplicated memory usage.
154296
unsigned mem = 0;
155297
for (int i = level - 1; i >= 0; i--) {
@@ -199,9 +341,9 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
199341
}
200342
}
201343
}
202-
// Step 3 : adjust the partition based on performance (Advanced Graph
203-
// Paritioning algrithm will be applied here).
204-
// --- TODO
344+
345+
// Step 3 : adjust the partition based on performance.
346+
partitionsAdjust(mapping, availableMemory);
205347

206348
return mapping;
207349
}

0 commit comments

Comments
 (0)