@@ -139,6 +139,148 @@ static BFSLevel getBFSLevel(Function *F) {
139
139
return bfs;
140
140
}
141
141
142
+ // Combine the partitions if necessary : if all outside uses of the nodes in
143
+ // partition1 is in partition2, and the sum of memory consumption of partition1
144
+ // and partition2 is less than availableMemory, combine partition1 and
145
+ // partition2.
146
+ void Partitioner::partitionsCombine (NodeToFunctionMap &partitions,
147
+ FunctionToNodesMapTy &nodesSet,
148
+ uint64_t availableMemory) {
149
+
150
+ for (FunctionToNodesMapTy::iterator it = nodesSet.begin ();
151
+ it != nodesSet.end (); ++it) {
152
+ std::vector<Node *> outUsers = getOutUsers ((*it).second );
153
+ if (outUsers.empty ()) {
154
+ continue ;
155
+ }
156
+
157
+ bool flag = true ;
158
+ for (int i = 1 , e = outUsers.size (); i < e; i++) {
159
+ if (partitions[outUsers[i]] != partitions[outUsers[i - 1 ]]) {
160
+ flag = false ;
161
+ break ;
162
+ }
163
+ }
164
+ if (flag) {
165
+ // This partition only has one successor.
166
+ Function *cur = (*it).first ;
167
+ Function *suc = partitions[outUsers[0 ]];
168
+ NodesSetTy tmp = nodesSet.lookup (suc);
169
+ GraphMemInfo cost1 = partitions.getGraphMemInfo (cur);
170
+ GraphMemInfo cost2 = partitions.getGraphMemInfo (suc);
171
+ if (cost1.constMemSize + cost1.inMemSize + cost2.constMemSize +
172
+ cost2.inMemSize - cost1.outMemSize <
173
+ availableMemory) {
174
+ // We can combine the two partitions to fit one device.
175
+ for (NodesSetTy::iterator it2 = tmp.begin (); it2 != tmp.end (); ++it2) {
176
+ partitions.add (*it2, cur);
177
+ }
178
+ (*it).second .insert (tmp.begin (), tmp.end ());
179
+ partitions.deletePartition (suc);
180
+ nodesSet.erase (suc);
181
+ module_->eraseFunction (suc);
182
+ }
183
+ }
184
+ }
185
+ }
186
+
187
+ void Partitioner::partitionsAdjust (NodeToFunctionMap &partitions,
188
+ uint64_t availableMemory) {
189
+ // For each partitioin, create a node set.
190
+ FunctionToNodesMapTy nodesSet;
191
+ for (NodeToFunctionMapTy::iterator it = partitions.begin ();
192
+ it != partitions.end (); ++it) {
193
+ nodesSet[(*it).second ].insert ((*it).first );
194
+ }
195
+
196
+ // Initial the memory cost for each partition. Now we use the output size to
197
+ // represent the communication cost.
198
+ for (FunctionToNodesMapTy::iterator it = nodesSet.begin ();
199
+ it != nodesSet.end (); ++it) {
200
+ GraphMemInfo cost = getGraphMemInfo ((*it).second );
201
+ partitions.setGraphMemInfo ((*it).first , cost);
202
+ }
203
+
204
+ // Move/Exchange nodes between any two connected partitions, until no gain is
205
+ // get.
206
+ // Step1 Move: Assume Partition1 -> Partition2, try to move nodes from
207
+ // Partition2 to Partition1 if those nodes only use the nodes in
208
+ // Partition1(recursively) and the move won't make Partition1's memory exceeds
209
+ // the memory constraint, and the communication cost is minimized.
210
+ bool gain = true ;
211
+ while (gain) {
212
+ // gain is initialized as false, it will be set to be true if there is at
213
+ // least one node can be moved from one set to antoher set.
214
+ gain = false ;
215
+ for (FunctionToNodesMapTy::iterator it = nodesSet.begin ();
216
+ it != nodesSet.end (); ++it) {
217
+ NodesSetTy nSet = (*it).second ;
218
+ std::vector<Node *> outUsers = getOutUsersWithOnePredecessor (nSet);
219
+ if (outUsers.empty ()) {
220
+ continue ;
221
+ }
222
+ Function *cur = (*it).first ;
223
+ uint64_t memSize = partitions.getGraphMemInfo (cur).constMemSize +
224
+ partitions.getGraphMemInfo (cur).inMemSize ;
225
+ uint64_t communicationCost = partitions.getGraphMemInfo (cur).outMemSize ;
226
+ // Check if a node can be moved to current node set (i.e nSet).
227
+ for (int i = 0 , e = outUsers.size (); i < e; i++) {
228
+ // Rule 1: this move won't break memory constraint.
229
+ if (memUsage_[outUsers[i]] + memSize > availableMemory) {
230
+ continue ;
231
+ }
232
+ // Rule 2: this move won't cause constant duplication.
233
+ bool cont = false ;
234
+ for (int j = 0 , e1 = outUsers[i]->getNumInputs (); j < e1 ; j++) {
235
+ auto in = outUsers[i]->getNthInput (j);
236
+ if (isa<Storage>(in.getNode ()) && !in.hasOneUse ()) {
237
+ cont = true ;
238
+ break ;
239
+ }
240
+ }
241
+ if (cont) {
242
+ continue ;
243
+ }
244
+ // Rule 3: this move won't increase communication cost. Even if this
245
+ // move won't change communication cost, according to rule 1 and rule 2,
246
+ // the memory consumption of the partition where this node (i.e
247
+ // outUsers[i]) belongs can be reduced. Therefore, it may trigger later
248
+ // node movement or paritionCombine.
249
+ nSet.insert (outUsers[i]);
250
+ GraphMemInfo cost = getGraphMemInfo (nSet);
251
+ if (cost.outMemSize <= communicationCost) {
252
+ // Move this node to current node set.
253
+ nSet.insert (outUsers[i]);
254
+ nodesSet[cur].insert (outUsers[i]);
255
+ Function *suc = partitions[outUsers[i]];
256
+ nodesSet[suc].erase (outUsers[i]);
257
+ // Update the partitions.
258
+ partitions.add (outUsers[i], cur);
259
+ partitions.setGraphMemInfo (cur, cost);
260
+ if (nodesSet[suc].empty ()) {
261
+ // It is possible that after moving a node from Partition2 to
262
+ // Partition1, Partition2 become empty. Remove the empty partition.
263
+ partitions.deletePartition (suc);
264
+ module_->eraseFunction (suc);
265
+ } else {
266
+ cost = getGraphMemInfo (nodesSet[suc]);
267
+ partitions.setGraphMemInfo (suc, cost);
268
+ }
269
+ gain = true ;
270
+ communicationCost = cost.outMemSize ;
271
+ memSize += memUsage_[outUsers[i]];
272
+ }
273
+ }
274
+ }
275
+ }
276
+
277
+ // TODO... :Step 2: exchange two nodes from two partitions to minimize
278
+ // communication cost.
279
+
280
+ // Combine the current partitions if necessary.
281
+ partitionsCombine (partitions, nodesSet, availableMemory);
282
+ }
283
+
142
284
// / Assign nodes to partitions and return the mapping.
143
285
NodeToFunctionMap Partitioner::selectPartitions (Function *F,
144
286
unsigned availableMemory) {
@@ -149,7 +291,7 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
149
291
// (cut[1], cut[0] - 1], ..., (-1, cut[n] - 1].
150
292
std::vector<int > cut;
151
293
152
- // Step 1 : get the initial cut based on BFS levels and avaiableMemory .
294
+ // Step 1 : get the initial cut based on BFS levels and availableMemory .
153
295
// TODO .. need to remove the duplicated memory usage.
154
296
unsigned mem = 0 ;
155
297
for (int i = level - 1 ; i >= 0 ; i--) {
@@ -199,9 +341,9 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
199
341
}
200
342
}
201
343
}
202
- // Step 3 : adjust the partition based on performance (Advanced Graph
203
- // Paritioning algrithm will be applied here) .
204
- // --- TODO
344
+
345
+ // Step 3 : adjust the partition based on performance .
346
+ partitionsAdjust (mapping, availableMemory);
205
347
206
348
return mapping;
207
349
}
0 commit comments