Skip to content

[Graph Partitioning] Add optimization to minimize communication cost and number of partitions. #2359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 44 additions & 10 deletions include/glow/Partitioner/Partitioner.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#define GLOW_PARTITIONER_PARTITIONER_H

#include "glow/Graph/Graph.h"
#include "glow/Partitioner/PartitionerUtils.h"
#include "glow/Runtime/RuntimeTypes.h"

#include "llvm/ADT/DenseMap.h"
Expand All @@ -29,19 +30,27 @@ namespace glow {

using namespace runtime;

using MemUsageMap = std::unordered_map<Node *, unsigned>;
using MemUsageMapTy = std::unordered_map<Node *, unsigned>;
using NodesSetTy = std::set<Node *>;
using PartitionCostMapTy = llvm::DenseMap<Function *, GraphMemInfo>;

/// Helper structure for building a partition. Records 1) a mapping of nodes in
/// the original function to destination partitions, along with a list of the
/// newly-created functions; 2) a mapping of newly-created functions along with
/// their sets of nodes.
using NodeToFunctionMapTy = llvm::DenseMap<Node *, Function *>;
using FunctionToNodesMapTy = llvm::DenseMap<Function *, NodesSetTy>;

/// Helper structure for building a partition. Records a mapping of nodes in the
/// original function to destination partitions, along with a list of the
/// newly-created functions.
class NodeToFunctionMap {
using Map = llvm::DenseMap<Node *, Function *>;

/// Newly-created partitions.
FunctionList functions_;

/// Map of nodes in the original function to their target partition.
Map nodeToFunction_;
NodeToFunctionMapTy nodeToFunction_;

/// Map of sub-functions to their memory consumption.
PartitionCostMapTy partitionCost_;

public:
/// Create a new partition \p F.
Expand All @@ -54,10 +63,22 @@ class NodeToFunctionMap {
const FunctionList &getPartitions() const { return functions_; }

/// Map API.
Map::iterator find(Node *N) { return nodeToFunction_.find(N); }
Map::iterator begin() { return nodeToFunction_.begin(); }
Map::iterator end() { return nodeToFunction_.end(); }
NodeToFunctionMapTy::iterator find(Node *N) {
return nodeToFunction_.find(N);
}
NodeToFunctionMapTy::iterator begin() { return nodeToFunction_.begin(); }
NodeToFunctionMapTy::iterator end() { return nodeToFunction_.end(); }

Function *operator[](Node *n) { return nodeToFunction_[n]; }
void deletePartition(Function *func) { functions_.remove(func); }

/// Set the memory consumption \p cost for a partition \p func.
void setGraphMemInfo(Function *func, GraphMemInfo cost) {
partitionCost_[func] = cost;
}

/// Get the memory consumption for a partition \p func.
GraphMemInfo getGraphMemInfo(Function *func) { return partitionCost_[func]; }
};

/// The struct contains all the created DAGNodes. This DAGNodeList owns all the
Expand Down Expand Up @@ -92,7 +113,7 @@ class Partitioner {
size_t memSize_;

/// The map of each operator and the corresponding memory size.
MemUsageMap memUsage_;
MemUsageMapTy memUsage_;

/// Get the representative function (the one with the largest input) and
/// update the memSize.
Expand All @@ -102,6 +123,19 @@ class Partitioner {
/// function.
void initOpMemUsage();

/// Combine the partitions if necessary : if all outside uses of the nodes in
/// partition1 are in partition2, and the sum of memory consumption of
/// partition1 and partition2 is less than availableMemory, combine partition1
/// and partition2.
void partitionsCombine(NodeToFunctionMap &partitions,
FunctionToNodesMapTy &nodesSet,
uint64_t availableMemory);

/// After getting the initial partitions, adjust the partitions to minimize
/// communication and computation cost.
void partitionsAdjust(NodeToFunctionMap &partitions,
uint64_t availableMemory);

/// Assign nodes to partitions and return the mapping.
NodeToFunctionMap selectPartitions(Function *F, unsigned availableMemory);

Expand Down
48 changes: 48 additions & 0 deletions include/glow/Partitioner/PartitionerUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_PARTITIONER_PARTITIONUTILS_H
#define GLOW_PARTITIONER_PARTITIONUTILS_H

#include "glow/Graph/Graph.h"

namespace glow {

/// The memory usage of a subgraph (i.e. a list of nodes of a function).
struct GraphMemInfo {
  /// Memory used by the subgraph's input nodes, i.e. the nodes whose
  /// predecessors are not part of this subgraph.
  uint64_t inMemSize{0};
  /// Memory used by the subgraph's output nodes, i.e. the nodes whose
  /// successors are not part of this subgraph.
  uint64_t outMemSize{0};
  /// Memory used by all constants referenced from this subgraph.
  uint64_t constMemSize{0};

  /// Creates an empty record: every size starts at zero.
  GraphMemInfo() {}
};

/// Given \p nodes, return a list of nodes who use any node in this set.
std::vector<Node *> getOutUsers(const std::set<Node *> &nodes);

/// Given \p nodes, return a list of nodes who use only the nodes in this set or
/// constant.
std::vector<Node *>
getOutUsersWithOnePredecessor(const std::set<Node *> &nodes);

/// Return the memory usage of a given nodes set.
GraphMemInfo getGraphMemInfo(const std::set<Node *> &nodes);
} // namespace glow
#endif // GLOW_PARTITIONER_PARTITIONUTILS_H
3 changes: 2 additions & 1 deletion lib/Partitioner/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
add_library(Partitioner
Partitioner.cpp)
PartitionerUtils.cpp
Partitioner.cpp)

target_link_libraries(Partitioner
PRIVATE
Expand Down
150 changes: 146 additions & 4 deletions lib/Partitioner/Partitioner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,148 @@ static BFSLevel getBFSLevel(Function *F) {
return bfs;
}

// Combine the partitions if necessary : if all outside uses of the nodes in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use /// instead of //

// partition1 is in partition2, and the sum of memory consumption of partition1
// and partition2 is less than availableMemory, combine partition1 and
// partition2.
void Partitioner::partitionsCombine(NodeToFunctionMap &partitions,
FunctionToNodesMapTy &nodesSet,
uint64_t availableMemory) {

// Visit every partition (function -> node set) and try to merge it with the
// single partition that consumes its results.
for (FunctionToNodesMapTy::iterator it = nodesSet.begin();
it != nodesSet.end(); ++it) {
// Users of this partition's nodes, as reported by getOutUsers(). A partition
// without such users has nothing to be merged into.
std::vector<Node *> outUsers = getOutUsers((*it).second);
if (outUsers.empty()) {
continue;
}

// flag stays true only while every entry of outUsers is mapped to the same
// partition, i.e. this partition feeds exactly one other partition.
bool flag = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment what this flag means?

// Compare the partition of each user with that of the previous user; the
// first mismatch means there is more than one successor partition.
for (int i = 1, e = outUsers.size(); i < e; i++) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Describe in a comment the purpose of this loop.

if (partitions[outUsers[i]] != partitions[outUsers[i - 1]]) {
flag = false;
break;
}
}
if (flag) {
// This partition only has one successor.
Function *cur = (*it).first;
Function *suc = partitions[outUsers[0]];
// By-value snapshot of the successor's nodes; it is used below after the
// successor's entry has been scheduled for removal.
NodesSetTy tmp = nodesSet.lookup(suc);
GraphMemInfo cost1 = partitions.getGraphMemInfo(cur);
GraphMemInfo cost2 = partitions.getGraphMemInfo(suc);
// Estimated footprint of the merged partition: constants and inputs of
// both partitions minus the output of cur.
// NOTE(review): all operands are uint64_t, so the subtraction would wrap if
// cost1.outMemSize exceeded the sum - confirm that cannot happen.
if (cost1.constMemSize + cost1.inMemSize + cost2.constMemSize +
cost2.inMemSize - cost1.outMemSize <
availableMemory) {
// We can combine the two partitions to fit one device.
// Re-map every node of the successor to cur.
for (NodesSetTy::iterator it2 = tmp.begin(); it2 != tmp.end(); ++it2) {
partitions.add(*it2, cur);
}
(*it).second.insert(tmp.begin(), tmp.end());
// NOTE(review): the GraphMemInfo stored for cur is not refreshed in this
// function after the merge - confirm whether later iterations rely on it.
partitions.deletePartition(suc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for partition removals could be moved into its own helper method. It could be useful at other places as well, I guess.

// NOTE(review): this erases an entry from nodesSet while the enclosing loop
// iterates over it - confirm the iterator `it` stays valid for DenseMap.
nodesSet.erase(suc);
module_->eraseFunction(suc);
}
}
}
}

void Partitioner::partitionsAdjust(NodeToFunctionMap &partitions,
uint64_t availableMemory) {
// For each partition, create a node set.
FunctionToNodesMapTy nodesSet;
for (NodeToFunctionMapTy::iterator it = partitions.begin();
it != partitions.end(); ++it) {
nodesSet[(*it).second].insert((*it).first);
}

// Initial the memory cost for each partition. Now we use the output size to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/Initial/Initialize/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/Now/For now,/

// represent the communication cost.
for (FunctionToNodesMapTy::iterator it = nodesSet.begin();
it != nodesSet.end(); ++it) {
GraphMemInfo cost = getGraphMemInfo((*it).second);
partitions.setGraphMemInfo((*it).first, cost);
}

// Move/Exchange nodes between any two connected partitions, until no gain is
// obtained.
// Step1 Move: Assume Partition1 -> Partition2, try to move nodes from
// Partition2 to Partition1 if those nodes only use the nodes in
// Partition1(recursively) and the move won't make Partition1's memory exceed
// the memory constraint, and the communication cost is minimized.
bool gain = true;
while (gain) {
// gain is initialized as false, it will be set to be true if there is at
// least one node can be moved from one set to another set.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/can/that can/

gain = false;
for (FunctionToNodesMapTy::iterator it = nodesSet.begin();
it != nodesSet.end(); ++it) {
// Working copy of this partition's node set; nodesSet[cur] itself is only
// updated once a move has been accepted.
NodesSetTy nSet = (*it).second;
// Candidate nodes to pull into this partition (see
// getOutUsersWithOnePredecessor()).
std::vector<Node *> outUsers = getOutUsersWithOnePredecessor(nSet);
if (outUsers.empty()) {
continue;
}
Function *cur = (*it).first;
// Running memory estimate (constants + inputs) and communication cost
// (output size) of the current partition.
uint64_t memSize = partitions.getGraphMemInfo(cur).constMemSize +
partitions.getGraphMemInfo(cur).inMemSize;
uint64_t communicationCost = partitions.getGraphMemInfo(cur).outMemSize;
// Check if a node can be moved to current node set (i.e nSet).
for (int i = 0, e = outUsers.size(); i < e; i++) {
// Rule 1: this move won't break memory constraint.
if (memUsage_[outUsers[i]] + memSize > availableMemory) {
continue;
}
// Rule 2: this move won't cause constant duplication.
// cont is set when the candidate has a Storage input with more than one
// use; such a candidate is skipped.
bool cont = false;
for (int j = 0, e1 = outUsers[i]->getNumInputs(); j < e1; j++) {
auto in = outUsers[i]->getNthInput(j);
if (isa<Storage>(in.getNode()) && !in.hasOneUse()) {
cont = true;
break;
}
}
if (cont) {
continue;
}
// Rule 3: this move won't increase communication cost. Even if this
// move won't change communication cost, according to rule 1 and rule 2,
// the memory consumption of the partition where this node (i.e
// outUsers[i]) belongs can be reduced. Therefore, it may trigger later
// node movement or partitionsCombine.
// Tentatively add the candidate to the working copy to evaluate the cost.
// NOTE(review): the candidate is not removed from nSet again when the check
// below rejects the move, so later candidates are costed with it still
// included - confirm this is intended.
nSet.insert(outUsers[i]);
GraphMemInfo cost = getGraphMemInfo(nSet);
if (cost.outMemSize <= communicationCost) {
// Move this node to current node set.
nSet.insert(outUsers[i]);
nodesSet[cur].insert(outUsers[i]);
Function *suc = partitions[outUsers[i]];
nodesSet[suc].erase(outUsers[i]);
// Update the partitions.
partitions.add(outUsers[i], cur);
partitions.setGraphMemInfo(cur, cost);
if (nodesSet[suc].empty()) {
// It is possible that after moving a node from Partition2 to
// Partition1, Partition2 becomes empty. Remove the empty partition.
partitions.deletePartition(suc);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you could reuse the partition removal logic if it would be a separate method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here nodesSet is not partitions's member, and this is just one circumstance under which a partition need to be removed. Therefore, I only add deletePartition as a method.

module_->eraseFunction(suc);
} else {
// The donor partition still has nodes: recompute and store its cost.
// Note that `cost` now describes suc, not cur.
cost = getGraphMemInfo(nodesSet[suc]);
partitions.setGraphMemInfo(suc, cost);
}
gain = true;
// NOTE(review): when the else-branch above ran, cost.outMemSize is the
// donor partition's output size rather than cur's - confirm which value
// communicationCost is meant to track.
communicationCost = cost.outMemSize;
memSize += memUsage_[outUsers[i]];
}
}
}
}

// TODO... :Step 2: exchange two nodes from two partitions to minimize
// communication cost.

// Combine the current partitions if necessary.
partitionsCombine(partitions, nodesSet, availableMemory);
}

/// Assign nodes to partitions and return the mapping.
NodeToFunctionMap Partitioner::selectPartitions(Function *F,
unsigned availableMemory) {
Expand All @@ -149,7 +291,7 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
// (cut[1], cut[0] - 1], ..., (-1, cut[n] - 1].
std::vector<int> cut;

// Step 1 : get the initial cut based on BFS levels and avaiableMemory.
// Step 1 : get the initial cut based on BFS levels and availableMemory.
// TODO .. need to remove the duplicated memory usage.
unsigned mem = 0;
for (int i = level - 1; i >= 0; i--) {
Expand Down Expand Up @@ -199,9 +341,9 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F,
}
}
}
// Step 3 : adjust the partition based on performance (Advanced Graph
// Paritioning algrithm will be applied here).
// --- TODO

// Step 3 : adjust the partition based on performance.
partitionsAdjust(mapping, availableMemory);

return mapping;
}
Expand Down
Loading