diff --git a/include/glow/Partitioner/Partitioner.h b/include/glow/Partitioner/Partitioner.h index 83b80f5cd5..48ac4340b8 100644 --- a/include/glow/Partitioner/Partitioner.h +++ b/include/glow/Partitioner/Partitioner.h @@ -16,7 +16,7 @@ #ifndef GLOW_PARTITIONER_PARTITIONER_H #define GLOW_PARTITIONER_PARTITIONER_H -#include "glow/Partitioner/PartitionerTypes.h" +#include "glow/Partitioner/PartitionerBase.h" #include "glow/Support/Error.h" namespace glow { @@ -25,7 +25,7 @@ using namespace runtime; /// Given a module, partitions each of the its functions into multiple ones /// based on memory constraints and minimizes the communication cost. -class Partitioner { +class Partitioner final : public PartitionerBase { /// The module that needs to be decomposed. Module *module_; @@ -36,7 +36,10 @@ class Partitioner { /// The cost model related to device. std::vector deviceInfo_; - /// The backend pointers. + /// The backends created in Partitioner. Used for function optimization. + std::vector> backendHolder; + + /// The raw backend pointers. std::vector backends_; /// The map between backend name and BackendInfo. @@ -50,9 +53,6 @@ class Partitioner { /// needed after partitions. DeviceIDTy logicalDeviceID_; - /// The result of module partitioning. - DAGListTy partitions_; - /// Total memory (bytes) requested by one module. uint64_t memSize_; @@ -82,11 +82,19 @@ class Partitioner { NodeToFunctionMap selectPartitions(Function *F, uint64_t availableMemory, llvm::StringRef backendName); - /// Duplicates all networks in the module order to saturate the Host. - void saturateHost(unsigned logicalDeviceCount); - - FunctionToBackendNameMap - backendBasedPartition(Function *F, std::vector &backends, + /// Duplicates \p partitions in the module order to saturate the Host. \p + /// logicalDeviceCount is the number of logical devices used by the current + /// partitions. For example: If a network is partitioned into two parts (\p + /// logicalDeviceCount) and there are six devices this would duplicate the + /// network three times. + void saturateHost(unsigned logicalDeviceCount, const DAGListTy &partitions); + + /// Partition a function \p F based on backends \p backends. \returns the + /// final partition result (or an error) and a map between partitions and + /// backend names. \p cctx is used for function optimization. + llvm::Expected + backendBasedPartition(FunctionToBackendNameMap &funcToBackend, Function *F, + std::vector &backends, CompilationContext &cctx); /// Performs a load balancing optimization pass to optimize for load @@ -96,26 +104,20 @@ llvm::StringRef backendName, NodeToFunctionMap &mapping); - /// Given the node-function mapping, do the actual partitioning. If \p saveDAG - /// is true, the DAG will be saved into partitions_, which is the final - /// partition result. - void doPartitioning(llvm::StringRef funcName, std::vector, - NodeToFunctionMap &mapping, bool saveDAG); - /// If there is no need to do any partition, just generate the DAGNode based /// on current functions in this module for backend \p backendName found in \p - /// backendMap. \p cctx is used during optimization of the Function. \returns - /// whether there was an error encountered. - llvm::Error + /// backendMap. \p cctx is used for function optimization. \returns the + /// partition result or an error. + llvm::Expected createDAGWithoutPartition(llvm::StringRef backendName, std::map &backendMap, CompilationContext &cctx); - /// Get the map between the backend name and the concrete backend info (e.g.
- /// backend pointer, mem, number) used in this partiton. If there are backends - /// need to be created, we use \p backendsHolder to hold them for memory - /// purpose. - void getBackendMap(std::map &backendMap, + /// Create the map between the backend name and the concrete backend info + /// (e.g. backend pointer, mem, number) used in this partition. If backends + /// need to be created, we use \p backendsHolder to hold them for memory + /// management purposes. + void genBackendMap(std::map &backendMap, std::vector> &backendsHolder, std::vector &backends); @@ -141,30 +143,30 @@ class Partitioner { const std::vector &backends, bool saturateHost = false, bool optimized = false); - /// Based on partitionConfig_ passed into Partitioner, do the user-defined + /// Based on \p partitionConfig passed into Partitioner, do the user-defined /// partition. - llvm::Error PartitionFromConfig(); - - /// Decompose each function in a module. Now we support partitioning a module - /// among different type of devices. \p cctx is used during optimization of - /// the Function. \returns whether there was an error encountered. - llvm::Error Partition(CompilationContext &cctx); + llvm::Expected + partitionFromConfig(const PartitionConfig &partitionConfig); /// This partition approach is used in Glow Quantization Profiling flow. The /// backendBasedPartition is applied first in case there are heterogeneous /// backends. Then each sub-function will be compiled and run in CPU backend - /// for profiling. - llvm::Error QuantizationProfilingPartition(CompilationContext &cctx, - Function *F, - std::vector backends); - - /// Get the final partitions. - DAGListTy &getPartitionResult() { return partitions_; } - - /// Dump the partition result to a dot file. Since now all functions belong to - /// a function family and they have the same partition, we only dump the one - /// function's partition. - void dumpDAG(llvm::StringRef dotFilename) const; + /// for profiling. \p cctx is used for function optimization. \returns the + /// partition result or an error. + llvm::Expected + quantizationProfilingPartition(CompilationContext &cctx); + + /// This partition approach first does the partition based on backend types, + /// and then based on cost models (memory usage and performance). \p cctx is + /// used for function optimization. \returns the partition result or an error. + llvm::Expected heterogeneousPartition(CompilationContext &cctx); + + /// Decompose each function in a module. Given the parameters, this function + /// will choose different partition approaches supported in this class: + /// heterogeneous partition, user-defined partition or quantization profiling. + /// \p cctx is used for function optimization. \returns the partition result + /// or an error. + llvm::Expected partition(CompilationContext &cctx) override; }; } // namespace glow #endif // GLOW_PARTITIONER_PARTITIONER_H diff --git a/include/glow/Partitioner/PartitionerBase.h b/include/glow/Partitioner/PartitionerBase.h new file mode 100644 index 0000000000..3766550a86 --- /dev/null +++ b/include/glow/Partitioner/PartitionerBase.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef GLOW_PARTITIONER_PARTITIONERBASE_H +#define GLOW_PARTITIONER_PARTITIONERBASE_H + +#include "glow/Partitioner/PartitionerTypes.h" +#include "glow/Support/Error.h" + +namespace glow { + +using namespace runtime; +/// Given a module, partitions each of its functions into multiple ones +/// based on memory constraints and minimizes the communication cost. +class PartitionerBase { +public: + virtual ~PartitionerBase() = default; + + /// Decompose each function in a module. \p cctx is used in function + /// optimization. \returns the partition result. + virtual llvm::Expected partition(CompilationContext &cctx) = 0; + + /// Dump the partition result \p partitions to a dot file with name \p + /// dotFilename. Since all functions belong to one function family and share + /// the same partition, we only dump one function's partition. + void dumpDAG(llvm::StringRef dotFilename, const DAGListTy &partitions) const; + +protected: + /// Given the node-function mapping \p mapping, do the actual partitioning. If + /// \p saveDAG is true, the DAG will be generated. \returns the final + /// partitions or an empty partition list (if \p saveDAG is false). + DAGListTy doPartitioning(llvm::StringRef funcName, std::vector, + Module *module, NodeToFunctionMap &mapping, + bool saveDAG); +}; +} // namespace glow +#endif // GLOW_PARTITIONER_PARTITIONERBASE_H diff --git a/lib/Partitioner/CMakeLists.txt b/lib/Partitioner/CMakeLists.txt index d3e304a3a0..e37ec05361 100644 --- a/lib/Partitioner/CMakeLists.txt +++ b/lib/Partitioner/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(Partitioner + PartitionerBase.cpp PartitionerUtils.cpp PartitionerOptimizer.cpp PartitionerValidation.cpp diff --git a/lib/Partitioner/Partitioner.cpp b/lib/Partitioner/Partitioner.cpp index ac3c104ef0..55069e02bb 100644 --- a/lib/Partitioner/Partitioner.cpp +++ b/lib/Partitioner/Partitioner.cpp @@ -21,10 +21,6 @@ #include "glow/Partitioner/PartitionerValidation.h" #include "glow/Support/Support.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -64,61 +60,6 @@ bool sortMinMemory(const std::pair &a, return a.second < b.second; } -void Partitioner::dumpDAG(llvm::StringRef dotFilename) const { - if (partitions_.size() == 0) - return; - auto *root = partitions_[0].root.get(); - LOG(INFO) << "Writing dotty graph for DAG after graph partitioning: " - << dotFilename.str(); - std::ofstream myfile; - myfile.open(dotFilename); - myfile << "digraph DAG {\n\trankdir=TB;\n"; - // Dump DAGNodes - std::vector nodes; - llvm::SmallSet used; - nodes.push_back(root); - int cur = 0; - int num = 1; - while (cur < num) { - auto *node = nodes[cur]; - for (size_t i = 0; i < node->children.size(); i++) { - auto child = node->children[i]; - DescriptionBuilder db(child->name.c_str()); - const std::string &backendName = child->backendName; - db.addParam("BackendName", backendName); - myfile << "\"" << escapeDottyString(child->name) << "\"" - << " [ label = \"" << escapeDottyString(db) << "\""; -
myfile << "\tshape = \"record\"\n"; - myfile << "\tstyle=\"filled,rounded\"\n"; - auto colorIdx = llvm::hash_value(backendName); - myfile << "\tfillcolor=" << getDotFileNodeColor(colorIdx) << "\n"; - myfile << "penwidth = 2];\n"; - if (used.count(child) == 0) { - nodes.push_back(child); - used.insert(child); - num++; - } - } - cur++; - } - - // Dump edges. - for (size_t i = 0; i < nodes.size(); i++) { - auto *root = nodes[i]; - for (size_t j = 0; j < root->children.size(); j++) { - auto child = root->children[j]; - myfile << "\"" << escapeDottyString(root->name) << "\"" - << " -> " - << "\"" << escapeDottyString(child->name) << "\"" - << ";"; - } - } - myfile << "}"; - - myfile.close(); - return; -} - Partitioner::Partitioner(Module *parent, const std::vector &devices, const std::vector &backends, bool saturateHost, bool optimized) @@ -234,16 +175,14 @@ NodeToFunctionMap Partitioner::selectPartitions(Function *F, return mapping; } -/// Duplicate the network to saturate the number of devices. For example: If a -/// network is partitioned into two parts (\p logicalDeviceCount) and there are -/// six devices this would duplicate the network three times. -void Partitioner::saturateHost(unsigned logicalDeviceCount) { +void Partitioner::saturateHost(unsigned logicalDeviceCount, + const DAGListTy &partitions) { unsigned duplications = deviceInfo_.size() / logicalDeviceCount; if (duplications < 2) { return; } // Add additional logical devices to each node. - for (auto &network : partitions_) { + for (auto &network : partitions) { for (auto &node : network.nodes) { // Build list of new logical devices to add to node. std::vector newDevices; @@ -263,143 +202,9 @@ void Partitioner::saturateHost(unsigned logicalDeviceCount) { } } -/// Current only partition the representative function. -void Partitioner::doPartitioning(llvm::StringRef funcName, - std::vector funcs, - NodeToFunctionMap &mapping, bool saveDAG) { - // Add a dummy node to make sure that a DAG has a single entrance. - DAGNodePtr DAGRoot = llvm::make_unique(); - DAGNodePtrVec nodes; - DAGRoot->logicalDevices = {0}; - DAGRoot->name = funcName; - DAGRoot->module = module_; - DAGRoot->deviceIDs = {0}; - DAGNode *root = DAGRoot.get(); - - llvm::DenseMap currToNew; - - // Clone nodes into target partition. - for (size_t i = 0, e = funcs.size(); i < e; i++) { - for (auto &N : funcs[i]->getNodes()) { - auto *clone = N.clone(); - currToNew[&N] = clone; - mapping[&N]->addNode(clone); - } - } - - // For any dependency that crosses a partition, add a placeholder and save - // node. Record the dependence in the function graph. - std::unordered_map placeholders; - llvm::DenseMap funcDAG; - for (auto *subF : mapping.getPartitions()) { - if (funcDAG.find(subF) == funcDAG.end()) { - std::unique_ptr subDAG = llvm::make_unique(); - subDAG->name = subF->getName(); - subDAG->logicalDevices = mapping.getLogicalDeviceIDList(subF); - subDAG->backendName = mapping.getPartitionBackendName(subF); - funcDAG[subF] = subDAG.get(); - nodes.push_back(std::move(subDAG)); - } - - // Link subF to its parents. - std::set parents; - for (auto &N : subF->getNodes()) { - for (int inp = 0, e = N.getNumInputs(); inp < e; inp++) { - auto input = N.getNthInput(inp); - // No need to check Constant since it won't be the result of another - // function. - if (isa(input.getNode())) { - continue; - } - - Function *inputF = nullptr; - // It is possible that one input is the output of anther function. 
- if (Placeholder *ph = llvm::dyn_cast(input.getNode())) { - for (auto &user : ph->getUsers()) { - if (auto *save = llvm::dyn_cast(user.getUser())) { - placeholders[input] = save->getPlaceholder(); - inputF = mapping[user.getUser()]; - break; - } - } - if (!inputF) { - continue; - } - } - - if (!inputF) { - inputF = mapping[input.getNode()]; - } - if (subF == inputF) - continue; - - // Check if a DAGNode for subF's parent is created or not. If not, - // create one. - if (funcDAG.find(inputF) == funcDAG.end()) { - std::unique_ptr subDAG = llvm::make_unique(); - subDAG->name = inputF->getName(); - subDAG->logicalDevices = mapping.getLogicalDeviceIDList(inputF); - subDAG->backendName = mapping.getPartitionBackendName(inputF); - funcDAG[inputF] = subDAG.get(); - nodes.push_back(std::move(subDAG)); - } - - // subF is a child of inputF, inputF is a parent of subF. - if (parents.find(inputF) == parents.end()) { - funcDAG[inputF]->children.push_back(funcDAG[subF]); - funcDAG[subF]->parents.push_back(funcDAG[inputF]); - parents.insert(inputF); - } - // If we've already created a placeholder for this dependence, use it. - auto it = placeholders.find(input); - if (it != placeholders.end()) { - N.setNthInput(inp, it->second); - continue; - } - - // Create a new placeholder to represent this dependence. - auto *save = inputF->createSave("tmp", input); - auto *tmp = save->getPlaceholder(); - placeholders[input] = tmp; - N.setNthInput(inp, tmp); - } - } - } - - if (saveDAG) { - DAG dag; - dag.root = std::move(DAGRoot); - dag.nodes = std::move(nodes); - partitions_.push_back(std::move(dag)); - } - - // Update links between nodes in the cloned functions. Add placeholders (and - // save nodes) where a link crosses a partition boundary. - for (auto *subF : mapping.getPartitions()) { - for (auto &N : subF->getNodes()) { - for (int inp = 0, e = N.getNumInputs(); inp < e; inp++) { - auto input = N.getNthInput(inp); - if (isa(input.getNode())) - continue; - // Link this node to the clone of its input. - auto *clone = currToNew[input.getNode()]; - N.setNthInput(inp, NodeValue(clone, input.getResNo())); - } - } - } - - // For all DAGNode without parents, link them to the root DAG. - for (auto *subF : mapping.getPartitions()) { - if (funcDAG[subF]->parents.size() == 0) { - funcDAG[subF]->parents.push_back(root); - root->children.push_back(funcDAG[subF]); - } - } -} - -FunctionToBackendNameMap Partitioner::backendBasedPartition( - Function *F, std::vector &backends, CompilationContext &cctx) { - FunctionToBackendNameMap ret; +llvm::Expected Partitioner::backendBasedPartition( + FunctionToBackendNameMap &funcToBackend, Function *F, + std::vector &backends, CompilationContext &cctx) { NodeToFunctionMap mapping; llvm::DenseMap nodeToBackendName; @@ -437,8 +242,8 @@ FunctionToBackendNameMap Partitioner::backendBasedPartition( break; } } - assert(nodeToBackendName.find(&N) != nodeToBackendName.end() && - "Node is not supported by any of the provided backends"); + RETURN_ERR_IF_NOT(nodeToBackendName.find(&N) != nodeToBackendName.end(), + "Node is not supported by any of the provided backends"); } BFSLevel bfs = getBFSLevel(F); @@ -452,10 +257,10 @@ FunctionToBackendNameMap Partitioner::backendBasedPartition( // When profiling, all the partition backend is assigned to // profilingBackend. 
mapping.createPartition(newF, profilingBackend); - ret[newF] = profilingBackend; + funcToBackend[newF] = profilingBackend; } else { mapping.createPartition(newF, backendName); - ret[newF] = backendName; + funcToBackend[newF] = backendName; } for (int i = level - 1; i >= 0; i--) { for (size_t j = 0, e = bfs[i].size(); j < e; j++) { @@ -469,10 +274,10 @@ FunctionToBackendNameMap Partitioner::backendBasedPartition( // When profiling, all the partition backend is assigned to be // profilingBackend. mapping.createPartition(newF, profilingBackend); - ret[newF] = profilingBackend; + funcToBackend[newF] = profilingBackend; } else { mapping.createPartition(newF, backendName); - ret[newF] = backendName; + funcToBackend[newF] = backendName; } } mapping.add(N, newF); @@ -493,28 +298,26 @@ FunctionToBackendNameMap Partitioner::backendBasedPartition( mapping.appendLogicalDeviceID(func, logicalDeviceID++); } } - doPartitioning(F->getName(), funcs, mapping, genDAG); - - return ret; + return doPartitioning(F->getName(), funcs, module_, mapping, genDAG); } -void Partitioner::getBackendMap( +void Partitioner::genBackendMap( std::map &backendMap, std::vector> &backendsHolder, std::vector &backends) { // If the backends are created already, we use them directly. bool hasBackends = backends_.size() != 0; if (hasBackends) { - assert(backends_.size() == deviceInfo_.size() && - "number of backends and devices is not match."); + DCHECK(backends_.size() == deviceInfo_.size()) + << "number of backends and devices is not match."; } int n = 0; for (size_t i = 0, e = deviceInfo_.size(); i < e; i++) { std::string backendName = deviceInfo_[i].backendName; if (hasBackends) { - assert(backends_[i]->getBackendName() == backendName && - "Backend Type mismatch."); + DCHECK(backends_[i]->getBackendName() == backendName) + << "Backend Type mismatch."; } if (backendMap.find(backendName) == backendMap.end()) { BackendInfo backendInfo; @@ -545,9 +348,10 @@ void Partitioner::getBackendMap( } } -llvm::Error Partitioner::createDAGWithoutPartition( +llvm::Expected Partitioner::createDAGWithoutPartition( llvm::StringRef backendName, std::map &backendMap, CompilationContext &cctx) { + DAGListTy partitions; for (auto F : module_->getFunctions()) { if (!optimized_) { auto backend = backendMap[backendName].backend; @@ -565,13 +369,13 @@ llvm::Error Partitioner::createDAGWithoutPartition( DAG0->children.push_back(DAG1.get()); DAGNodePtrVec nodes; nodes.push_back(std::move(DAG1)); - partitions_.push_back({std::move(DAG0), std::move(nodes)}); + partitions.push_back({std::move(DAG0), std::move(nodes)}); } if (saturateHost_) { // Saturate the Host. 
- saturateHost(1); + saturateHost(1, partitions); } - return llvm::Error::success(); + return std::move(partitions); } llvm::Error Partitioner::loadBalancedPartitioning(Function *F, @@ -659,7 +463,7 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F, auto curOpMemory = getNodeMemUsage(N); // Find a partition to put this node into - int curPartition = maxLogicalDeviceId; + DeviceIDTy curPartition = maxLogicalDeviceId; const float allowedLoadImbalanceFraction = 0.5f; for (; curPartition < numDevices; curPartition++) { // Put the op in current partition if @@ -695,7 +499,7 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F, "Load balance partition error"); } } - for (int i = 0; i < numDevices; i++) { + for (size_t i = 0; i < numDevices; i++) { VLOG(1) << "Partition #" << i << " has estimated runtime " << deviceTime[i]; } @@ -703,17 +507,29 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F, return llvm::Error::success(); } -llvm::Error Partitioner::QuantizationProfilingPartition( - CompilationContext &cctx, Function *F, std::vector backends) { +llvm::Expected +Partitioner::quantizationProfilingPartition(CompilationContext &cctx) { + // For quantization profiling flow, currently we assume there is only 1 + // function in a module. + DCHECK(module_->getFunctions().size() == 1) + << "Invalid number of functions in a module. For quantization profiling " + "flow, the module can only contain 1 function"; + // Quantization profiling flow is run under CPU backend, so we don't really // need the concrete partition. The backendBasedPartition is necessary since // we need the mapping between quantized tensor and original tensor. + DAGListTy partitions; + std::vector backends; + genBackendMap(backendMap_, backendHolder, backends); + F_ = selectRepFunc(module_, memSize_); + FunctionToBackendNameMap funcToBackend; - funcToBackend = backendBasedPartition(F_, backends, cctx); + ASSIGN_VALUE_OR_RETURN_ERR( + partitions, backendBasedPartition(funcToBackend, F_, backends, cctx)); module_->eraseFunction(F_); std::unique_ptr backend(createBackend(profilingBackend)); for (Function *subF : module_->getFunctions()) { - assert(subF->verify() && "Conversion led to invalid function"); + DCHECK(subF->verify()) << "Conversion led to invalid function"; if (!optimized_) { RETURN_IF_ERR(::glow::optimizeFunction(subF, *backend, cctx)); } @@ -723,30 +539,20 @@ llvm::Error Partitioner::QuantizationProfilingPartition( << "Profiling a model to be partitioned cross different backends. Each " "sub-network will be optimized and run on cpu backend.\n"; } - return llvm::Error::success(); + return std::move(partitions); } -llvm::Error Partitioner::Partition(CompilationContext &cctx) { +llvm::Expected +Partitioner::heterogeneousPartition(CompilationContext &cctx) { + DAGListTy partitions; // Prepare the mapping between BackendName and BackendInfo. std::vector backends; - std::vector> backendHolder; - getBackendMap(backendMap_, backendHolder, backends); - - if (partitionConfig_.enabled()) { - // Jump into user-defined partition, and skip the following auto partition. - return PartitionFromConfig(); - } + genBackendMap(backendMap_, backendHolder, backends); // Step 0: Find the representative function for running partitioning // algorithm. F_ = selectRepFunc(module_, memSize_); - if (cctx.precisionConfig.quantMode == QuantizationMode::Profile) { - // Jump into profiling flow, and leave without generating partitions for the - // backends with same type.. 
- return QuantizationProfilingPartition(cctx, F_, backends); - } - // Step 1 : do the partition based on backends type. FunctionToBackendNameMap funcToBackend; std::string origName(F_->getName().data()); @@ -767,8 +573,15 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { } return createDAGWithoutPartition(backendName, backendMap_, cctx); } + DCHECK(module_->getFunctions().size() == 1) + << "Invalid number of functions in a module. Now in heterogeneous " + "partition flow, the module can only contain 1 function"; } else { - funcToBackend = backendBasedPartition(F_, backends, cctx); + DCHECK(module_->getFunctions().size() == 1) + << "Invalid number of functions in a module. Now in heterogeneous " + "partition flow, the module can only contain 1 function"; + ASSIGN_VALUE_OR_RETURN_ERR( + partitions, backendBasedPartition(funcToBackend, F_, backends, cctx)); module_->eraseFunction(F_); } @@ -781,7 +594,7 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { auto *backend = backendMap_[i->second].backend; auto availMem = backendMap_[i->second].memSize; funcs.push_back(func); - assert(func->verify() && "Conversion led to invalid function"); + DCHECK(func->verify()) << "Conversion led to invalid function"; // Step 2.1 : optimize a function if it has not been optimized yet. if (!optimized_) { RETURN_IF_ERR(::glow::optimizeFunction(func, *backend, cctx)); } @@ -825,10 +638,11 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { } // Step 5 : do the real partitioning for the function list. - doPartitioning(origName, funcs, mapping, true); + partitions = + doPartitioning(origName, funcs, module_, mapping, /* saveDAG */ true); // DAG validation. - RETURN_IF_ERR(dagValidation(partitions_[0])); + RETURN_IF_ERR(dagValidation(partitions[0])); // Step 6 : Post-partition optimization - Adjust the logicalDevice for each // DAGNode. @@ -837,7 +651,7 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { // Attempt to saturate the host when there is only one type of backend. // Passing in the count of logical devices. Since logicalId starts at 0 we // add one. - saturateHost(logicalDeviceID_); + saturateHost(logicalDeviceID_, partitions); } // Step 7 : clean up and verify the generated new functions. @@ -849,7 +663,7 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { if (logPartition) { LOG(INFO) << "The number of partitions is : " << funcList.size() << ", and the DAG is dumped into DAG.dot file.\n"; - dumpDAG("DAG.dot"); + dumpDAG("DAG.dot", partitions); } for (Function *subF : funcList) { @@ -859,49 +673,53 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { "__" + subF->getName().str() + "__" + mapping.getPartitionBackendName(subF) + ".dot"); } - assert(subF->verify() && "Conversion led to invalid function"); + DCHECK(subF->verify()) << "Conversion led to invalid function"; } if (logPartition) { logPartitionInfo(mapping); } - return llvm::Error::success(); + + return std::move(partitions); } -llvm::Error Partitioner::PartitionFromConfig() { - Function *F = module_->getFunction(partitionConfig_.funcName); +llvm::Expected +Partitioner::partitionFromConfig(const PartitionConfig &partitionConfig) { + DAGListTy partitions; + // Prepare the mapping between BackendName and BackendInfo.
+ std::vector backends; + genBackendMap(backendMap_, backendHolder, backends); + Function *F = module_->getFunction(partitionConfig.funcName); RETURN_ERR_IF_NOT(F, strFormat("Can't find function %s in current module.", F->getName().str().data())); - DCHECK(partitionConfig_.numOfPartitions == - partitionConfig_.backendNames.size() && - partitionConfig_.numOfPartitions == - partitionConfig_.partitionNames.size()) + DCHECK( + partitionConfig.numOfPartitions == partitionConfig.backendNames.size() && + partitionConfig.numOfPartitions == partitionConfig.partitionNames.size()) << "Invalid user-defined partition config."; NodeToFunctionMap partitionMap; std::vector funcList; std::unordered_set unused; - std::vector nodesSets(partitionConfig_.numOfPartitions); + std::vector nodesSets(partitionConfig.numOfPartitions); // Create partitions based on the given number and names. - for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { - Function *newF = - module_->createFunction(partitionConfig_.partitionNames[i]); + for (size_t i = 0; i < partitionConfig.numOfPartitions; i++) { + Function *newF = module_->createFunction(partitionConfig.partitionNames[i]); funcList.push_back(newF); - partitionMap.createPartition(newF, partitionConfig_.backendNames[i]); + partitionMap.createPartition(newF, partitionConfig.backendNames[i]); unused.insert(i); } // Map the nodes the the partitions. std::vector unMapped; for (auto &node : F->getNodes()) { - auto iter = partitionConfig_.nodeToPartition.find(node.getName()); - if (iter == partitionConfig_.nodeToPartition.end()) { + auto iter = partitionConfig.nodeToPartition.find(node.getName()); + if (iter == partitionConfig.nodeToPartition.end()) { // If a node in F is not in the node to partition mapping, put it into // unMaped list. unMapped.push_back(&node); } else { size_t partitionID = iter->second; - DCHECK(partitionID < partitionConfig_.numOfPartitions) + DCHECK(partitionID < partitionConfig.numOfPartitions) << "Invalid partition id :" << partitionID; partitionMap.add(&node, funcList[partitionID]); unused.erase(partitionID); @@ -921,7 +739,7 @@ llvm::Error Partitioner::PartitionFromConfig() { } // Validate memory usage. - for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { + for (size_t i = 0; i < partitionConfig.numOfPartitions; i++) { GraphMemInfo cost = getGraphMemInfo(nodesSets[i]); partitionMap.setGraphMemInfo(funcList[i], cost); } @@ -932,18 +750,19 @@ llvm::Error Partitioner::PartitionFromConfig() { RETURN_IF_ERR(logicalDevicesValidation(partitionMap, backendMap_)); // Do partition. - doPartitioning(F->getName(), {F}, partitionMap, true); + partitions = doPartitioning(F->getName(), {F}, module_, partitionMap, + /* saveDAG */ true); module_->eraseFunction(F); // DAG validation. - RETURN_IF_ERR(dagValidation(partitions_[0])); + RETURN_IF_ERR(dagValidation(partitions[0])); // Do optimization based on backendName. 
- for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { + for (size_t i = 0; i < partitionConfig.numOfPartitions; i++) { auto func = funcList[i]; - assert(func->verify() && "Conversion led to invalid function"); + DCHECK(func->verify()) << "Conversion led to invalid function"; std::unique_ptr backend( - createBackend(partitionConfig_.backendNames[i])); + createBackend(partitionConfig.backendNames[i])); if (!optimized_) { CompilationContext cctx; RETURN_IF_ERR(::glow::optimizeFunction(func, *backend, cctx)); @@ -952,5 +771,20 @@ llvm::Error Partitioner::PartitionFromConfig() { if (logPartition) { logPartitionInfo(partitionMap); } - return llvm::Error::success(); + return std::move(partitions); +} + +llvm::Expected Partitioner::partition(CompilationContext &cctx) { + if (partitionConfig_.enabled()) { + // Call user-defined partition flow. + return partitionFromConfig(partitionConfig_); + } + + if (cctx.precisionConfig.quantMode == QuantizationMode::Profile) { + // Call quantization profiling partition flow. + return quantizationProfilingPartition(cctx); + } + + // Call heterogeneous partition flow. + return heterogeneousPartition(cctx); } diff --git a/lib/Partitioner/PartitionerBase.cpp b/lib/Partitioner/PartitionerBase.cpp new file mode 100644 index 0000000000..d4232b2fd4 --- /dev/null +++ b/lib/Partitioner/PartitionerBase.cpp @@ -0,0 +1,220 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "glow/Partitioner/PartitionerBase.h" +#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace glow; +using llvm::isa; + +// Current only partition the representative function. +DAGListTy PartitionerBase::doPartitioning(llvm::StringRef funcName, + std::vector funcs, + Module *module, + NodeToFunctionMap &mapping, + bool saveDAG) { + DAGListTy partitions; + // Add a dummy node to make sure that a DAG has a single entrance. + DAGNodePtr DAGRoot = llvm::make_unique(); + DAGNodePtrVec nodes; + DAGRoot->logicalDevices = {0}; + DAGRoot->name = funcName; + DAGRoot->module = module; + DAGRoot->deviceIDs = {0}; + DAGNode *root = DAGRoot.get(); + + llvm::DenseMap currToNew; + + // Clone nodes into target partition. + for (size_t i = 0, e = funcs.size(); i < e; i++) { + for (auto &N : funcs[i]->getNodes()) { + auto *clone = N.clone(); + currToNew[&N] = clone; + mapping[&N]->addNode(clone); + } + } + + // For any dependency that crosses a partition, add a placeholder and save + // node. Record the dependence in the function graph. 
+ std::unordered_map placeholders; + llvm::DenseMap funcDAG; + for (auto *subF : mapping.getPartitions()) { + if (funcDAG.find(subF) == funcDAG.end()) { + std::unique_ptr subDAG = llvm::make_unique(); + subDAG->name = subF->getName(); + subDAG->logicalDevices = mapping.getLogicalDeviceIDList(subF); + subDAG->backendName = mapping.getPartitionBackendName(subF); + funcDAG[subF] = subDAG.get(); + nodes.push_back(std::move(subDAG)); + } + + // Link subF to its parents. + std::set parents; + for (auto &N : subF->getNodes()) { + for (int inp = 0, e = N.getNumInputs(); inp < e; inp++) { + auto input = N.getNthInput(inp); + // No need to check Constant since it won't be the result of another + // function. + if (isa(input.getNode())) { + continue; + } + + Function *inputF = nullptr; + // It is possible that one input is the output of another function. + if (Placeholder *ph = llvm::dyn_cast(input.getNode())) { + for (auto &user : ph->getUsers()) { + if (auto *save = llvm::dyn_cast(user.getUser())) { + placeholders[input] = save->getPlaceholder(); + inputF = mapping[user.getUser()]; + break; + } + } + if (!inputF) { + continue; + } + } + + if (!inputF) { + inputF = mapping[input.getNode()]; + } + if (subF == inputF) { + continue; + } + // Check if a DAGNode for subF's parent is created or not. If not, + // create one. + if (funcDAG.find(inputF) == funcDAG.end()) { + std::unique_ptr subDAG = llvm::make_unique(); + subDAG->name = inputF->getName(); + subDAG->logicalDevices = mapping.getLogicalDeviceIDList(inputF); + subDAG->backendName = mapping.getPartitionBackendName(inputF); + funcDAG[inputF] = subDAG.get(); + nodes.push_back(std::move(subDAG)); + } + + // subF is a child of inputF, inputF is a parent of subF. + if (parents.find(inputF) == parents.end()) { + funcDAG[inputF]->children.push_back(funcDAG[subF]); + funcDAG[subF]->parents.push_back(funcDAG[inputF]); + parents.insert(inputF); + } + // If we've already created a placeholder for this dependence, use it. + auto it = placeholders.find(input); + if (it != placeholders.end()) { + N.setNthInput(inp, it->second); + continue; + } + + // Create a new placeholder to represent this dependence. + auto *save = inputF->createSave("tmp", input); + auto *tmp = save->getPlaceholder(); + placeholders[input] = tmp; + N.setNthInput(inp, tmp); + } + } + } + + if (saveDAG) { + DAG dag; + dag.root = std::move(DAGRoot); + dag.nodes = std::move(nodes); + partitions.push_back(std::move(dag)); + } + + // Update links between nodes in the cloned functions. Add placeholders (and + // save nodes) where a link crosses a partition boundary. + for (auto *subF : mapping.getPartitions()) { + for (auto &N : subF->getNodes()) { + for (int inp = 0, e = N.getNumInputs(); inp < e; inp++) { + auto input = N.getNthInput(inp); + if (isa(input.getNode())) { + continue; + } + // Link this node to the clone of its input. + auto *clone = currToNew[input.getNode()]; + N.setNthInput(inp, NodeValue(clone, input.getResNo())); + } + } + } + + // For all DAGNodes without parents, link them to the root DAG.
+ for (auto *subF : mapping.getPartitions()) { + if (funcDAG[subF]->parents.size() == 0) { + funcDAG[subF]->parents.push_back(root); + root->children.push_back(funcDAG[subF]); + } + } + return partitions; +} + +void PartitionerBase::dumpDAG(llvm::StringRef dotFilename, + const DAGListTy &partitions) const { + if (partitions.size() == 0) { + return; + } + auto *root = partitions[0].root.get(); + LOG(INFO) << "Writing dotty graph for DAG after graph partitioning: " + << dotFilename.str(); + std::ofstream myfile; + myfile.open(dotFilename); + myfile << "digraph DAG {\n\trankdir=TB;\n"; + // Dump DAGNodes + std::vector nodes; + llvm::SmallSet used; + nodes.push_back(root); + int cur = 0; + int num = 1; + while (cur < num) { + auto *node = nodes[cur]; + for (size_t i = 0; i < node->children.size(); i++) { + auto child = node->children[i]; + DescriptionBuilder db(child->name.c_str()); + const std::string &backendName = child->backendName; + db.addParam("BackendName", backendName); + myfile << "\"" << escapeDottyString(child->name) << "\"" + << " [ label = \"" << escapeDottyString(db) << "\""; + myfile << "\tshape = \"record\"\n"; + myfile << "\tstyle=\"filled,rounded\"\n"; + auto colorIdx = llvm::hash_value(backendName); + myfile << "\tfillcolor=" << getDotFileNodeColor(colorIdx) << "\n"; + myfile << "penwidth = 2];\n"; + if (used.count(child) == 0) { + nodes.push_back(child); + used.insert(child); + num++; + } + } + cur++; + } + + // Dump edges. + for (size_t i = 0; i < nodes.size(); i++) { + auto *node = nodes[i]; + for (size_t j = 0; j < node->children.size(); j++) { + auto child = node->children[j]; + myfile << "\"" << escapeDottyString(node->name) << "\"" + << " -> " + << "\"" << escapeDottyString(child->name) << "\"" + << ";"; + } + } + myfile << "}"; + + myfile.close(); + return; +} diff --git a/lib/Runtime/HostManager/HostManager.cpp b/lib/Runtime/HostManager/HostManager.cpp index 359c4bb59c..fed0e0bca6 100644 --- a/lib/Runtime/HostManager/HostManager.cpp +++ b/lib/Runtime/HostManager/HostManager.cpp @@ -131,9 +131,9 @@ llvm::Error HostManager::addNetwork(std::unique_ptr module, for (Function *F : module->getFunctions()) { RETURN_IF_ERR(optimizeFunctionBeforeLowering(F, cctx)); } - auto partitioner = Partitioner(module.get(), deviceInfo, saturateHost); - RETURN_IF_ERR(partitioner.Partition(cctx)); - auto nodeList = std::move(partitioner.getPartitionResult()); + Partitioner partitioner(module.get(), deviceInfo, saturateHost); + DAGListTy nodeList; + ASSIGN_VALUE_OR_RETURN_ERR(nodeList, partitioner.partition(cctx)); if (cctx.precisionConfig.quantMode == QuantizationMode::Profile) { // Since for profiling the provisioner will be reset, we only allow one diff --git a/tests/unittests/PartitionerTest.cpp b/tests/unittests/PartitionerTest.cpp index 43559ed839..9731bb3486 100644 --- a/tests/unittests/PartitionerTest.cpp +++ b/tests/unittests/PartitionerTest.cpp @@ -163,18 +163,17 @@ TEST_F(PartitionerTest, Basic1) { {3072, "Interpreter"}, {3072, "Interpreter"}, {3072, "Interpreter"}}; Partitioner myPartitioner(&EEP.getModule(), devices, false, true); CompilationContext cctx; - auto err = myPartitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(myPartitioner.getPartitionResult()); + auto dagList = myPartitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(EEP.getModule().getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); EXPECT_TRUE(checkSaveNode(EEP.getModule())); // Run the paritioned graph and 
compare the results. bindings_.clear(); bindings_.allocate(EEP.getModule().getPlaceholders()); EEP.compile(cctx); - for (auto it = dagList.begin(); it != dagList.end(); ++it) { + for (auto it = dagList->begin(); it != dagList->end(); ++it) { executeDAG((*it).root.get(), EEP.getModule(), bindings_, {bindings_.getPlaceholderByName("input")}, {&in}, &EEP); Tensor test = bindings_.get(bindings_.getPlaceholderByName("ret"))->clone(); @@ -251,14 +250,12 @@ TEST_F(PartitionerTest, Basic2) { {2048, "Interpreter"}}; Partitioner myPartitioner(&EEP.getModule(), devices, /* saturateHost */ true); CompilationContext cctx; - auto err = myPartitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(myPartitioner.getPartitionResult()); + auto dagList = myPartitioner.partition(cctx); EXPECT_EQ(EEP.getModule().getFunctions().size(), 2); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(EEP.getModule())); - for (auto &dag : dagList) { + for (auto &dag : dagList.get()) { for (auto &node : dag.nodes) { // Since saturateHost is set true, in this case, there should be 2 copys // of the partitions. @@ -270,7 +267,7 @@ TEST_F(PartitionerTest, Basic2) { bindings_.clear(); bindings_.allocate(EEP.getModule().getPlaceholders()); EEP.compile(cctx); - for (auto it = dagList.begin(); it != dagList.end(); ++it) { + for (auto it = dagList->begin(); it != dagList->end(); ++it) { updateInputPlaceholders(bindings_, {bindings_.getPlaceholderByName("input"), bindings_.getPlaceholderByName("input1")}, @@ -346,74 +343,61 @@ TEST_F(PartitionerTest, Error1) { std::vector devices = {{2048, "Interpreter"}}; Partitioner myPartitioner(&EEP.getModule(), devices); CompilationContext cctx; - auto err = myPartitioner.Partition(cctx); - EXPECT_TRUE(errToBool(std::move(err))); + auto dagList = myPartitioner.partition(cctx); + EXPECT_FALSE((bool)dagList); } /// This one tests the roofline computed with compute, memory and /// communication costs TEST_F(PartitionerTest, Basic1Roofline) { - ExecutionEngine EER, EEP; + ExecutionEngine EEP; constexpr float range = 2.0; - std::vector engines{&EER, &EEP}; - for (auto EE : engines) { - auto mod = &EE->getModule(); - F_ = mod->createFunction("main"); - auto *input = - mod->createPlaceholder(ElemKind::FloatTy, {1, 32}, "input", false); - auto *w1 = mod->createConstant(ElemKind::FloatTy, {32, 16}, "w1"); - auto *b1 = mod->createConstant(ElemKind::FloatTy, {16}, "b1"); - bindings_.allocate(input); - w1->getHandle<>().randomize(-range, range, mod->getPRNG()); - b1->getHandle<>().randomize(-range, range, mod->getPRNG()); - - // Initial FC. - Node *I = F_->createFullyConnected("initial_fc", input, w1, b1); - I = F_->createSigmoid("initial_sigmoid", I); - // Left branch. 
- auto *w2 = mod->createConstant(ElemKind::FloatTy, {16, 16}, "w2"); - auto *b2 = mod->createConstant(ElemKind::FloatTy, {16}, "b2"); - w2->getHandle<>().randomize(-range, range, mod->getPRNG()); - b2->getHandle<>().randomize(-range, range, mod->getPRNG()); - Node *L = F_->createFullyConnected("left_fc1", I, w2, b2); - L = F_->createSigmoid("left_sigmoid1", L); - auto *w3 = mod->createConstant(ElemKind::FloatTy, {16, 8}, "w3"); - auto *b3 = mod->createConstant(ElemKind::FloatTy, {8}, "b3"); - w3->getHandle<>().randomize(-range, range, mod->getPRNG()); - b3->getHandle<>().randomize(-range, range, mod->getPRNG()); - L = F_->createFullyConnected("left_fc2", L, w3, b3); - L = F_->createSigmoid("left_sigmoid2", L); + auto mod = &EEP.getModule(); + F_ = mod->createFunction("main"); + auto *input = + mod->createPlaceholder(ElemKind::FloatTy, {1, 32}, "input", false); + auto *w1 = mod->createConstant(ElemKind::FloatTy, {32, 16}, "w1"); + auto *b1 = mod->createConstant(ElemKind::FloatTy, {16}, "b1"); + bindings_.allocate(input); + w1->getHandle<>().randomize(-range, range, mod->getPRNG()); + b1->getHandle<>().randomize(-range, range, mod->getPRNG()); - // Right branch. - auto *w4 = mod->createConstant(ElemKind::FloatTy, {16, 16}, "w4"); - auto *b4 = mod->createConstant(ElemKind::FloatTy, {16}, "b4"); - w4->getHandle<>().randomize(-range, range, mod->getPRNG()); - b4->getHandle<>().randomize(-range, range, mod->getPRNG()); - Node *R = F_->createFullyConnected("right_fc1", I, w4, b4); - R = F_->createSigmoid("right_sigmoid1", R); - auto *w5 = mod->createConstant(ElemKind::FloatTy, {16, 8}, "w5"); - auto *b5 = mod->createConstant(ElemKind::FloatTy, {8}, "b5"); - w5->getHandle<>().randomize(-range, range, mod->getPRNG()); - b5->getHandle<>().randomize(-range, range, mod->getPRNG()); - R = F_->createFullyConnected("right_fc2", R, w5, b5); - R = F_->createSigmoid("right_sigmoid2", R); + // Initial FC. + Node *I = F_->createFullyConnected("initial_fc", input, w1, b1); + I = F_->createSigmoid("initial_sigmoid", I); - // Join branches. - auto *mul = F_->createMul("mul", L, R); - F_->createSave("ret", mul); - } + // Left branch. + auto *w2 = mod->createConstant(ElemKind::FloatTy, {16, 16}, "w2"); + auto *b2 = mod->createConstant(ElemKind::FloatTy, {16}, "b2"); + w2->getHandle<>().randomize(-range, range, mod->getPRNG()); + b2->getHandle<>().randomize(-range, range, mod->getPRNG()); + Node *L = F_->createFullyConnected("left_fc1", I, w2, b2); + L = F_->createSigmoid("left_sigmoid1", L); + auto *w3 = mod->createConstant(ElemKind::FloatTy, {16, 8}, "w3"); + auto *b3 = mod->createConstant(ElemKind::FloatTy, {8}, "b3"); + w3->getHandle<>().randomize(-range, range, mod->getPRNG()); + b3->getHandle<>().randomize(-range, range, mod->getPRNG()); + L = F_->createFullyConnected("left_fc2", L, w3, b3); + L = F_->createSigmoid("left_sigmoid2", L); - // Infer using the un-partitioned graph. - Tensor in(ElemKind::FloatTy, {1, 32}); - in.getHandle<>().randomize(-range, range, EER.getModule().getPRNG()); + // Right branch. 
+ auto *w4 = mod->createConstant(ElemKind::FloatTy, {16, 16}, "w4"); + auto *b4 = mod->createConstant(ElemKind::FloatTy, {16}, "b4"); + w4->getHandle<>().randomize(-range, range, mod->getPRNG()); + b4->getHandle<>().randomize(-range, range, mod->getPRNG()); + Node *R = F_->createFullyConnected("right_fc1", I, w4, b4); + R = F_->createSigmoid("right_sigmoid1", R); + auto *w5 = mod->createConstant(ElemKind::FloatTy, {16, 8}, "w5"); + auto *b5 = mod->createConstant(ElemKind::FloatTy, {8}, "b5"); + w5->getHandle<>().randomize(-range, range, mod->getPRNG()); + b5->getHandle<>().randomize(-range, range, mod->getPRNG()); + R = F_->createFullyConnected("right_fc2", R, w5, b5); + R = F_->createSigmoid("right_sigmoid2", R); - EER.compile(CompilationMode::Infer); - bindings_.clear(); - bindings_.allocate(EER.getModule().getPlaceholders()); - updateInputPlaceholders(bindings_, {bindings_.getPlaceholderByName("input")}, - {&in}); - EER.run(bindings_); + // Join branches. + auto *mul = F_->createMul("mul", L, R); + F_->createSave("ret", mul); // Since the partitioner will look at all nodesin the function post // optimization and lowering, we need to do so here for the same list of @@ -477,8 +461,8 @@ TEST_F(PartitionerTest, SelectRepFunc) { {1000000, "Interpreter"}}); CompilationContext cctx; - auto err = myPartitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); + auto dagList = myPartitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); } /// Create a mock backend and rewrite the isOpSupported function @@ -612,16 +596,14 @@ TEST_F(PartitionerTest, SimpleHeterogeneousPartitioning) { backends.emplace_back(&backendWithoutSub1); std::vector devices = { {3072, "Interpreter"}, {3072, "Interpreter"}, {3072, "CPU"}}; - auto partitioner = - Partitioner(&mod_, devices, backends, /* saturateHost */ true); + Partitioner partitioner(&mod_, devices, backends, /* saturateHost */ true); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - heterogeneousPartitionValidation(dagList, mod_); + heterogeneousPartitionValidation(dagList.get(), mod_); mod_.clear(); } @@ -634,15 +616,14 @@ TEST_F(PartitionerTest, heterogeneousPartitioningWithNonSupportedNodes) { std::vector devices = {{3072, "Interpreter", "Mul"}, {3072, "Interpreter", "Mul"}, {3072, "CPU", "Sub"}}; - auto partitioner = Partitioner(&mod_, devices); + Partitioner partitioner(&mod_, devices); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - heterogeneousPartitionValidation(dagList, mod_); + heterogeneousPartitionValidation(dagList.get(), mod_); mod_.clear(); } @@ -658,15 +639,14 @@ TEST_F(PartitionerTest, heterogeneousPartitioningWithSupportedNodes) { {3072, "Interpreter", "", "Sub,Add,Save"}, {3072, "Interpreter", "", "Sub,Add,Save"}, {3072, "CPU", "", "Mul,Add,Save"}}; - auto partitioner = Partitioner(&mod_, devices); + Partitioner partitioner(&mod_, devices); 
CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - heterogeneousPartitionValidation(dagList, mod_); + heterogeneousPartitionValidation(dagList.get(), mod_); mod_.clear(); } @@ -694,17 +674,16 @@ TEST_F(PartitionerTest, logicalIDTest0) { {1500, "Interpreter"}}; // Create two backends which support different ops, then do the partition by // assigning the ops to the corresponding abackends. - auto partitioner = Partitioner(&mod_, devices, /* saturateHost */ true); + Partitioner partitioner(&mod_, devices, /* saturateHost */ true); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); // Check there are 3 partitions. EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - for (auto &dag : dagList) { + for (auto &dag : dagList.get()) { // Check number of logical devices; llvm::SmallSet usedID; for (auto &node : dag.nodes) { @@ -729,17 +708,15 @@ TEST_F(PartitionerTest, logicalIDTest1) { backends.emplace_back(&backendWithoutMul1); backends.emplace_back(&backendWithoutSub1); std::vector devices = {{3072, "Interpreter"}, {3072, "CPU"}}; - auto partitioner = - Partitioner(&mod_, devices, backends, /* saturateHost */ true); + Partitioner partitioner(&mod_, devices, backends, /* saturateHost */ true); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - for (auto &dag : dagList) { + for (auto &dag : dagList.get()) { // Check number of logical devices; llvm::SmallSet usedID; for (auto &node : dag.nodes) { @@ -908,8 +885,8 @@ TEST_F(PartitionerTest, memoryUsageValidation1) { {500, "Interpreter"}}; Partitioner myPartitioner(&mod_, devices); CompilationContext cctx; - auto err = myPartitioner.Partition(cctx); - EXPECT_TRUE(errToBool(std::move(err))); + auto dagList = myPartitioner.partition(cctx); + EXPECT_FALSE((bool)dagList); } /// This one test dagValidation in partitioner : p1->p2, p2->p1. @@ -937,8 +914,8 @@ TEST_F(PartitionerTest, dagValidation1) { partitionConfig.nodeToPartition = {{"add2", 0}}; auto partitioner = Partitioner(&mod_, devices, false, false, partitionConfig); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_TRUE(errToBool(std::move(err))); + auto dagList = partitioner.partition(cctx); + EXPECT_FALSE((bool)dagList); } /// This one test dagValidation in partitioner: p0->p1, p1->p2, p2->p1. 
@@ -969,8 +946,8 @@ TEST_F(PartitionerTest, dagValidation2) { partitionConfig.nodeToPartition = {{"add0", 0}, {"add2", 2}}; auto partitioner = Partitioner(&mod_, devices, false, false, partitionConfig); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_TRUE(errToBool(std::move(err))); + auto dagList = partitioner.partition(cctx); + EXPECT_FALSE((bool)dagList); } /// This one tests partition from a user-defined config. @@ -987,13 +964,36 @@ TEST_F(PartitionerTest, partitionFromConfig) { partitionConfig.backendNames = {"Interpreter", "CPU", "Interpreter"}; partitionConfig.partitionNames = {"p1", "p2", "p3"}; partitionConfig.nodeToPartition = {{"sub", 0}, {"mul", 1}}; - auto partitioner = Partitioner(&mod_, devices, false, false, partitionConfig); + Partitioner partitioner(&mod_, devices, false, false, partitionConfig); + CompilationContext cctx; + auto dagList = partitioner.partition(cctx); + EXPECT_TRUE((bool)dagList); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList->size(), 1); + ASSERT_TRUE(checkSaveNode(mod_)); + heterogeneousPartitionValidation(dagList.get(), mod_); +} + +/// This one tests calling PartitionFromConfig directly. +TEST_F(PartitionerTest, partitionFromConfigDirectCall) { + createSimpleModule(mod_); + std::vector devices = { + {3072, "Interpreter"}, {3072, "Interpreter"}, {3072, "CPU"}}; + + // User-defined partition: 3 partitions (2 interpreter, 1 cpu), Mul nodes to + // CPU, others to Interpreter. + PartitionConfig partitionConfig; + partitionConfig.funcName = "test"; + partitionConfig.numOfPartitions = 3; + partitionConfig.backendNames = {"Interpreter", "CPU", "Interpreter"}; + partitionConfig.partitionNames = {"p1", "p2", "p3"}; + partitionConfig.nodeToPartition = {{"sub", 0}, {"mul", 1}}; + Partitioner partitioner(&mod_, devices); CompilationContext cctx; - auto err = partitioner.Partition(cctx); - EXPECT_FALSE(errToBool(std::move(err))); - DAGListTy dagList = std::move(partitioner.getPartitionResult()); + auto dagList = partitioner.partitionFromConfig(partitionConfig); + EXPECT_TRUE((bool)dagList); EXPECT_EQ(mod_.getFunctions().size(), 3); - EXPECT_EQ(dagList.size(), 1); + EXPECT_EQ(dagList->size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); - heterogeneousPartitionValidation(dagList, mod_); + heterogeneousPartitionValidation(dagList.get(), mod_); } diff --git a/tests/unittests/RecommendationSystemTest.cpp b/tests/unittests/RecommendationSystemTest.cpp index fc05679595..4dc97dfd4e 100644 --- a/tests/unittests/RecommendationSystemTest.cpp +++ b/tests/unittests/RecommendationSystemTest.cpp @@ -949,13 +949,12 @@ class RecommendationSystemTest : public BackendTest { // Use the same precision transformation for compilation. CompilationContext cctx; cctx.precisionConfig = precConfig_; - EXIT_ON_ERR(myPartitioner.Partition(cctx)); - - DAGListTy myList = std::move(myPartitioner.getPartitionResult()); + auto myList = myPartitioner.partition(cctx); + ASSERT_TRUE((bool)myList); std::cout << "Partitions = " << pMod->getFunctions().size() << std::endl; ASSERT_LE(pMod->getFunctions().size(), numDevices); - ASSERT_EQ(myList.size(), 1); - DAG &dag = myList.front(); + ASSERT_EQ(myList->size(), 1); + DAG &dag = myList->front(); // Run the partitioned graph and compare the results.
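Usage sketch (not part of the diff): the minimal caller below, modeled on the HostManager.cpp and PartitionerTest.cpp changes above, shows how the new llvm::Expected-based interface is consumed. The wrapper function addPartitionedNetwork() and its exact parameters are illustrative assumptions, not code from this patch.

#include "glow/Partitioner/Partitioner.h"
#include "glow/Support/Error.h"

using namespace glow;
using namespace glow::runtime;

// Hypothetical helper: partition a module for a set of devices and hand the
// resulting DAG list back to the caller, propagating failures via llvm::Error.
llvm::Error addPartitionedNetwork(Module *module,
                                  const std::vector<DeviceInfo> &deviceInfo,
                                  bool saturateHost, DAGListTy &outNodeList) {
  // The Partitioner no longer stores the partition result internally.
  Partitioner partitioner(module, deviceInfo, saturateHost);
  CompilationContext cctx;

  // partition() dispatches to the user-defined, quantization-profiling, or
  // heterogeneous flow and returns the DAG list or an error.
  DAGListTy nodeList;
  ASSIGN_VALUE_OR_RETURN_ERR(nodeList, partitioner.partition(cctx));

  // Dumping the DAG now takes the result explicitly instead of reading
  // member state.
  partitioner.dumpDAG("DAG.dot", nodeList);

  outNodeList = std::move(nodeList);
  return llvm::Error::success();
}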