diff --git a/include/glow/Partitioner/Partitioner.h b/include/glow/Partitioner/Partitioner.h index fe1af7b058..7357d08dd7 100644 --- a/include/glow/Partitioner/Partitioner.h +++ b/include/glow/Partitioner/Partitioner.h @@ -204,11 +204,14 @@ class Partitioner { /// use all available devices. bool saturateHost_; - // Flag to set if the funcitons in the module are areadly optimized. By - // default, the optimization should be done in Partitioner due to - // heterogeneous partition. + /// Flag to set if the functions in the module are already optimized. By + /// default, the optimization should be done in Partitioner due to + /// heterogeneous partition. bool optimized_; + /// The struct containing the user-defined partition info. + PartitionConfig partitionConfig_; + /// Get the representative function (the one with the largest input) and /// update the memSize. static Function *selectRepFunc(Module *parent, uint64_t &memSize); @@ -295,10 +298,15 @@ class Partitioner { /// "Function Family", that is, without considerting the "dynamic stuff" (i.e. /// batch size, input/output shape of each op), all the functions are /// identical. The required memory and computation cost for each op can be - /// found in Module. The \p devices provides the cost model related to - /// devices. + /// found in Module. + /// The \p devices provides the cost model related to devices. + /// Saturating the host will be enabled if \p saturateHost is true. + /// \p optimized is false by default, which means the functions in this module + /// are not optimized. \p partitionConfig contains the user-defined partition + /// info. Partitioner(Module *parent, const std::vector &devices, - bool saturateHost = false, bool optimized = false); + bool saturateHost = false, bool optimized = false, + PartitionConfig partitionConfig = PartitionConfig()); /// Users can create Mock Backends and pass their points to test Graph /// Partitioning without actually register them in GLOW. 
@@ -306,6 +314,10 @@ class Partitioner { const std::vector &backends, bool saturateHost = false, bool optimized = false); + /// Based on partitionConfig_ passed into Partitioner, do the user-defined + /// partition. + llvm::Error PartitionFromConfig(); + /// Decompose each function in a module. Now we support partitioning a module /// among different type of devices. \p cctx is used during optimization of /// the Function. \returns whether there was an error encountered. diff --git a/include/glow/Runtime/RuntimeTypes.h b/include/glow/Runtime/RuntimeTypes.h index 11527f4159..bc8aa1b052 100644 --- a/include/glow/Runtime/RuntimeTypes.h +++ b/include/glow/Runtime/RuntimeTypes.h @@ -171,6 +171,30 @@ struct HostConfig { size_t executorThreads{3}; }; +/// This struct describes a user-defined partition. +struct PartitionConfig { + /// The name of the function to be partitioned. + std::string funcName; + /// The number of user-defined partitions. + /// The partition ids are between 0 and numOfPartitions - 1, inclusive. + size_t numOfPartitions; + /// The backend for each partition. backendNames.size() == numOfPartitions. + std::vector backendNames; + /// The name for each partition. partitionNames.size() == numOfPartitions. + std::vector partitionNames; + /// The mapping from nodes' names to partition ids. Assume there are n nodes + /// and m partitions. We have 2 types of valid mapping: 1. all nodes are + /// mapped to a partition. 2. For i-th (0 <= i < m) partition, the nodes + /// mapped to this partition id are not in this map, and the nodes mapped to + /// other partition ids must be in this map. The node's name should be the + /// name in Glow function and may be different from the original name from + /// models, since Glow will mangle names to make them unique. 
+ llvm::StringMap nodeToPartition; + + PartitionConfig() : numOfPartitions(0) {} + bool enabled() const { return numOfPartitions > 0; } +}; + } // namespace runtime } // namespace glow #endif // GLOW_RUNTIME_RUNTIMETYPES_H diff --git a/lib/Partitioner/Partitioner.cpp b/lib/Partitioner/Partitioner.cpp index 7b7793afbb..e6ca9d98da 100644 --- a/lib/Partitioner/Partitioner.cpp +++ b/lib/Partitioner/Partitioner.cpp @@ -22,11 +22,10 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include - #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include namespace glow { bool GlowEnableLoadBalancedPartitioning = false; static llvm::cl::opt @@ -62,6 +61,20 @@ bool sortMinMemory(const std::pair &a, return a.second < b.second; } +static void dumpPartitionInfo(const NodeToFunctionMap &partitions) { + int i = 0; + for (Function *subF : partitions.getPartitions()) { + LOG(INFO) << "\t Partition " << i++ << ":\n" + << "\t\t Name :\t" << subF->getName().str() << "\n" + << "\t\t BackendKind :\t" + << partitions.getPartitionBackendName(subF) << "\n" + << "\t\t Memory :\t" + << partitions.getGraphMemInfo(subF).getTotalMemSize() << "\n" + << "\t\t LogicalDeviceIDs :\t" + << partitions.getLogicalDeviceIDList(subF)[0] << "\n"; + } +} + void Partitioner::dumpDAG(llvm::StringRef dotFilename) const { if (partitions_.size() == 0) return; @@ -168,9 +181,10 @@ Partitioner::Partitioner(Module *parent, const std::vector &devices, } Partitioner::Partitioner(Module *parent, const std::vector &devices, - bool saturateHost, bool optimized) + bool saturateHost, bool optimized, + PartitionConfig partitionConfig) : module_(parent), deviceInfo_(devices), saturateHost_(saturateHost), - optimized_(optimized) { + optimized_(optimized), partitionConfig_(partitionConfig) { memSize_ = module_->getConstantsSize(); logicalDeviceID_ = 0; } @@ -1211,7 +1225,6 @@ llvm::Error Partitioner::QuantizationProfilingPartition( module_->eraseFunction(F_); std::unique_ptr 
backend(createBackend(profilingBackend)); for (Function *subF : module_->getFunctions()) { - (void)subF; assert(subF->verify() && "Conversion led to invalid function"); if (!optimized_) { RETURN_IF_ERR(::glow::optimizeFunction(subF, *backend, cctx)); @@ -1231,6 +1244,11 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { std::vector> backendHolder; getBackendMap(backendMap_, backendHolder, backends); + if (partitionConfig_.enabled()) { + // Jump into user-defined partition, and skip the following auto partition. + return PartitionFromConfig(); + } + // Step 0: Find the representative function for running partitioning // algorithm. F_ = selectRepFunc(module_, memSize_); @@ -1348,27 +1366,105 @@ llvm::Error Partitioner::Partition(CompilationContext &cctx) { dumpDAG("DAG.dot"); } - int i = 0; for (Function *subF : funcList) { - (void)subF; - if (logPartition) { - LOG(INFO) << "\t Partition " << i << ":\n" - << "\t\t Name :\t" << subF->getName().str() << "\n" - << "\t\t BackendKind :\t" - << mapping.getPartitionBackendName(subF) << "\n" - << "\t\t Memory :\t" - << mapping.getGraphMemInfo(subF).getTotalMemSize() << "\n" - << "\t\t LogicalDeviceIDs :\t" - << mapping.getLogicalDeviceIDList(subF)[0] << "\n"; - } if (dumpPartition) { subF->dumpDAG("partitionLogicalID" + std::to_string(mapping.getLogicalDeviceIDList(subF)[0]) + "__" + subF->getFilename() + "__" + mapping.getPartitionBackendName(subF) + ".dot"); } - i++; assert(subF->verify() && "Conversion led to invalid function"); } + if (logPartition) { + dumpPartitionInfo(mapping); + } + return llvm::Error::success(); +} + +llvm::Error Partitioner::PartitionFromConfig() { + Function *F = module_->getFunction(partitionConfig_.funcName); + RETURN_ERR_IF_NOT(F, strFormat("Can't find function %s in current module.", + partitionConfig_.funcName.c_str())); + + RETURN_ERR_IF_NOT(partitionConfig_.numOfPartitions == + partitionConfig_.backendNames.size() && + partitionConfig_.numOfPartitions == 
+ partitionConfig_.partitionNames.size(), + "Invalid user-defined partition config."); + + NodeToFunctionMap partitionMap; + std::vector funcList; + std::unordered_set unused; + std::vector nodesSets(partitionConfig_.numOfPartitions); + // Create partitions based on the given number and names. + for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { + Function *newF = + module_->createFunction(partitionConfig_.partitionNames[i]); + funcList.push_back(newF); + partitionMap.createPartition(newF, partitionConfig_.backendNames[i]); + unused.insert(i); + } + + // Map the nodes to the partitions. + std::vector unMapped; + for (auto &node : F->getNodes()) { + auto iter = partitionConfig_.nodeToPartition.find(node.getName()); + if (iter == partitionConfig_.nodeToPartition.end()) { + // If a node in F is not in the node to partition mapping, put it into + // unMapped list. + unMapped.push_back(&node); + } else { + size_t partitionID = iter->second; + RETURN_ERR_IF_NOT(partitionID < partitionConfig_.numOfPartitions, + strFormat("Invalid partition id :%zu", partitionID)); + partitionMap.add(&node, funcList[partitionID]); + unused.erase(partitionID); + nodesSets[partitionID].insert(&node); + } + } + + // If there is unused partition and unmapped nodes, map those nodes to the + // unused partition. + if (unMapped.size()) { + RETURN_ERR_IF_NOT(unused.size() == 1, + "There must be exactly 1 unused partition."); + auto partitionID = *(unused.begin()); + for (auto &node : unMapped) { + partitionMap.add(node, funcList[partitionID]); + nodesSets[partitionID].insert(node); + } + } + + // Validate memory usage. + for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { + GraphMemInfo cost = getGraphMemInfo(nodesSets[i]); + partitionMap.setGraphMemInfo(funcList[i], cost); + } + RETURN_IF_ERR(memoryUsageValidation(partitionMap)); + + // Logical device ID validation. 
+ logicalDeviceID_ = assignLogicalDeviceID(partitionMap); + RETURN_IF_ERR(logicalDevicesValidation(partitionMap)); + + // TODO : loop-free validation. + + // Do partition. + doPartitioning(F->getName(), {F}, partitionMap, true); + module_->eraseFunction(F); + + // Do optimization based on backendName. + for (size_t i = 0; i < partitionConfig_.numOfPartitions; i++) { + auto func = funcList[i]; + assert(func->verify() && "Conversion led to invalid function"); + std::unique_ptr backend( + createBackend(partitionConfig_.backendNames[i])); + if (!optimized_) { + CompilationContext cctx; + RETURN_IF_ERR(::glow::optimizeFunction(func, *backend, cctx)); + } + } + if (logPartition) { + dumpPartitionInfo(partitionMap); + } return llvm::Error::success(); } diff --git a/tests/unittests/PartitionerTest.cpp b/tests/unittests/PartitionerTest.cpp index 5ece0fdc0f..f6703b92c8 100644 --- a/tests/unittests/PartitionerTest.cpp +++ b/tests/unittests/PartitionerTest.cpp @@ -155,9 +155,9 @@ TEST_F(PartitionerTest, Basic1) { auto err = myPartitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(myPartitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); - ASSERT_TRUE(checkSaveNode(mod_)); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); + EXPECT_TRUE(checkSaveNode(mod_)); // Run the paritioned graph and compare the results. 
bindings_.allocate(mod_.getPlaceholders()); @@ -230,15 +230,15 @@ TEST_F(PartitionerTest, Basic2) { auto err = myPartitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(myPartitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 2); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 2); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); for (auto &dag : dagList) { for (auto &node : dag.nodes) { // Since saturateHost is set true, in this case, there should be 2 copys // of the partitions. - ASSERT_EQ(node->logicalDevices.size(), 2); + EXPECT_EQ(node->logicalDevices.size(), 2); } } @@ -248,7 +248,7 @@ TEST_F(PartitionerTest, Basic2) { bindings_.allocate(mod_.getPlaceholders()); executeDAG((*it).root.get(), mod_, bindings_, {input}, {&in}); Tensor test = res.clone(); - EXPECT_TRUE(ref.isEqual(test)); + ASSERT_TRUE(ref.isEqual(test)); } } @@ -433,8 +433,8 @@ TEST_F(PartitionerTest, Basic1Roofline) { EXPECT_EQ(myPartitioner.getMemUsage(N), expectedMemUsage[p.second]); } - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); } TEST_F(PartitionerTest, SelectRepFunc) { @@ -549,7 +549,7 @@ static void heterogeneousPartitionValidation(const DAGListTy &dagList, for (auto &node : dag.nodes) { // Although the saturateHost is set true, no saturating the host in // heterogeneous partiton. 
- ASSERT_EQ(node->logicalDevices.size(), 1); + EXPECT_EQ(node->logicalDevices.size(), 1); if (node->backendName == "CPU") { numOfCPUBackends++; auto func = mod.getFunction(node->name); @@ -568,10 +568,10 @@ static void heterogeneousPartitionValidation(const DAGListTy &dagList, } } } - ASSERT_EQ(numOfInterpreterBackends, 2); - ASSERT_EQ(numOfCPUBackends, 1); - ASSERT_EQ(numOfSubNodes, 2); - ASSERT_EQ(numOfMulNodes, 1); + EXPECT_EQ(numOfInterpreterBackends, 2); + EXPECT_EQ(numOfCPUBackends, 1); + EXPECT_EQ(numOfSubNodes, 2); + EXPECT_EQ(numOfMulNodes, 1); } /// Test using user-defined backends for heterogeneous partition. @@ -593,8 +593,8 @@ TEST_F(PartitionerTest, SimpleHeterogeneousPartitioning) { auto err = partitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(partitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); heterogeneousPartitionValidation(dagList, mod_); @@ -614,8 +614,8 @@ TEST_F(PartitionerTest, heterogeneousPartitioningWithNonSupportedNodes) { auto err = partitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(partitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); heterogeneousPartitionValidation(dagList, mod_); @@ -638,8 +638,8 @@ TEST_F(PartitionerTest, heterogeneousPartitioningWithSupportedNodes) { auto err = partitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(partitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); 
heterogeneousPartitionValidation(dagList, mod_); @@ -675,19 +675,19 @@ TEST_F(PartitionerTest, logicalIDTest0) { EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(partitioner.getPartitionResult()); // Check there are 3 partitions. - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); for (auto &dag : dagList) { // Check number of logical devices; llvm::SmallSet usedID; for (auto &node : dag.nodes) { - ASSERT_EQ(node->logicalDevices.size(), 1); + EXPECT_EQ(node->logicalDevices.size(), 1); usedID.insert(node->logicalDevices[0]); } // Check there are 2 logical devices. - ASSERT_EQ(usedID.size(), 2); + EXPECT_EQ(usedID.size(), 2); } mod_.clear(); } @@ -710,8 +710,8 @@ TEST_F(PartitionerTest, logicalIDTest1) { auto err = partitioner.Partition(cctx); EXPECT_FALSE(errToBool(std::move(err))); DAGListTy dagList = std::move(partitioner.getPartitionResult()); - ASSERT_EQ(mod_.getFunctions().size(), 3); - ASSERT_EQ(dagList.size(), 1); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); ASSERT_TRUE(checkSaveNode(mod_)); for (auto &dag : dagList) { @@ -720,10 +720,10 @@ TEST_F(PartitionerTest, logicalIDTest1) { for (auto &node : dag.nodes) { // Although the saturateHost is set true, no saturating the host in // heterogeneous partiton. - ASSERT_EQ(node->logicalDevices.size(), 1); + EXPECT_EQ(node->logicalDevices.size(), 1); usedID.insert(node->logicalDevices[0]); } - ASSERT_EQ(usedID.size(), 2); + EXPECT_EQ(usedID.size(), 2); } mod_.clear(); } @@ -886,3 +886,28 @@ TEST_F(PartitionerTest, memoryUsageValidation1) { auto err = myPartitioner.Partition(cctx); EXPECT_TRUE(errToBool(std::move(err))); } + +/// Test partitioning driven by a user-defined PartitionConfig. 
+TEST_F(PartitionerTest, partitionFromConfig) { + createSimpleModule(mod_); + std::vector devices = { + {3072, "Interpreter"}, {3072, "Interpreter"}, {3072, "CPU"}}; + + // User-defined partition: 3 partitions (2 Interpreter, 1 CPU). "sub" maps to + // p1 and "mul" to p2 (CPU); unlisted nodes go to the remaining partition p3. + PartitionConfig partitionConfig; + partitionConfig.funcName = "test"; + partitionConfig.numOfPartitions = 3; + partitionConfig.backendNames = {"Interpreter", "CPU", "Interpreter"}; + partitionConfig.partitionNames = {"p1", "p2", "p3"}; + partitionConfig.nodeToPartition = {{"sub", 0}, {"mul", 1}}; + auto partitioner = Partitioner(&mod_, devices, false, false, partitionConfig); + CompilationContext cctx; + auto err = partitioner.Partition(cctx); + EXPECT_FALSE(errToBool(std::move(err))); + DAGListTy dagList = std::move(partitioner.getPartitionResult()); + EXPECT_EQ(mod_.getFunctions().size(), 3); + EXPECT_EQ(dagList.size(), 1); + ASSERT_TRUE(checkSaveNode(mod_)); + heterogeneousPartitionValidation(dagList, mod_); +}