Skip to content

Commit bc410d5

Browse files
author
Man Wang
committed
[Partitioner] Partitioner refactoring - final
1 parent 3787ca3 commit bc410d5

File tree

3 files changed

+240
-86
lines changed

3 files changed

+240
-86
lines changed

include/glow/Partitioner/Partitioner.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ class Partitioner final : public PartitionerBase {
3333
/// has the largest memory size.
3434
Function *F_;
3535

36+
/// True if there is more than one type of backend.
37+
bool multiBackendNames_;
38+
3639
/// The cost model related to device.
3740
std::vector<DeviceInfo> deviceInfo_;
3841

@@ -72,6 +75,13 @@ class Partitioner final : public PartitionerBase {
7275
/// update the memSize.
7376
static Function *selectRepFunc(Module *parent, uint64_t &memSize);
7477

78+
/// Initialization. Called in class constructor.
79+
void init();
80+
81+
/// Verify the generated functions in module, and dump partition logs from \p
82+
/// partitions and \p mapping.
83+
void finalize(const DAGListTy &partitions, const NodeToFunctionMap &mapping);
84+
7585
/// After getting the initial partitions, adjust the partitions to minimize
7686
/// communication and computation cost.
7787
void partitionsAdjust(NodeToFunctionMap &partitions,
@@ -97,13 +107,6 @@ class Partitioner final : public PartitionerBase {
97107
std::vector<Backend *> &backends,
98108
CompilationContext &cctx);
99109

100-
/// Performs a load balancing optimization pass to optimize for load
101-
/// balance in addition to respecting memory constraints.
102-
llvm::Error loadBalancedPartitioning(Function *F, DeviceIDTy numDevices,
103-
uint64_t availableMemory,
104-
llvm::StringRef backendName,
105-
NodeToFunctionMap &mapping);
106-
107110
/// If there is no need to do any partition, just generate the DAGNode based
108111
/// on current functions in this module for backend \p backendName found in \p
109112
/// backendMap. \p cctx is used for function optimization. \returns the
@@ -161,6 +164,16 @@ class Partitioner final : public PartitionerBase {
161164
/// for function optimization. \returns the partition result or an error.
162165
llvm::Expected<DAGListTy> heterogeneousPartition(CompilationContext &cctx);
163166

167+
/// This partition approach is an experimental one. It tries to balance the
168+
/// workloads of each accelerator/device in addition to respecting memory
169+
/// constraints. \p numDevices is the minimal number of partitions. That is,
170+
/// after loadBalancedPartition, the network will be divided up into at least
171+
/// \p numDevices sub-networks. Now it is overwritten inside of
172+
/// loadBalancedPartition. But in the future, it can be manually defined by
173+
/// users.
174+
llvm::Expected<DAGListTy> loadBalancedPartition(CompilationContext &cctx,
175+
size_t numDevices = 0);
176+
164177
/// Decompose each function in a module. Given the parameters, this function
165178
/// will choose different partition approaches supported in this class:
166179
/// heterogeneous partition, user-defined partition or quantization profiling.

lib/Partitioner/Partitioner.cpp

Lines changed: 131 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -60,22 +60,55 @@ bool sortMinMemory(const std::pair<Function *, uint64_t> &a,
6060
return a.second < b.second;
6161
}
6262

63+
void Partitioner::init() {
64+
memSize_ = module_->getConstantsSize();
65+
logicalDeviceID_ = 0;
66+
multiBackendNames_ = false;
67+
for (size_t i = 1, e = deviceInfo_.size(); i < e; i++) {
68+
if (deviceInfo_[i].backendName != deviceInfo_[0].backendName) {
69+
multiBackendNames_ = true;
70+
break;
71+
}
72+
}
73+
}
74+
75+
void Partitioner::finalize(const DAGListTy &partitions,
76+
const NodeToFunctionMap &mapping) {
77+
auto funcList = module_->getFunctions();
78+
if (logPartition) {
79+
LOG(INFO) << "The number of partitions is : " << funcList.size()
80+
<< ", and the DAG is dumped into DAG.dot file.\n";
81+
dumpDAG("DAG.dot", partitions);
82+
}
83+
84+
for (Function *subF : funcList) {
85+
if (dumpPartition) {
86+
subF->dumpDAG("partitionLogicalID" +
87+
std::to_string(mapping.getLogicalDeviceIDList(subF)[0]) +
88+
"__" + subF->getName().str() + "__" +
89+
mapping.getPartitionBackendName(subF) + ".dot");
90+
}
91+
DCHECK(subF->verify()) << "Conversion led to invalid function";
92+
}
93+
if (logPartition) {
94+
logPartitionInfo(mapping);
95+
}
96+
}
97+
6398
Partitioner::Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
6499
const std::vector<Backend *> &backends,
65100
bool saturateHost, bool optimized)
66101
: module_(parent), deviceInfo_(devices), backends_(backends),
67102
saturateHost_(saturateHost), optimized_(optimized) {
68-
memSize_ = module_->getConstantsSize();
69-
logicalDeviceID_ = 0;
103+
init();
70104
}
71105

72106
Partitioner::Partitioner(Module *parent, const std::vector<DeviceInfo> &devices,
73107
bool saturateHost, bool optimized,
74108
PartitionConfig partitionConfig)
75109
: module_(parent), deviceInfo_(devices), saturateHost_(saturateHost),
76110
optimized_(optimized), partitionConfig_(partitionConfig) {
77-
memSize_ = module_->getConstantsSize();
78-
logicalDeviceID_ = 0;
111+
init();
79112
}
80113

81114
Function *Partitioner::selectRepFunc(Module *parent, uint64_t &memSize) {
@@ -378,11 +411,40 @@ llvm::Expected<DAGListTy> Partitioner::createDAGWithoutPartition(
378411
return std::move(partitions);
379412
}
380413

381-
llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
382-
DeviceIDTy numDevices,
383-
uint64_t availableMemory,
384-
llvm::StringRef backendName,
385-
NodeToFunctionMap &mapping) {
414+
llvm::Expected<DAGListTy>
415+
Partitioner::loadBalancedPartition(CompilationContext &cctx,
416+
size_t numDevices) {
417+
RETURN_ERR_IF_NOT(
418+
module_->getFunctions().size() == 1,
419+
strFormat("Invalid : %lu functions in a module. Now in load-balanced "
420+
"partition flow, the module can only contain 1 function",
421+
module_->getFunctions().size()));
422+
423+
if (multiBackendNames_) {
424+
VLOG(1) << "For multi backend types, load-balanced partition can't be "
425+
"applied. Call heterogeneous partition instead.";
426+
return heterogeneousPartition(cctx);
427+
}
428+
F_ = selectRepFunc(module_, memSize_);
429+
std::string origName(F_->getName().data());
430+
DAGListTy partitions;
431+
std::vector<Backend *> backends;
432+
genBackendMap(backendMap_, backendHolder, backends);
433+
434+
// Step 1: Get the minimal number of partitions from auto-partition.
435+
auto backendName = backends[0]->getBackendName();
436+
uint64_t availableMemory = backendMap_[backendName].memSize;
437+
if (!optimized_) {
438+
RETURN_IF_ERR(::glow::optimizeFunction(F_, *(backends[0]), cctx));
439+
}
440+
NodeToFunctionMap mapping =
441+
selectPartitions(F_, availableMemory, backendName);
442+
logicalDeviceID_ = assignLogicalDeviceID(mapping, backendMap_);
443+
444+
if (logicalDeviceID_ > numDevices) {
445+
numDevices = logicalDeviceID_;
446+
}
447+
// Step 2:
386448
// Currently, the load balanced partitioner disregards the input mapping
387449
// and only uses the numPartitions input from previous partitioning passes.
388450
// But we take this in to leave open the option of using the previous mapping
@@ -410,35 +472,35 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
410472
std::vector<size_t> memoryAvailable(numDevices, availableMemory);
411473
std::vector<NodesSet> nodesInPartitions(numDevices);
412474
std::vector<GraphMemInfo> graphMem(numDevices, GraphMemInfo{});
413-
std::vector<Function *> partitions(numDevices);
475+
std::vector<Function *> partitionFuncs(numDevices);
414476

415477
// Compute total roofline time
478+
NodeToFunctionMap partitionMap;
416479
float totalRooflineTime = 0;
417-
for (auto &n : F->getNodes()) {
480+
for (auto &n : F_->getNodes()) {
418481
totalRooflineTime +=
419482
getNodeComputeTime(&n, backendMap_[deviceInfo_[0].backendName]);
420483
}
421484

422485
float timePerPartition = totalRooflineTime / numDevices;
423486

424487
// Get the BFS levels
425-
NodeToFunctionMap partitionMap;
426488
Function *newF;
427-
BFSLevel bfs = getBFSLevel(F);
489+
BFSLevel bfs = getBFSLevel(F_);
428490
size_t level = bfs.size();
429491

430492
// Create the functions and push them into the mapping
431493
for (DeviceIDTy curPartition = 0; curPartition < numDevices; curPartition++) {
432494
std::string funcName =
433-
std::string(F->getName()) + "_part" + std::to_string(curPartition + 1);
434-
if (F->getParent()->hasFunction(funcName)) {
435-
newF = F->getParent()->getFunction(funcName);
436-
F->getParent()->eraseFunction(newF);
495+
std::string(F_->getName()) + "_part" + std::to_string(curPartition + 1);
496+
if (F_->getParent()->hasFunction(funcName)) {
497+
newF = F_->getParent()->getFunction(funcName);
498+
F_->getParent()->eraseFunction(newF);
437499
}
438-
newF = F->getParent()->createFunction(funcName);
500+
newF = F_->getParent()->createFunction(funcName);
439501
partitionMap.createPartition(newF, backendName);
440502
partitionMap.appendLogicalDeviceID(newF, curPartition);
441-
partitions[curPartition] = newF;
503+
partitionFuncs[curPartition] = newF;
442504
}
443505

444506
// Go through operators level by level
@@ -482,7 +544,7 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
482544

483545
if (memValid && (loadBalanceValid || curPartition == numDevices - 1)) {
484546
// valid, put the node in the current partition
485-
Function *curF = partitions[curPartition];
547+
Function *curF = partitionFuncs[curPartition];
486548
partitionMap.add(N, curF);
487549
deviceTime[curPartition] += curOpTime;
488550
memoryAvailable[curPartition] -= curOpMemory;
@@ -502,18 +564,36 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
502564
for (size_t i = 0; i < numDevices; i++) {
503565
VLOG(1) << "Partition #" << i << " has estimated runtime " << deviceTime[i];
504566
}
567+
// Check if the memory usage meets the device memory limitation.
568+
RETURN_IF_ERR(memoryUsageValidation(partitionMap, backendMap_));
569+
570+
logicalDeviceID_ = assignLogicalDeviceID(partitionMap, backendMap_);
571+
RETURN_IF_ERR(logicalDevicesValidation(partitionMap, backendMap_));
572+
573+
partitions =
574+
doPartitioning(origName, {F_}, module_, partitionMap, /* saveDAG */ true);
575+
module_->eraseFunction(F_);
576+
577+
if (saturateHost_ &&
578+
partitionMap.getPartitions().size() < deviceInfo_.size()) {
579+
saturateHost(logicalDeviceID_, partitions);
580+
}
581+
582+
finalize(partitions, partitionMap);
505583

506-
mapping = partitionMap;
507-
return llvm::Error::success();
584+
return std::move(partitions);
508585
}
509586

510587
llvm::Expected<DAGListTy>
511588
Partitioner::quantizationProfilingPartition(CompilationContext &cctx) {
512589
// For quantization profiling flow, currently we assume there is only 1
513590
// function in a module.
514-
DCHECK(module_->getFunctions().size() == 1)
515-
<< "Invalid number of functions in a module. For quantization profiling "
516-
"flow, the module can only contain 1 function";
591+
RETURN_ERR_IF_NOT(
592+
module_->getFunctions().size() == 1,
593+
strFormat(
594+
"Invalid : %lu functions in a module. In quantization profiling "
595+
"partition flow, the module can only contain 1 function",
596+
module_->getFunctions().size()));
517597

518598
// Quantization profiling flow is run under CPU backend, so we don't really
519599
// need the concrete partition. The backendBasedPartition is necessary since
@@ -573,13 +653,21 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
573653
}
574654
return createDAGWithoutPartition(backendName, backendMap_, cctx);
575655
}
576-
DCHECK(module_->getFunctions().size() == 1)
577-
<< "Invalid number of functions in a module. Now in heterogeneouse "
578-
"partition flow, the module can only contain 1 function";
656+
// NOTE: the following error detection will be removed once multi-functions
657+
// in a module is supported.
658+
RETURN_ERR_IF_NOT(
659+
module_->getFunctions().size() == 1,
660+
strFormat("Invalid : %lu functions in a module. Now in heterogeneous "
661+
"partition flow, the module can only contain 1 function",
662+
module_->getFunctions().size()));
579663
} else {
580-
DCHECK(module_->getFunctions().size() == 1)
581-
<< "Invalid number of functions in a module. Now in heterogeneouse "
582-
"partition flow, the module can only contain 1 function";
664+
// NOTE: the following error detection will be removed once multi-functions
665+
// in a module is supported.
666+
RETURN_ERR_IF_NOT(
667+
module_->getFunctions().size() == 1,
668+
strFormat("Invalid : %lu functions in a module. Now in heterogeneous "
669+
"partition flow, the module can only contain 1 function",
670+
module_->getFunctions().size()));
583671
ASSIGN_VALUE_OR_RETURN_ERR(
584672
partitions, backendBasedPartition(funcToBackend, F_, backends, cctx));
585673
module_->eraseFunction(F_);
@@ -618,33 +706,11 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
618706
// devices.
619707
RETURN_IF_ERR(logicalDevicesValidation(mapping, backendMap_));
620708

621-
// Step 4 : Optimization pass to modify results of default partitioner.
622-
// If load balanced partitioner optimization is enabled, then modify
623-
// the results of the default partitioner to optimize based on roofline
624-
// performance.
625-
if (backends.size() == 1 && glow::GlowEnableLoadBalancedPartitioning) {
626-
auto backendName = backends[0]->getBackendName();
627-
size_t numDevices = logicalDeviceID_;
628-
RETURN_IF_ERR(loadBalancedPartitioning(F_, numDevices,
629-
backendMap_[backendName].memSize,
630-
backendName, mapping));
631-
// Check if the memory usage meets the device memory limitation.
632-
RETURN_IF_ERR(memoryUsageValidation(mapping, backendMap_));
633-
// Check if the number of logical devices is less than the given physical
634-
// devices.
635-
RETURN_IF_ERR(logicalDevicesValidation(mapping, backendMap_));
636-
funcs.clear();
637-
funcs.push_back(F_);
638-
}
639-
640-
// Step 5 : do the real partitioning for the function list.
709+
// Step 4 : do the real partitioning for the function list.
641710
partitions =
642711
doPartitioning(origName, funcs, module_, mapping, /* saveDAG */ true);
643712

644-
// DAG validation.
645-
RETURN_IF_ERR(dagValidation(partitions[0]));
646-
647-
// Step 6 : Post-partition optimization - Adjust the logicalDevice for each
713+
// Step 5 : Post-partition optimization - Adjust the logicalDevice for each
648714
// DAGNode.
649715
if (saturateHost_ && backends.size() == 1 &&
650716
mapping.getPartitions().size() < deviceInfo_.size()) {
@@ -654,30 +720,12 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
654720
saturateHost(logicalDeviceID_, partitions);
655721
}
656722

657-
// Step 7 : clean up and verify the generated new functions.
723+
// Step 6 : clean up and verify the generated new functions.
658724
for (auto i = funcToBackend.begin(); i != funcToBackend.end(); ++i) {
659725
module_->eraseFunction(i->first);
660726
}
661727

662-
auto funcList = module_->getFunctions();
663-
if (logPartition) {
664-
LOG(INFO) << "The number of partitions is : " << funcList.size()
665-
<< ", and the DAG is dumped into DAG.dot file.\n";
666-
dumpDAG("DAG.dot", partitions);
667-
}
668-
669-
for (Function *subF : funcList) {
670-
if (dumpPartition) {
671-
subF->dumpDAG("partitionLogicalID" +
672-
std::to_string(mapping.getLogicalDeviceIDList(subF)[0]) +
673-
"__" + subF->getName().str() + "__" +
674-
mapping.getPartitionBackendName(subF) + ".dot");
675-
}
676-
DCHECK(subF->verify()) << "Conversion led to invalid function";
677-
}
678-
if (logPartition) {
679-
logPartitionInfo(mapping);
680-
}
728+
finalize(partitions, mapping);
681729

682730
return std::move(partitions);
683731
}
@@ -768,9 +816,9 @@ Partitioner::partitionFromConfig(const PartitionConfig &partitionConfig) {
768816
RETURN_IF_ERR(::glow::optimizeFunction(func, *backend, cctx));
769817
}
770818
}
771-
if (logPartition) {
772-
logPartitionInfo(partitionMap);
773-
}
819+
820+
finalize(partitions, partitionMap);
821+
774822
return std::move(partitions);
775823
}
776824

@@ -785,6 +833,11 @@ llvm::Expected<DAGListTy> Partitioner::partition(CompilationContext &cctx) {
785833
return quantizationProfilingPartition(cctx);
786834
}
787835

836+
if (!multiBackendNames_ && glow::GlowEnableLoadBalancedPartitioning) {
837+
// Call load-balance partition flow.
838+
return loadBalancedPartition(cctx);
839+
}
840+
788841
// Call heterogeneous partition flow.
789842
return heterogeneousPartition(cctx);
790843
}

0 commit comments

Comments
 (0)