@@ -60,22 +60,55 @@ bool sortMinMemory(const std::pair<Function *, uint64_t> &a,
60
60
return a.second < b.second ;
61
61
}
62
62
63
/// Shared setup called from the Partitioner constructors: resets the
/// bookkeeping fields and records whether the configured devices use more
/// than one backend name.
+ void Partitioner::init () {
64
// Start the memory size from the module's constants size.
+ memSize_ = module_->getConstantsSize ();
65
// Logical device numbering starts from 0.
+ logicalDeviceID_ = 0 ;
66
// Assume a single backend name until a differing device is found.
+ multiBackendNames_ = false ;
67
// Compare every device after the first against device 0. Starting at index 1
// means deviceInfo_[0] is only read when at least two devices exist.
+ for (size_t i = 1 , e = deviceInfo_.size (); i < e; i++) {
68
+ if (deviceInfo_[i].backendName != deviceInfo_[0 ].backendName ) {
69
+ multiBackendNames_ = true ;
70
// One mismatch is enough; stop scanning.
+ break ;
71
+ }
72
+ }
73
+ }
74
+
75
/// Common post-partitioning step: optionally logs and dumps the resulting
/// DAG, optionally dumps each function in the module to its own dot file,
/// verifies each function, and optionally logs the partition info.
/// \p partitions is the DAG list passed to dumpDAG.
/// \p mapping supplies each function's logical device IDs and backend name.
+ void Partitioner::finalize (const DAGListTy &partitions,
76
+ const NodeToFunctionMap &mapping) {
77
// NOTE(review): `auto` may copy the function list if getFunctions() returns
// a reference — confirm whether a copy is intended here.
+ auto funcList = module_->getFunctions ();
78
// When partition logging is on, report the function count and dump the DAG.
+ if (logPartition) {
79
+ LOG (INFO) << " The number of partitions is : " << funcList.size ()
80
+ << " , and the DAG is dumped into DAG.dot file.\n " ;
81
+ dumpDAG (" DAG.dot" , partitions);
82
+ }
83
+
84
+ for (Function *subF : funcList) {
85
+ if (dumpPartition) {
86
// The dump file name is built from the function's first logical device ID,
// its name, and its backend name.
// NOTE(review): the [0] index assumes the logical device ID list is
// non-empty for every function in the module — confirm.
+ subF->dumpDAG (" partitionLogicalID" +
87
+ std::to_string (mapping.getLogicalDeviceIDList (subF)[0 ]) +
88
+ " __" + subF->getName ().str () + " __" +
89
+ mapping.getPartitionBackendName (subF) + " .dot" );
90
+ }
91
// NOTE(review): DCHECK is presumably a debug-only check, so verify() may not
// run in release builds — confirm.
+ DCHECK (subF->verify ()) << " Conversion led to invalid function" ;
92
+ }
93
// Log the partition info last, after all functions have been checked.
+ if (logPartition) {
94
+ logPartitionInfo (mapping);
95
+ }
96
+ }
97
+
63
98
/// Constructs a partitioner from a module, the device list, and an explicit
/// list of backends. The arguments are stored in the corresponding members
/// (module_, deviceInfo_, backends_, saturateHost_, optimized_); the remaining
/// setup is done by init().
Partitioner::Partitioner (Module *parent, const std::vector<DeviceInfo> &devices,
64
99
const std::vector<Backend *> &backends,
65
100
bool saturateHost, bool optimized)
66
101
: module_(parent), deviceInfo_(devices), backends_(backends),
67
102
saturateHost_(saturateHost), optimized_(optimized) {
68
- memSize_ = module_->getConstantsSize ();
69
- logicalDeviceID_ = 0 ;
103
// init() sets memSize_, logicalDeviceID_ and multiBackendNames_.
+ init ();
70
104
}
71
105
72
106
/// Constructs a partitioner from a module, the device list, and a
/// PartitionConfig (no explicit backend list). The arguments are stored in
/// the corresponding members (module_, deviceInfo_, saturateHost_, optimized_,
/// partitionConfig_); the remaining setup is done by init().
Partitioner::Partitioner (Module *parent, const std::vector<DeviceInfo> &devices,
73
107
bool saturateHost, bool optimized,
74
108
PartitionConfig partitionConfig)
75
109
: module_(parent), deviceInfo_(devices), saturateHost_(saturateHost),
76
110
optimized_(optimized), partitionConfig_(partitionConfig) {
77
- memSize_ = module_->getConstantsSize ();
78
- logicalDeviceID_ = 0 ;
111
// init() sets memSize_, logicalDeviceID_ and multiBackendNames_.
+ init ();
79
112
}
80
113
81
114
Function *Partitioner::selectRepFunc (Module *parent, uint64_t &memSize) {
@@ -378,11 +411,40 @@ llvm::Expected<DAGListTy> Partitioner::createDAGWithoutPartition(
378
411
return std::move (partitions);
379
412
}
380
413
381
- llvm::Error Partitioner::loadBalancedPartitioning (Function *F,
382
- DeviceIDTy numDevices,
383
- uint64_t availableMemory,
384
- llvm::StringRef backendName,
385
- NodeToFunctionMap &mapping) {
414
+ llvm::Expected<DAGListTy>
415
+ Partitioner::loadBalancedPartition (CompilationContext &cctx,
416
+ size_t numDevices) {
417
+ RETURN_ERR_IF_NOT (
418
+ module_->getFunctions ().size () == 1 ,
419
+ strFormat (" Invalid : %lu functions in a module. Now in load-balanced "
420
+ " partition flow, the module can only contain 1 function" ,
421
+ module_->getFunctions ().size ()));
422
+
423
+ if (multiBackendNames_) {
424
+ VLOG (1 ) << " For multi backend types, load-balanced partition can't be "
425
+ " applied. Call heterogeneous partition instead." ;
426
+ return heterogeneousPartition (cctx);
427
+ }
428
+ F_ = selectRepFunc (module_, memSize_);
429
+ std::string origName (F_->getName ().data ());
430
+ DAGListTy partitions;
431
+ std::vector<Backend *> backends;
432
+ genBackendMap (backendMap_, backendHolder, backends);
433
+
434
+ // Step 1: Get the minimal number of partitions from auto-partition.
435
+ auto backendName = backends[0 ]->getBackendName ();
436
+ uint64_t availableMemory = backendMap_[backendName].memSize ;
437
+ if (!optimized_) {
438
+ RETURN_IF_ERR (::glow::optimizeFunction (F_, *(backends[0 ]), cctx));
439
+ }
440
+ NodeToFunctionMap mapping =
441
+ selectPartitions (F_, availableMemory, backendName);
442
+ logicalDeviceID_ = assignLogicalDeviceID (mapping, backendMap_);
443
+
444
+ if (logicalDeviceID_ > numDevices) {
445
+ numDevices = logicalDeviceID_;
446
+ }
447
+ // Step 2:
386
448
// Currently, the load balanced partitioner disregards the input mapping
387
449
// and only uses the numPartitions input from previous partitioning passes
388
450
// But we take this in to leave open the option of using the previous mapping
@@ -410,35 +472,35 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
410
472
std::vector<size_t > memoryAvailable (numDevices, availableMemory);
411
473
std::vector<NodesSet> nodesInPartitions (numDevices);
412
474
std::vector<GraphMemInfo> graphMem (numDevices, GraphMemInfo{});
413
- std::vector<Function *> partitions (numDevices);
475
+ std::vector<Function *> partitionFuncs (numDevices);
414
476
415
477
// Compute total roofline time
478
+ NodeToFunctionMap partitionMap;
416
479
float totalRooflineTime = 0 ;
417
- for (auto &n : F ->getNodes ()) {
480
+ for (auto &n : F_ ->getNodes ()) {
418
481
totalRooflineTime +=
419
482
getNodeComputeTime (&n, backendMap_[deviceInfo_[0 ].backendName ]);
420
483
}
421
484
422
485
float timePerPartition = totalRooflineTime / numDevices;
423
486
424
487
// Get the BFS levels
425
- NodeToFunctionMap partitionMap;
426
488
Function *newF;
427
- BFSLevel bfs = getBFSLevel (F );
489
+ BFSLevel bfs = getBFSLevel (F_ );
428
490
size_t level = bfs.size ();
429
491
430
492
// Create the functions and push them into the mapping
431
493
for (DeviceIDTy curPartition = 0 ; curPartition < numDevices; curPartition++) {
432
494
std::string funcName =
433
- std::string (F ->getName ()) + " _part" + std::to_string (curPartition + 1 );
434
- if (F ->getParent ()->hasFunction (funcName)) {
435
- newF = F ->getParent ()->getFunction (funcName);
436
- F ->getParent ()->eraseFunction (newF);
495
+ std::string (F_ ->getName ()) + " _part" + std::to_string (curPartition + 1 );
496
+ if (F_ ->getParent ()->hasFunction (funcName)) {
497
+ newF = F_ ->getParent ()->getFunction (funcName);
498
+ F_ ->getParent ()->eraseFunction (newF);
437
499
}
438
- newF = F ->getParent ()->createFunction (funcName);
500
+ newF = F_ ->getParent ()->createFunction (funcName);
439
501
partitionMap.createPartition (newF, backendName);
440
502
partitionMap.appendLogicalDeviceID (newF, curPartition);
441
- partitions [curPartition] = newF;
503
+ partitionFuncs [curPartition] = newF;
442
504
}
443
505
444
506
// Go through operators level by level
@@ -482,7 +544,7 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
482
544
483
545
if (memValid && (loadBalanceValid || curPartition == numDevices - 1 )) {
484
546
// valid, put the node in the current partition
485
- Function *curF = partitions [curPartition];
547
+ Function *curF = partitionFuncs [curPartition];
486
548
partitionMap.add (N, curF);
487
549
deviceTime[curPartition] += curOpTime;
488
550
memoryAvailable[curPartition] -= curOpMemory;
@@ -502,18 +564,36 @@ llvm::Error Partitioner::loadBalancedPartitioning(Function *F,
502
564
for (size_t i = 0 ; i < numDevices; i++) {
503
565
VLOG (1 ) << " Partition #" << i << " has estimated runtime " << deviceTime[i];
504
566
}
567
+ // Check if the memory usage meets the device memory limitation.
568
+ RETURN_IF_ERR (memoryUsageValidation (partitionMap, backendMap_));
569
+
570
+ logicalDeviceID_ = assignLogicalDeviceID (partitionMap, backendMap_);
571
+ RETURN_IF_ERR (logicalDevicesValidation (partitionMap, backendMap_));
572
+
573
+ partitions =
574
+ doPartitioning (origName, {F_}, module_, partitionMap, /* saveDAG */ true );
575
+ module_->eraseFunction (F_);
576
+
577
+ if (saturateHost_ &&
578
+ partitionMap.getPartitions ().size () < deviceInfo_.size ()) {
579
+ saturateHost (logicalDeviceID_, partitions);
580
+ }
581
+
582
+ finalize (partitions, partitionMap);
505
583
506
- mapping = partitionMap;
507
- return llvm::Error::success ();
584
+ return std::move (partitions);
508
585
}
509
586
510
587
llvm::Expected<DAGListTy>
511
588
Partitioner::quantizationProfilingPartition (CompilationContext &cctx) {
512
589
// For quantization profiling flow, currently we assume there is only 1
513
590
// function in a module.
514
- DCHECK (module_->getFunctions ().size () == 1 )
515
- << " Invalid number of functions in a module. For quantization profiling "
516
- " flow, the module can only contain 1 function" ;
591
+ RETURN_ERR_IF_NOT (
592
+ module_->getFunctions ().size () == 1 ,
593
+ strFormat (
594
+ " Invalid : %lu functions in a module. In quantization profiling "
595
+ " partition flow, the module can only contain 1 function" ,
596
+ module_->getFunctions ().size ()));
517
597
518
598
// Quantization profiling flow is run under CPU backend, so we don't really
519
599
// need the concrete partition. The backendBasedPartition is necessary since
@@ -573,13 +653,21 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
573
653
}
574
654
return createDAGWithoutPartition (backendName, backendMap_, cctx);
575
655
}
576
- DCHECK (module_->getFunctions ().size () == 1 )
577
- << " Invalid number of functions in a module. Now in heterogeneouse "
578
- " partition flow, the module can only contain 1 function" ;
656
+ // NOTE: the following error detection will be removed once multiple
657
+ // functions in a module are supported.
658
+ RETURN_ERR_IF_NOT (
659
+ module_->getFunctions ().size () == 1 ,
660
+ strFormat (" Invalid : %lu functions in a module. Now in heterogeneous "
661
+ " partition flow, the module can only contain 1 function" ,
662
+ module_->getFunctions ().size ()));
579
663
} else {
580
- DCHECK (module_->getFunctions ().size () == 1 )
581
- << " Invalid number of functions in a module. Now in heterogeneouse "
582
- " partition flow, the module can only contain 1 function" ;
664
+ // NOTE: the following error detection will be removed once multiple
665
+ // functions in a module are supported.
666
+ RETURN_ERR_IF_NOT (
667
+ module_->getFunctions ().size () == 1 ,
668
+ strFormat (" Invalid : %lu functions in a module. Now in heterogeneous "
669
+ " partition flow, the module can only contain 1 function" ,
670
+ module_->getFunctions ().size ()));
583
671
ASSIGN_VALUE_OR_RETURN_ERR (
584
672
partitions, backendBasedPartition (funcToBackend, F_, backends, cctx));
585
673
module_->eraseFunction (F_);
@@ -618,33 +706,11 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
618
706
// devices.
619
707
RETURN_IF_ERR (logicalDevicesValidation (mapping, backendMap_));
620
708
621
- // Step 4 : Optimization pass to modify results of default partitioner.
622
- // If load balanced partitioner optimization is enabled, then modify
623
- // the results of the default partitioner to optimize based on roofline
624
- // performance.
625
- if (backends.size () == 1 && glow::GlowEnableLoadBalancedPartitioning) {
626
- auto backendName = backends[0 ]->getBackendName ();
627
- size_t numDevices = logicalDeviceID_;
628
- RETURN_IF_ERR (loadBalancedPartitioning (F_, numDevices,
629
- backendMap_[backendName].memSize ,
630
- backendName, mapping));
631
- // Check if the memory usage meets the device memory limitation.
632
- RETURN_IF_ERR (memoryUsageValidation (mapping, backendMap_));
633
- // Check if the number of logical devices is less than the given physical
634
- // devices.
635
- RETURN_IF_ERR (logicalDevicesValidation (mapping, backendMap_));
636
- funcs.clear ();
637
- funcs.push_back (F_);
638
- }
639
-
640
- // Step 5 : do the real partitioning for the function list.
709
+ // Step 4 : do the real partitioning for the function list.
641
710
partitions =
642
711
doPartitioning (origName, funcs, module_, mapping, /* saveDAG */ true );
643
712
644
- // DAG validation.
645
- RETURN_IF_ERR (dagValidation (partitions[0 ]));
646
-
647
- // Step 6 : Post-partition optimization - Adjust the logicalDevice for each
713
+ // Step 5 : Post-partition optimization - Adjust the logicalDevice for each
648
714
// DAGNode.
649
715
if (saturateHost_ && backends.size () == 1 &&
650
716
mapping.getPartitions ().size () < deviceInfo_.size ()) {
@@ -654,30 +720,12 @@ Partitioner::heterogeneousPartition(CompilationContext &cctx) {
654
720
saturateHost (logicalDeviceID_, partitions);
655
721
}
656
722
657
- // Step 7 : clean up and verify the generated new functions.
723
+ // Step 6 : clean up and verify the generated new functions.
658
724
for (auto i = funcToBackend.begin (); i != funcToBackend.end (); ++i) {
659
725
module_->eraseFunction (i->first );
660
726
}
661
727
662
- auto funcList = module_->getFunctions ();
663
- if (logPartition) {
664
- LOG (INFO) << " The number of partitions is : " << funcList.size ()
665
- << " , and the DAG is dumped into DAG.dot file.\n " ;
666
- dumpDAG (" DAG.dot" , partitions);
667
- }
668
-
669
- for (Function *subF : funcList) {
670
- if (dumpPartition) {
671
- subF->dumpDAG (" partitionLogicalID" +
672
- std::to_string (mapping.getLogicalDeviceIDList (subF)[0 ]) +
673
- " __" + subF->getName ().str () + " __" +
674
- mapping.getPartitionBackendName (subF) + " .dot" );
675
- }
676
- DCHECK (subF->verify ()) << " Conversion led to invalid function" ;
677
- }
678
- if (logPartition) {
679
- logPartitionInfo (mapping);
680
- }
728
+ finalize (partitions, mapping);
681
729
682
730
return std::move (partitions);
683
731
}
@@ -768,9 +816,9 @@ Partitioner::partitionFromConfig(const PartitionConfig &partitionConfig) {
768
816
RETURN_IF_ERR (::glow::optimizeFunction (func, *backend, cctx));
769
817
}
770
818
}
771
- if (logPartition) {
772
- logPartitionInfo ( partitionMap);
773
- }
819
+
820
+ finalize (partitions, partitionMap);
821
+
774
822
return std::move (partitions);
775
823
}
776
824
@@ -785,6 +833,11 @@ llvm::Expected<DAGListTy> Partitioner::partition(CompilationContext &cctx) {
785
833
return quantizationProfilingPartition (cctx);
786
834
}
787
835
836
+ if (!multiBackendNames_ && glow::GlowEnableLoadBalancedPartitioning) {
837
+ // Call load-balance partition flow.
838
+ return loadBalancedPartition (cctx);
839
+ }
840
+
788
841
// Call heterogeneous partition flow.
789
842
return heterogeneousPartition (cctx);
790
843
}
0 commit comments