@@ -503,6 +503,30 @@ class OpenMPIRBuilder {
503
503
return allocaInst;
504
504
}
505
505
};
506
+
507
+ struct ScanInformation { // Bookkeeping for scan lowering; the single instance is ScanInfo.
508
+ // / Dominates the body of the loop before scan directive
509
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr ;
510
+ // / Dominates the body of the loop after scan directive
511
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr ;
512
+ // / Controls the flow to before or after scan blocks
513
+ llvm::BasicBlock *OMPScanDispatch = nullptr ;
514
+ // / Exit block of loop body
515
+ llvm::BasicBlock *OMPScanLoopExit = nullptr ;
516
+ // / Block before loop body where scan initializations are done
517
+ llvm::BasicBlock *OMPScanInit = nullptr ;
518
+ // / Block after loop body where scan finalizations are done
519
+ llvm::BasicBlock *OMPScanFinish = nullptr ;
520
+ // / If true, it indicates Input phase is lowered; else it indicates
521
+ // / ScanPhase is lowered
522
+ bool OMPFirstScanLoop = false ;
523
+ // Maps the private reduction variable to the pointer of the temporary
524
+ // buffer
525
+ llvm::SmallDenseMap<llvm::Value *, llvm::Value *> ScanBuffPtrs;
526
+ llvm::Value *IV = nullptr ; // Null until assigned, like the block pointers above.
527
+ llvm::Value *Span = nullptr ; // Null until assigned, like the block pointers above.
528
+ } ScanInfo;
529
+
506
530
// / Initialize the internal state, this will put structures types and
507
531
// / potentially other helpers into the underlying module. Must be called
508
532
// / before any other method and only once! This internal state includes types
@@ -729,6 +753,35 @@ class OpenMPIRBuilder {
729
753
LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
730
754
const Twine &Name = " loop" );
731
755
756
+ // / Generator for the control flow structure of OpenMP canonical loops if
757
+ // / the parent directive has an `inscan` modifier specified.
758
+ // / If the `inscan` modifier is specified, the region of the parent is
759
+ // / expected to have a `scan` directive. Based on the clauses in
760
+ // / scan directive, the body of the loop is split into two loops: Input loop
761
+ // / and Scan Loop. Input loop contains the code generated for input phase of
762
+ // / scan and Scan loop contains the code generated for scan phase of scan.
763
+ // /
764
+ // / \param Loc The insert and source location description.
765
+ // / \param BodyGenCB Callback that will generate the loop body code.
766
+ // / \param Start Value of the loop counter for the first iteration.
767
+ // / \param Stop Loop counter values past this will stop the loop.
768
+ // / \param Step Loop counter increment after each iteration; negative
769
+ // / means counting down.
770
+ // / \param IsSigned Whether Start, Stop and Step are signed integers.
771
+ // / \param InclusiveStop Whether \p Stop itself is a valid value for the loop
772
+ // / counter.
773
+ // / \param ComputeIP Insertion point for instructions computing the trip
774
+ // / count. Can be used to ensure the trip count is available
775
+ // / at the outermost loop of a loop nest. If not set,
776
+ // / defaults to the preheader of the generated loop.
777
+ // / \param Name Base name used to derive BB and instruction names.
778
+ // /
779
+ // / \returns A vector containing the loop info of the Input Loop and the Scan Loop.
780
+ Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops (
781
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
782
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
783
+ InsertPointTy ComputeIP, const Twine &Name);
784
+
732
785
// / Calculate the trip count of a canonical loop.
733
786
// /
734
787
// / This allows specifying user-defined loop counter values using increment,
@@ -798,13 +851,16 @@ class OpenMPIRBuilder {
798
851
// / at the outermost loop of a loop nest. If not set,
799
852
// / defaults to the preheader of the generated loop.
800
853
// / \param Name Base name used to derive BB and instruction names.
854
+ // / \param InScan Whether loop has a scan reduction specified.
801
855
// /
802
856
// / \returns An object representing the created control flow structure which
803
857
// / can be used for loop-associated directives.
804
- Expected<CanonicalLoopInfo *> createCanonicalLoop (
805
- const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
806
- Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
807
- InsertPointTy ComputeIP = {}, const Twine &Name = " loop" );
858
+ Expected<CanonicalLoopInfo *>
859
+ createCanonicalLoop (const LocationDescription &Loc,
860
+ LoopBodyGenCallbackTy BodyGenCB, Value *Start,
861
+ Value *Stop, Value *Step, bool IsSigned,
862
+ bool InclusiveStop, InsertPointTy ComputeIP = {},
863
+ const Twine &Name = " loop" , bool InScan = false );
808
864
809
865
// / Collapse a loop nest into a single loop.
810
866
// /
@@ -1532,6 +1588,45 @@ class OpenMPIRBuilder {
1532
1588
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
1533
1589
Function *ReduceFn, AttributeList FuncAttrs);
1534
1590
1591
+ // / Creates a call to the specified runtime function.
1592
+ // / \param Callee Function Declaration Value
1593
+ // / \param Args Arguments passed to the call
1594
+ // / \param Name Name of the call instruction (no default argument is declared).
1595
+ // /
1596
+ // / \return The Runtime call instruction created.
1597
+ llvm::CallInst *emitNoUnwindRuntimeCall (llvm::FunctionCallee Callee,
1598
+ ArrayRef<llvm::Value *> Args,
1599
+ const llvm::Twine &Name);
1600
+
1601
+ // / Helper function for createCanonicalScanLoops that creates the Input Loop
1602
+ // / first (via \p InputLoopGen) and then the Scan Loop (via \p ScanLoopGen).
1603
+ // / \param InputLoopGen Callback for generating the loop for input phase
1604
+ // / \param ScanLoopGen Callback for generating the loop for scan phase
1605
+ // /
1606
+ // / \return error if any produced, else return success.
1607
+ Error emitScanBasedDirectiveIR (
1608
+ llvm::function_ref<Error()> InputLoopGen,
1609
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen);
1610
+
1611
+ // / Creates the basic blocks required for scan reduction (presumably the ones recorded in ScanInfo; confirm).
1612
+ void createScanBBs ();
1613
+
1614
+ // / Dynamically allocates the buffer needed for scan reduction.
1615
+ // / \param AllocaIP The IP where the possibly-shared buffer pointer is declared.
1616
+ // / \param ScanVars Scan Variables.
1617
+ // / \param ScanVarsType Types for \p ScanVars (presumably one per variable; confirm).
1618
+ // / \return error if any produced, else return success.
1619
+ Error emitScanBasedDirectiveDeclsIR (InsertPointTy AllocaIP,
1620
+ ArrayRef<llvm::Value *> ScanVars,
1621
+ ArrayRef<llvm::Type *> ScanVarsType);
1622
+
1623
+ // / Copies the result back to the reduction variable.
1624
+ // / \param ReductionInfos Vector of ReductionInfo entries containing the ReductionOps; passed by value.
1625
+ // /
1626
+ // / \return error if any produced, else return success.
1627
+ Error emitScanBasedDirectiveFinalsIR (
1628
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
1629
+
1535
1630
// / This function emits a helper that gathers Reduce lists from the first
1536
1631
// / lane of every active warp to lanes in the first warp.
1537
1632
// /
@@ -2607,6 +2702,41 @@ class OpenMPIRBuilder {
2607
2702
BodyGenCallbackTy BodyGenCB,
2608
2703
FinalizeCallbackTy FiniCB, Value *Filter);
2609
2704
2705
+ // / This function performs the scan reduction of the values updated in
2706
+ // / the input phase. The reduction logic needs to be emitted between input
2707
+ // / and scan loop returned by `createCanonicalScanLoops`. The following
2708
+ // / is the code that is generated, `buffer` and `span` are expected to be
2709
+ // / populated before executing the generated code.
2710
+ // /
2711
+ // / for (int k = 0; k != ceil(log2(span)); ++k) {
2712
+ // / i=pow(2,k)
2713
+ // / for (size cnt = last_iter; cnt >= i; --cnt)
2714
+ // / buffer[cnt] op= buffer[cnt-i];
2715
+ // / }
2716
+ // / \param Loc The insert and source location description.
2717
+ // / \param ReductionInfos Array type containing the ReductionOps.
2718
+ // /
2719
+ // / \returns The insertion position *after* the scan reduction.
2720
+ InsertPointOrErrorTy emitScanReduction (
2721
+ const LocationDescription &Loc,
2722
+ SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos);
2723
+
2724
+ // / This directive splits and directs the control flow to input phase
2725
+ // / blocks or scan phase blocks based on 1. whether input loop or scan loop
2726
+ // / is executed, 2. whether exclusive or inclusive scan is used.
2727
+ // /
2728
+ // / \param Loc The insert and source location description.
2729
+ // / \param AllocaIP The IP where the temporary buffer for scan reduction
2730
+ // / needs to be allocated.
2731
+ // / \param ScanVars Scan Variables.
2732
+ // / \param ScanVarsType Types for \p ScanVars (presumably one per variable; confirm).
2733
+ // / \param IsInclusive Whether it is an inclusive or exclusive scan.
2734
+ // / \returns The insertion position *after* the scan.
2735
+ InsertPointOrErrorTy createScan (const LocationDescription &Loc,
2736
+ InsertPointTy AllocaIP,
2737
+ ArrayRef<llvm::Value *> ScanVars,
2738
+ ArrayRef<llvm::Type *> ScanVarsType,
2739
+ bool IsInclusive);
2610
2740
// / Generator for '#omp critical'
2611
2741
// /
2612
2742
// / \param Loc The insert and source location description.
0 commit comments