From 8929ada133683e9b1764e89f97107283f424b088 Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Fri, 29 Oct 2021 21:22:39 -0700 Subject: [PATCH 1/3] Unstructured OpenMP code Address several problems with OpenMP constructs and unstructured code. In fir, code that contains a GOTO, EXIT, or any of a number of other branches is _unstructured_. OpenMP generally prohibits branching into or out of an OpenMP construct, but allows unstructured branches where the source and target are both local to the construct. A structured loop is implemented with a fir.do_loop op, and a structured IF is implemented with a fir.if op. Unstructured loops and IFs are implemented with explicit branches between basic blocks. This PR allows an OpenMP construct to immediately follow unstructured code (see PR 1077), and allows an OpenMP construct to contain unstructured code (see Issue 1120). The same issues are likely present in OpenACC code. The infrastructure changes in this PR should also be valid for OpenACC code, but file OpenACC.cpp is not changed. --- flang/include/flang/Lower/PFTBuilder.h | 30 +++-- flang/lib/Lower/Bridge.cpp | 11 +- flang/lib/Lower/OpenMP.cpp | 33 +++++- flang/lib/Lower/PFTBuilder.cpp | 61 +++++----- flang/test/Lower/OpenMP/omp-unstructured.f90 | 111 +++++++++++++++++++ flang/test/Lower/pre-fir-tree01.f90 | 4 +- flang/test/Lower/pre-fir-tree02.f90 | 3 +- flang/test/Lower/pre-fir-tree05.f90 | 6 +- 8 files changed, 210 insertions(+), 49 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-unstructured.f90 diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index 54ed161b4cc7a..b28e2f6f06680 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -134,9 +134,8 @@ using Constructs = using Directives = std::tuple; + parser::OpenACCDeclarativeConstruct, parser::OpenMPConstruct, + parser::OpenMPDeclarativeConstruct, parser::OmpEndLoopDirective>; template static constexpr bool isActionStmt{common::HasMember}; @@ -168,6 +167,11 @@ static constexpr bool isNopConstructStmt{common::HasMember< parser::EndIfStmt, parser::SelectRankCaseStmt, parser::TypeGuardStmt>>}; +template +static constexpr bool isExecutableDirective{common::HasMember< + A, std::tuple>}; + template static constexpr bool isFunctionLike{common::HasMember< A, std::tuple>; }}); } + constexpr bool isExecutableDirective() const { + return visit(common::visitors{[](auto &r) { + return pft::isExecutableDirective>; + }}); + } /// Return the predicate: "This is a non-initial, non-terminal construct /// statement." For an IfConstruct, this is ElseIfStmt and ElseStmt. @@ -297,11 +306,12 @@ struct Evaluation : EvaluationVariant { // FIR generation looks primarily at PFT ActionStmt and ConstructStmt leaf // nodes. Members such as lexicalSuccessor and block are applicable only - // to these nodes. The controlSuccessor member is used for nonlexical - // successors, such as linking to a GOTO target. For multiway branches, - // it is set to the first target. Successor and exit links always target - // statements. An internal Construct node has a constructExit link that - // applies to exits from anywhere within the construct. + // to these nodes, plus some directives. The controlSuccessor member is + // used for nonlexical successors, such as linking to a GOTO target. For + // multiway branches, it is set to the first target. Successor and exit + // links always target statements or directives. An internal Construct + // node has a constructExit link that applies to exits from anywhere within + // the construct. // // An unstructured construct is one that contains some form of goto. This // is indicated by the isUnstructured member flag, which may be set on a @@ -329,8 +339,8 @@ struct Evaluation : EvaluationVariant { std::optional label{}; std::unique_ptr evaluationList; // nested evaluations Evaluation *parentConstruct{nullptr}; // set for nodes below the top level - Evaluation *lexicalSuccessor{nullptr}; // set for ActionStmt, ConstructStmt - Evaluation *controlSuccessor{nullptr}; // set for some statements + Evaluation *lexicalSuccessor{nullptr}; // set for leaf nodes, some directives + Evaluation *controlSuccessor{nullptr}; // set for some leaf nodes Evaluation *constructExit{nullptr}; // set for constructs bool isNewBlock{false}; // evaluation begins a new basic block bool isUnstructured{false}; // evaluation has unstructured control flow diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index c326b887a490c..8943fd00ee2ed 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2485,9 +2485,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// Unconditionally switch code insertion to a new block. void startBlock(mlir::Block *newBlock) { assert(newBlock && "missing block"); + // Default termination for the current block is a fallthrough branch to + // the new block. if (blockIsUnterminated()) - genFIRBranch(newBlock); // default termination is a fallthrough branch - builder->setInsertionPointToEnd(newBlock); // newBlock might not be empty + genFIRBranch(newBlock); + // Some blocks may be re/started more than once, and might not be empty. + // If the new block already has (only) a terminator, set the insertion + // point to the start of the block. Otherwise set it to the end. + builder->setInsertionPointToStart(newBlock); + if (blockIsUnterminated()) + builder->setInsertionPointToEnd(newBlock); } /// Conditionally switch code insertion to a new block. diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 65c70e49f5316..7f7013573d4da 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -93,9 +93,27 @@ static void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +/// Create empty blocks for the current region. +/// These blocks replace blocks parented to an enclosing region. +void createEmptyRegionBlocks( + fir::FirOpBuilder &firOpBuilder, + std::list &evaluationList) { + auto *region = &firOpBuilder.getRegion(); + for (auto &eval : evaluationList) { + if (eval.block) { + assert(eval.block->empty() && "block is not empty"); + eval.block->erase(); + eval.block = firOpBuilder.createBlock(region); + } + if (eval.hasNestedEvaluations()) + createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); + } +} + template static void createBodyOfOp( Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, + Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OmpClauseList *clauses = nullptr, const SmallVector &args = {}) { auto &firOpBuilder = converter.getFirOpBuilder(); @@ -122,6 +140,8 @@ static void createBodyOfOp( } auto &block = op.getRegion().back(); firOpBuilder.setInsertionPointToStart(&block); + if (eval.lowerAsUnstructured()) + createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations()); // Ensure the block is well-formed by inserting terminators. if constexpr (std::is_same_v) { mlir::ValueRange results; @@ -324,7 +344,7 @@ static void createParallelOp(Fortran::lower::AbstractConverter &converter, // Avoid multiple privatization: If Parallel is part of a combined construct // then privatization will be performed later when the other part of the // combined construct is processed. - createBodyOfOp(parallelOp, converter, currentLocation, + createBodyOfOp(parallelOp, converter, currentLocation, eval, isCombined ? nullptr : &opClauseList); } @@ -345,7 +365,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, auto &firOpBuilder = converter.getFirOpBuilder(); auto currentLocation = converter.getCurrentLocation(); auto masterOp = firOpBuilder.create(currentLocation); - createBodyOfOp(masterOp, converter, currentLocation); + createBodyOfOp(masterOp, converter, currentLocation, eval); } } @@ -599,12 +619,13 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, wsLoopOp.nowaitAttr(firOpBuilder.getUnitAttr()); } - createBodyOfOp(wsLoopOp, converter, currentLocation, + createBodyOfOp(wsLoopOp, converter, currentLocation, eval, &wsLoopOpClauseList, iv); } static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { auto &firOpBuilder = converter.getFirOpBuilder(); auto currentLocation = converter.getCurrentLocation(); @@ -642,7 +663,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, firOpBuilder.getContext(), global.sym_name())); } }(); - createBodyOfOp(criticalOp, converter, currentLocation); + createBodyOfOp(criticalOp, converter, currentLocation, eval); } void Fortran::lower::genOpenMPConstruct( @@ -678,7 +699,9 @@ void Fortran::lower::genOpenMPConstruct( TODO(converter.getCurrentLocation(), "OpenMPAtomicConstruct"); }, [&](const Fortran::parser::OpenMPCriticalConstruct - &criticalConstruct) { genOMP(converter, criticalConstruct); }, + &criticalConstruct) { + genOMP(converter, eval, criticalConstruct); + }, }, ompConstruct.u); } diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 449fb3cd0bd87..84df72de7e9ac 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -296,11 +296,11 @@ class PFTBuilder { resetFunctionState(); } - /// Initialize a new construct and make it the builder's focus. + /// Initialize a new construct or directive and make it the builder's focus. template - bool enterConstructOrDirective(const A &construct) { - auto &eval = - addEvaluation(lower::pft::Evaluation{construct, pftParentStack.back()}); + bool enterConstructOrDirective(const A &constructOrDirective) { + auto &eval = addEvaluation( + lower::pft::Evaluation{constructOrDirective, pftParentStack.back()}); eval.evaluationList.reset(new lower::pft::EvaluationList); pushEvaluationList(eval.evaluationList.get()); pftParentStack.emplace_back(eval); @@ -310,6 +310,17 @@ class PFTBuilder { void exitConstructOrDirective() { rewriteIfGotos(); + auto *eval = constructAndDirectiveStack.back(); + if (eval->isExecutableDirective()) { + // A construct at the end of an (unstructured) OpenACC or OpenMP + // construct region must have an exit target inside the region. + auto &evaluationList = *eval->evaluationList; + if (!evaluationList.empty() && evaluationList.back().isConstruct()) { + static const parser::ContinueStmt exitTarget{}; + addEvaluation( + lower::pft::Evaluation{exitTarget, pftParentStack.back(), {}, {}}); + } + } popEvaluationList(); pftParentStack.pop_back(); constructAndDirectiveStack.pop_back(); @@ -372,7 +383,8 @@ class PFTBuilder { auto &entryPointList = eval.getOwningProcedure()->entryPointList; evaluationListStack.back()->emplace_back(std::move(eval)); lower::pft::Evaluation *p = &evaluationListStack.back()->back(); - if (p->isActionStmt() || p->isConstructStmt() || p->isEndStmt()) { + if (p->isActionStmt() || p->isConstructStmt() || p->isEndStmt() || + p->isExecutableDirective()) { if (lastLexicalEvaluation) { lastLexicalEvaluation->lexicalSuccessor = p; p->printIndex = lastLexicalEvaluation->printIndex + 1; @@ -1017,33 +1029,32 @@ class PFTDumper { const lower::pft::Evaluation &eval, const std::string &indentString, int indent = 1) { llvm::StringRef name = evaluationName(eval); - std::string bang = eval.isUnstructured ? "!" : ""; - if (eval.isConstruct() || eval.isDirective()) { - outputStream << indentString << "<<" << name << bang << ">>"; - if (eval.constructExit) - outputStream << " -> " << eval.constructExit->printIndex; - outputStream << '\n'; - dumpEvaluationList(outputStream, *eval.evaluationList, indent + 1); - outputStream << indentString << "<>\n"; - return; - } + llvm::StringRef newBlock = eval.isNewBlock ? "^" : ""; + llvm::StringRef bang = eval.isUnstructured ? "!" : ""; outputStream << indentString; if (eval.printIndex) outputStream << eval.printIndex << ' '; - if (eval.isNewBlock) - outputStream << '^'; - outputStream << name << bang; - if (eval.isActionStmt() || eval.isConstructStmt()) { - if (eval.negateCondition) - outputStream << " [negate]"; - if (eval.controlSuccessor) - outputStream << " -> " << eval.controlSuccessor->printIndex; - } else if (eval.isA() && eval.lexicalSuccessor) { + if (eval.hasNestedEvaluations()) + outputStream << "<<" << newBlock << name << bang << ">>"; + else + outputStream << newBlock << name << bang; + if (eval.negateCondition) + outputStream << " [negate]"; + if (eval.constructExit) + outputStream << " -> " << eval.constructExit->printIndex; + else if (eval.controlSuccessor) + outputStream << " -> " << eval.controlSuccessor->printIndex; + else if (eval.isA() && eval.lexicalSuccessor) outputStream << " -> " << eval.lexicalSuccessor->printIndex; - } if (!eval.position.empty()) outputStream << ": " << eval.position.ToString(); + else if (auto *dir = eval.getIf()) + outputStream << ": !" << dir->source.ToString(); outputStream << '\n'; + if (eval.hasNestedEvaluations()) { + dumpEvaluationList(outputStream, *eval.evaluationList, indent + 1); + outputStream << indentString << "<>\n"; + } } void dumpEvaluation(llvm::raw_ostream &ostream, diff --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90 new file mode 100644 index 0000000000000..6b3cbab167f9c --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-unstructured.f90 @@ -0,0 +1,111 @@ +! RUN: bbc %s -fopenmp -o "-" | FileCheck %s + +! CHECK-LABEL: func @_QPss1{{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss1(n) + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss1-A', i + enddo + !$omp master + print*, 'ss1-B', i + !$omp end master + print* +end + +! CHECK-LABEL: func @_QPss2{{.*}} { +! CHECK: omp.master { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.terminator +! CHECK: } +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: } +subroutine ss2(n) + !$omp master + print*, 'ss2-A', n + do i = 1, 3 + if (i .eq. n) exit + print*, 'ss2-B', i + enddo + !$omp end master + print*, 'ss2-C', i + print* +end + +! CHECK-LABEL: func @_QPss3{{.*}} { +! CHECK: omp.parallel { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb2 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb3 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: omp.wsloop {{.*}} { +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.wsloop {{.*}} { +! CHECK: br ^bb1 +! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 +! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 +! CHECK: ^bb2: // pred: ^bb1 +! CHECK: cond_br %{{[0-9]*}}, ^bb4, ^bb3 +! CHECK: ^bb3: // pred: ^bb2 +! CHECK: @_FortranAioBeginExternalListOutput +! CHECK: br ^bb1 +! CHECK: ^bb4: // 2 preds: ^bb1, ^bb2 +! CHECK: omp.yield +! CHECK: } +! CHECK: br ^bb1 +! CHECK: ^bb3: // pred: ^bb1 +! CHECK: omp.terminator +! CHECK: } +! CHECK: } +subroutine ss3(n) + !$omp parallel + do i = 1, 3 + !$omp do + do k = 1, 3 + print*, 'ss3-A', k + enddo + !$omp end do + !$omp do + do j = 1, 3 + do k = 1, 3 + if (k .eq. n) exit + print*, 'ss3-B', k + enddo + enddo + !$omp end do + enddo + !$omp end parallel +end + +! CHECK-LABEL: func @_QQmain +program p + call ss1(2) + call ss2(2) + call ss3(2) +end diff --git a/flang/test/Lower/pre-fir-tree01.f90 b/flang/test/Lower/pre-fir-tree01.f90 index c48611441499f..0af8eef28fc53 100644 --- a/flang/test/Lower/pre-fir-tree01.f90 +++ b/flang/test/Lower/pre-fir-tree01.f90 @@ -132,14 +132,12 @@ function bar() ! Test top level directives !DIR$ INTEGER=64 ! CHECK: CompilerDirective: -! CHECK: End CompilerDirective ! Test nested directive ! CHECK: Subroutine test_directive subroutine test_directive() !DIR$ INTEGER=64 - ! CHECK: <> - ! CHECK: <> + ! CHECK: CompilerDirective: end subroutine ! CHECK: EndSubroutine diff --git a/flang/test/Lower/pre-fir-tree02.f90 b/flang/test/Lower/pre-fir-tree02.f90 index 98572c54863de..7cc55df4c0bb8 100644 --- a/flang/test/Lower/pre-fir-tree02.f90 +++ b/flang/test/Lower/pre-fir-tree02.f90 @@ -212,8 +212,7 @@ function bar(x) ! CHECK: Subroutine sub subroutine sub(a) real(4):: a - ! CompilerDirective - ! CHECK: <> + ! CompilerDirective: !DIR$ IGNORE_TKR a end subroutine diff --git a/flang/test/Lower/pre-fir-tree05.f90 b/flang/test/Lower/pre-fir-tree05.f90 index 862bf87d9a3f7..ad6ba8a027a27 100644 --- a/flang/test/Lower/pre-fir-tree05.f90 +++ b/flang/test/Lower/pre-fir-tree05.f90 @@ -24,14 +24,15 @@ subroutine foo() ! CHECK-NEXT: EndDoStmt ! CHECK-NEXT: <> end do + ! CHECK-NEXT: ContinueStmt !$acc end parallel - ! CHECK-NEXT: <> + ! CHECK-NEXT: <> ! CHECK-NEXT: <> ! CHECK-NEXT: EndSubroutineStmt end subroutine ! CHECK-NEXT: End Subroutine foo -! CHECK: Subroutine foo +! CHECK: Subroutine foo2 subroutine foo2() ! CHECK-NEXT: <> !$acc parallel loop @@ -41,6 +42,7 @@ subroutine foo2() ! CHECK-NEXT: EndDoStmt ! CHECK-NEXT: <> end do + ! CHECK-NEXT: ContinueStmt !$acc end parallel loop ! CHECK-NEXT: <> ! CHECK-NEXT: EndSubroutineStmt From f1863f659e54a0c774d5b57c6b4969a5fda572ed Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Mon, 1 Nov 2021 19:25:37 -0700 Subject: [PATCH 2/3] add test comments --- flang/test/Lower/OpenMP/omp-unstructured.f90 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90 index 6b3cbab167f9c..68f944b91abea 100644 --- a/flang/test/Lower/OpenMP/omp-unstructured.f90 +++ b/flang/test/Lower/OpenMP/omp-unstructured.f90 @@ -1,3 +1,5 @@ +! Test unstructured code adjacent to and inside OpenMP constructs. + ! RUN: bbc %s -fopenmp -o "-" | FileCheck %s ! CHECK-LABEL: func @_QPss1{{.*}} { @@ -16,7 +18,7 @@ ! CHECK: } ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: } -subroutine ss1(n) +subroutine ss1(n) ! unstructured code followed by a structured OpenMP construct do i = 1, 3 if (i .eq. n) exit print*, 'ss1-A', i @@ -44,7 +46,7 @@ subroutine ss1(n) ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: } -subroutine ss2(n) +subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct !$omp master print*, 'ss2-A', n do i = 1, 3 @@ -83,7 +85,7 @@ subroutine ss2(n) ! CHECK: omp.terminator ! CHECK: } ! CHECK: } -subroutine ss3(n) +subroutine ss3(n) ! nested unstructured OpenMP constructs !$omp parallel do i = 1, 3 !$omp do From 44f26063e81c7304c9d70dee9d5d265bc43f6502 Mon Sep 17 00:00:00 2001 From: V Donaldson Date: Wed, 3 Nov 2021 11:20:43 -0700 Subject: [PATCH 3/3] Add a partial fix for a problem with nested parallelism --- flang/lib/Lower/OpenMP.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 7f7013573d4da..af2d52d4775d5 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -101,9 +101,24 @@ void createEmptyRegionBlocks( auto *region = &firOpBuilder.getRegion(); for (auto &eval : evaluationList) { if (eval.block) { - assert(eval.block->empty() && "block is not empty"); - eval.block->erase(); - eval.block = firOpBuilder.createBlock(region); + if (eval.block->empty()) { + eval.block->erase(); + eval.block = firOpBuilder.createBlock(region); + } else { + [[maybe_unused]] auto &terminatorOp = eval.block->back(); + assert((mlir::isa(terminatorOp) || + mlir::isa(terminatorOp)) && + "expected terminator op"); + // FIXME: Some subset of cases may need to insert a branch, + // although this could be handled elsewhere. + // if (?) { + // auto insertPt = firOpBuilder.saveInsertionPoint(); + // firOpBuilder.setInsertionPointAfter(region->getParentOp()); + // firOpBuilder.create( + // terminatorOp.getLoc(), eval.block); + // firOpBuilder.restoreInsertionPoint(insertPt); + // } + } } if (eval.hasNestedEvaluations()) createEmptyRegionBlocks(firOpBuilder, eval.getNestedEvaluations());