From 1e47d70530775f0394af8b7e18b2f8987c4292cb Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Thu, 7 Mar 2024 11:36:29 +0000
Subject: [PATCH 1/2] [Flang][OpenMP][MLIR][Lower] Update lowering to use
 wrapper ops

This patch introduces the following changes:
- Complete OpenMP directive sets to include masked- and taskloop-related
  ones.
- Refactor clause processing to store related operands in structures. These
  are combined into construct-specific operand structures using a mixin
  pattern, which simplifies keeping track of the operand lists used for
  creating OpenMP operations and reduces argument lists.
- Update the lowering to MLIR of loop constructs to follow the wrapper +
  canonical loop approach. This includes some changes to privatization that
  need testing.
- Add lowering for the omp.canonical_loop operation, based on omp.wsloop.
- Significantly refactor OpenMP construct lowering, separating composite
  constructs into their own "gen" functions and handling combined loop
  constructs in a more scalable way. Update genOMP functions to follow the
  same set of patterns to make them easier to understand. Split clause
  processing calls from code generation for operations in preparation for
  reusing the same code when dealing with composite constructs.
- Add basic support for taskloop, for the sake of completeness.
- Document missing clauses.
- Move some code from OpenMP.cpp to ClauseProcessor.cpp and call it there to
  simplify calls to certain process methods that were always followed by the
  same post-processing.
- Add and populate the "composite" attribute on wrapper operations.
- Add empty constructors for wrapper operations.
- Update `LoopWrapperInterface` to avoid compilation problems.
- Update `Task*` operations' names to follow the same capitalization
  convention.
- Change the SCF to OpenMP conversion to produce a wrapper omp.wsloop with a
  nested omp.canonical_loop rather than an invalid omp.wsloop.
- Document missing verifier checks on wrapper operations.
- Make temporary changes to the OpenMP to LLVM IR translation to make this
  compilable.

All these changes are still WIP, as they haven't been tested.
---
 .../flang/Semantics/openmp-directive-sets.h   |  172 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  267 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  121 +-
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |   32 +-
 flang/lib/Lower/OpenMP/DataSharingProcessor.h |    1 +
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 3261 ++++++++++-------
 flang/lib/Lower/OpenMP/OperationClauses.h     |  306 ++
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |    2 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.h   |    2 +-
 flang/lib/Lower/OpenMP/Utils.cpp              |   19 +
 flang/lib/Lower/OpenMP/Utils.h                |    3 +
 .../Transforms/DoConcurrentConversion.cpp     |   17 +-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |    8 +-
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |    4 +-
 .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp    |   10 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |    3 +
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |   50 +-
 17 files changed, 2503 insertions(+), 1775 deletions(-)
 create mode 100644 flang/lib/Lower/OpenMP/OperationClauses.h

diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index a4f27b00152e2..a40073ff47914 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -24,68 +24,80 @@ namespace llvm::omp {
 // - allSet: All standalone or combined uses of the directive.
static const OmpDirectiveSet topParallelSet{ - Directive::OMPD_parallel, - Directive::OMPD_parallel_do, Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_do, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, Directive::OMPD_parallel_sections, Directive::OMPD_parallel_workshare, + Directive::OMPD_parallel, }; static const OmpDirectiveSet allParallelSet{ - Directive::OMPD_distribute_parallel_do, - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_parallel, - Directive::OMPD_parallel_do, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_parallel_sections, - Directive::OMPD_parallel_workshare, - Directive::OMPD_target_parallel, - Directive::OMPD_target_parallel_do, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_teams_distribute_parallel_do, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_parallel_do, - Directive::OMPD_teams_distribute_parallel_do_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + Directive::OMPD_target_parallel, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, + } | topParallelSet, }; static const OmpDirectiveSet topDoSet{ - Directive::OMPD_do, Directive::OMPD_do_simd, + Directive::OMPD_do, }; static const OmpDirectiveSet allDoSet{ - Directive::OMPD_distribute_parallel_do, - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_parallel_do, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_do, - Directive::OMPD_do_simd, - Directive::OMPD_target_parallel_do, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_teams_distribute_parallel_do, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_parallel_do, - Directive::OMPD_teams_distribute_parallel_do_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, + Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_do, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, + } | topDoSet, }; static const OmpDirectiveSet topTaskloopSet{ - Directive::OMPD_taskloop, Directive::OMPD_taskloop_simd, + Directive::OMPD_taskloop, }; -static const OmpDirectiveSet allTaskloopSet{topTaskloopSet}; +static const OmpDirectiveSet allTaskloopSet{ + OmpDirectiveSet{ + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_masked_taskloop, + Directive::OMPD_master_taskloop_simd, + Directive::OMPD_master_taskloop, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, + } | topTaskloopSet, +}; static const OmpDirectiveSet topTargetSet{ - Directive::OMPD_target, - Directive::OMPD_target_parallel, - Directive::OMPD_target_parallel_do, Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_parallel_do, + 
Directive::OMPD_target_parallel, Directive::OMPD_target_simd, - Directive::OMPD_target_teams, - Directive::OMPD_target_teams_distribute, - Directive::OMPD_target_teams_distribute_parallel_do, Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_parallel_do, Directive::OMPD_target_teams_distribute_simd, + Directive::OMPD_target_teams_distribute, + Directive::OMPD_target_teams, + Directive::OMPD_target, }; static const OmpDirectiveSet allTargetSet{topTargetSet}; @@ -95,61 +107,61 @@ static const OmpDirectiveSet topSimdSet{ }; static const OmpDirectiveSet allSimdSet{ - Directive::OMPD_distribute_parallel_do_simd, - Directive::OMPD_distribute_simd, - Directive::OMPD_do_simd, - Directive::OMPD_parallel_do_simd, - Directive::OMPD_simd, - Directive::OMPD_target_parallel_do_simd, - Directive::OMPD_target_simd, - Directive::OMPD_target_teams_distribute_parallel_do_simd, - Directive::OMPD_target_teams_distribute_simd, - Directive::OMPD_taskloop_simd, - Directive::OMPD_teams_distribute_parallel_do_simd, - Directive::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_simd, + Directive::OMPD_do_simd, + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_master_taskloop_simd, + Directive::OMPD_parallel_do_simd, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_master_taskloop_simd, + Directive::OMPD_target_parallel_do_simd, + Directive::OMPD_target_simd, + Directive::OMPD_target_teams_distribute_parallel_do_simd, + Directive::OMPD_target_teams_distribute_simd, + Directive::OMPD_taskloop_simd, + Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_simd, + } | topSimdSet, }; static const OmpDirectiveSet topTeamsSet{ - Directive::OMPD_teams, - Directive::OMPD_teams_distribute, - Directive::OMPD_teams_distribute_parallel_do, Directive::OMPD_teams_distribute_parallel_do_simd, + Directive::OMPD_teams_distribute_parallel_do, Directive::OMPD_teams_distribute_simd, + Directive::OMPD_teams_distribute, + Directive::OMPD_teams, }; static const OmpDirectiveSet allTeamsSet{ - llvm::omp::OMPD_target_teams, - llvm::omp::OMPD_target_teams_distribute, - llvm::omp::OMPD_target_teams_distribute_parallel_do, - llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_target_teams_distribute_simd, - llvm::omp::OMPD_teams, - llvm::omp::OMPD_teams_distribute, - llvm::omp::OMPD_teams_distribute_parallel_do, - llvm::omp::OMPD_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_target_teams_distribute_parallel_do, + llvm::omp::OMPD_target_teams_distribute_simd, + llvm::omp::OMPD_target_teams_distribute, + llvm::omp::OMPD_target_teams, + } | topTeamsSet, }; static const OmpDirectiveSet topDistributeSet{ - Directive::OMPD_distribute, - Directive::OMPD_distribute_parallel_do, Directive::OMPD_distribute_parallel_do_simd, + Directive::OMPD_distribute_parallel_do, Directive::OMPD_distribute_simd, + Directive::OMPD_distribute, }; static const OmpDirectiveSet allDistributeSet{ - llvm::omp::OMPD_distribute, - llvm::omp::OMPD_distribute_parallel_do, - llvm::omp::OMPD_distribute_parallel_do_simd, - llvm::omp::OMPD_distribute_simd, - llvm::omp::OMPD_target_teams_distribute, - llvm::omp::OMPD_target_teams_distribute_parallel_do, - llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, - 
llvm::omp::OMPD_target_teams_distribute_simd, - llvm::omp::OMPD_teams_distribute, - llvm::omp::OMPD_teams_distribute_parallel_do, - llvm::omp::OMPD_teams_distribute_parallel_do_simd, - llvm::omp::OMPD_teams_distribute_simd, + OmpDirectiveSet{ + llvm::omp::OMPD_target_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_target_teams_distribute_parallel_do, + llvm::omp::OMPD_target_teams_distribute_simd, + llvm::omp::OMPD_target_teams_distribute, + llvm::omp::OMPD_teams_distribute_parallel_do_simd, + llvm::omp::OMPD_teams_distribute_parallel_do, + llvm::omp::OMPD_teams_distribute_simd, + llvm::omp::OMPD_teams_distribute, + } | topDistributeSet, }; //===----------------------------------------------------------------------===// @@ -188,8 +200,16 @@ static const OmpDirectiveSet loopConstructSet{ Directive::OMPD_distribute, Directive::OMPD_do_simd, Directive::OMPD_do, + Directive::OMPD_masked_taskloop, + Directive::OMPD_masked_taskloop_simd, + Directive::OMPD_master_taskloop, + Directive::OMPD_master_taskloop_simd, Directive::OMPD_parallel_do_simd, Directive::OMPD_parallel_do, + Directive::OMPD_parallel_masked_taskloop, + Directive::OMPD_parallel_masked_taskloop_simd, + Directive::OMPD_parallel_master_taskloop, + Directive::OMPD_parallel_master_taskloop_simd, Directive::OMPD_simd, Directive::OMPD_target_parallel_do_simd, Directive::OMPD_target_parallel_do, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 833989cdc7023..4cb2aa74e1791 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -230,17 +230,30 @@ addUseDeviceClause(Fortran::lower::AbstractConverter &converter, } } +static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, CollapseClauseOps &ops, + std::size_t loopVarTypeSize) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // The types of lower bound, upper bound, and step are converted into the + // type of the loop variable if necessary. 
+ mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + for (unsigned it = 0; it < (unsigned)ops.loopLBVar.size(); it++) { + ops.loopLBVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopLBVar[it]); + ops.loopUBVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopUBVar[it]); + ops.loopStepVar[it] = + firOpBuilder.createConvert(loc, loopVarType, ops.loopStepVar[it]); + } +} + //===----------------------------------------------------------------------===// // ClauseProcessor unique clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processCollapse( - mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const { +bool ClauseProcessor::processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &ops) const { bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -259,7 +272,7 @@ bool ClauseProcessor::processCollapse( found = true; } - loopVarTypeSize = 0; + std::size_t loopVarTypeSize = 0; do { Fortran::lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); @@ -271,18 +284,18 @@ bool ClauseProcessor::processCollapse( std::get_if(&loopControl->u); assert(bounds && "Expected bounds for worksharing do loop"); Fortran::lower::StatementContext stmtCtx; - lowerBound.push_back(fir::getBase(converter.genExprValue( + ops.loopLBVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->lower), stmtCtx))); - upperBound.push_back(fir::getBase(converter.genExprValue( + ops.loopUBVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->upper), stmtCtx))); if (bounds->step) { - step.push_back(fir::getBase(converter.genExprValue( + ops.loopStepVar.push_back(fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(bounds->step), stmtCtx))); } else { // If `step` is not present, assume it as `1`. 
- step.push_back(firOpBuilder.createIntegerConstant( + ops.loopStepVar.push_back(firOpBuilder.createIntegerConstant( currentLocation, firOpBuilder.getIntegerType(32), 1)); } - iv.push_back(bounds->name.thing.symbol); + ops.loopIV.push_back(bounds->name.thing.symbol); loopVarTypeSize = std::max(loopVarTypeSize, bounds->name.thing.symbol->GetUltimate().size()); collapseValue--; @@ -290,6 +303,9 @@ bool ClauseProcessor::processCollapse( &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + if (found) + convertLoopBounds(converter, currentLocation, ops, loopVarTypeSize); + return found; } @@ -316,7 +332,7 @@ bool ClauseProcessor::processDefault() const { } bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + DeviceClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *deviceClause = findUniqueClause(&source)) { mlir::Location clauseLocation = converter.genLocation(*source); @@ -330,26 +346,26 @@ bool ClauseProcessor::processDevice(Fortran::lower::StatementContext &stmtCtx, } if (const auto *deviceExpr = Fortran::semantics::GetExpr( std::get(deviceClause->v.t))) { - result = fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); + result.deviceVar = + fir::getBase(converter.genExprValue(*deviceExpr, stmtCtx)); } return true; } return false; } -bool ClauseProcessor::processDeviceType( - mlir::omp::DeclareTargetDeviceType &result) const { +bool ClauseProcessor::processDeviceType(DeviceTypeClauseOps &result) const { if (auto *deviceTypeClause = findUniqueClause()) { // Case: declare target ... device_type(any | host | nohost) switch (deviceTypeClause->v.v) { case Fortran::parser::OmpDeviceTypeClause::Type::Nohost: - result = mlir::omp::DeclareTargetDeviceType::nohost; + result.deviceType = mlir::omp::DeclareTargetDeviceType::nohost; break; case Fortran::parser::OmpDeviceTypeClause::Type::Host: - result = mlir::omp::DeclareTargetDeviceType::host; + result.deviceType = mlir::omp::DeclareTargetDeviceType::host; break; case Fortran::parser::OmpDeviceTypeClause::Type::Any: - result = mlir::omp::DeclareTargetDeviceType::any; + result.deviceType = mlir::omp::DeclareTargetDeviceType::any; break; } return true; @@ -358,7 +374,7 @@ bool ClauseProcessor::processDeviceType( } bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + FinalClauseOps &result) const { const Fortran::parser::CharBlock *source = nullptr; if (auto *finalClause = findUniqueClause(&source)) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -366,38 +382,38 @@ bool ClauseProcessor::processFinal(Fortran::lower::StatementContext &stmtCtx, mlir::Value finalVal = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(finalClause->v), stmtCtx)); - result = firOpBuilder.createConvert(clauseLocation, - firOpBuilder.getI1Type(), finalVal); + result.finalVar = firOpBuilder.createConvert( + clauseLocation, firOpBuilder.getI1Type(), finalVal); return true; } return false; } -bool ClauseProcessor::processHint(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processHint(HintClauseOps &result) const { if (auto *hintClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(hintClause->v); int64_t hintValue = *Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(hintValue); + result.hintAttr = 
firOpBuilder.getI64IntegerAttr(hintValue); return true; } return false; } -bool ClauseProcessor::processMergeable(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processMergeable(MergeableClauseOps &result) const { + return markClauseOccurrence(result.mergeableAttr); } -bool ClauseProcessor::processNowait(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processNowait(NowaitClauseOps &result) const { + return markClauseOccurrence(result.nowaitAttr); } bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + NumTeamsClauseOps &result) const { // TODO Get lower and upper bounds for num_teams when parser is updated to // accept both. if (auto *numTeamsClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.numTeamsUpperVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numTeamsClause->v), stmtCtx)); return true; } @@ -405,17 +421,18 @@ bool ClauseProcessor::processNumTeams(Fortran::lower::StatementContext &stmtCtx, } bool ClauseProcessor::processNumThreads( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + NumThreadsClauseOps &result) const { if (auto *numThreadsClause = findUniqueClause()) { // OMPIRBuilder expects `NUM_THREADS` clause as a `Value`. - result = fir::getBase(converter.genExprValue( + result.numThreadsVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(numThreadsClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processOrdered(OrderedClauseOps &result) const { if (auto *orderedClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); int64_t orderedClauseValue = 0l; @@ -423,48 +440,45 @@ bool ClauseProcessor::processOrdered(mlir::IntegerAttr &result) const { const auto *expr = Fortran::semantics::GetExpr(orderedClause->v); orderedClauseValue = *Fortran::evaluate::ToInt64(*expr); } - result = firOpBuilder.getI64IntegerAttr(orderedClauseValue); + result.orderedAttr = firOpBuilder.getI64IntegerAttr(orderedClauseValue); return true; } return false; } bool ClauseProcessor::processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const { + PriorityClauseOps &result) const { if (auto *priorityClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.priorityVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(priorityClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processProcBind( - mlir::omp::ClauseProcBindKindAttr &result) const { +bool ClauseProcessor::processProcBind(ProcBindClauseOps &result) const { if (auto *procBindClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - result = genProcBindKindAttr(firOpBuilder, procBindClause); + result.procBindKindAttr = genProcBindKindAttr(firOpBuilder, procBindClause); return true; } return false; } -bool ClauseProcessor::processSafelen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSafelen(SafelenClauseOps &result) const { if (auto *safelenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(safelenClause->v); const std::optional safelenVal = Fortran::evaluate::ToInt64(*expr); 
- result = firOpBuilder.getI64IntegerAttr(*safelenVal); + result.safelenAttr = firOpBuilder.getI64IntegerAttr(*safelenVal); return true; } return false; } -bool ClauseProcessor::processSchedule( - mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const { +bool ClauseProcessor::processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const { if (auto *scheduleClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::MLIRContext *context = firOpBuilder.getContext(); @@ -496,26 +510,21 @@ bool ClauseProcessor::processSchedule( getScheduleModifier(scheduleClause->v); if (scheduleModifier != mlir::omp::ScheduleModifier::none) - modifierAttr = + result.scheduleModAttr = mlir::omp::ScheduleModifierAttr::get(context, scheduleModifier); if (getSimdModifier(scheduleClause->v) != mlir::omp::ScheduleModifier::none) - simdModifierAttr = firOpBuilder.getUnitAttr(); + result.scheduleSimdAttr = firOpBuilder.getUnitAttr(); - valAttr = mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); - return true; - } - return false; -} + result.scheduleValAttr = + mlir::omp::ClauseScheduleKindAttr::get(context, scheduleKind); -bool ClauseProcessor::processScheduleChunk( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { - if (auto *scheduleClause = findUniqueClause()) { if (const auto &chunkExpr = std::get>( scheduleClause->v.t)) { if (const auto *expr = Fortran::semantics::GetExpr(*chunkExpr)) { - result = fir::getBase(converter.genExprValue(*expr, stmtCtx)); + result.scheduleChunkVar = + fir::getBase(converter.genExprValue(*expr, stmtCtx)); } } return true; @@ -523,48 +532,47 @@ bool ClauseProcessor::processScheduleChunk( return false; } -bool ClauseProcessor::processSimdlen(mlir::IntegerAttr &result) const { +bool ClauseProcessor::processSimdlen(SimdlenClauseOps &result) const { if (auto *simdlenClause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v); const std::optional simdlenVal = Fortran::evaluate::ToInt64(*expr); - result = firOpBuilder.getI64IntegerAttr(*simdlenVal); + result.simdlenAttr = firOpBuilder.getI64IntegerAttr(*simdlenVal); return true; } return false; } bool ClauseProcessor::processThreadLimit( - Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const { + Fortran::lower::StatementContext &stmtCtx, + ThreadLimitClauseOps &result) const { if (auto *threadLmtClause = findUniqueClause()) { - result = fir::getBase(converter.genExprValue( + result.threadLimitVar = fir::getBase(converter.genExprValue( *Fortran::semantics::GetExpr(threadLmtClause->v), stmtCtx)); return true; } return false; } -bool ClauseProcessor::processUntied(mlir::UnitAttr &result) const { - return markClauseOccurrence(result); +bool ClauseProcessor::processUntied(UntiedClauseOps &result) const { + return markClauseOccurrence(result.untiedAttr); } //===----------------------------------------------------------------------===// // ClauseProcessor repeatable clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processAllocate( - llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const { +bool ClauseProcessor::processAllocate(AllocateClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Allocate *allocateClause, const 
Fortran::parser::CharBlock &) { - genAllocateClause(converter, allocateClause->v, allocatorOperands, - allocateOperands); + genAllocateClause(converter, allocateClause->v, result.allocatorVars, + result.allocateVars); }); } -bool ClauseProcessor::processCopyin() const { +bool ClauseProcessor::processCopyin(CopyinClauseOps &) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); @@ -709,10 +717,8 @@ createCopyFunc(mlir::Location loc, Fortran::lower::AbstractConverter &converter, return funcOp; } -bool ClauseProcessor::processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const { +bool ClauseProcessor::processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const { auto addCopyPrivateVar = [&](Fortran::semantics::Symbol *sym) { mlir::Value symVal = converter.getSymbolAddress(*sym); auto declOp = symVal.getDefiningOp(); @@ -739,10 +745,10 @@ bool ClauseProcessor::processCopyPrivate( cpVar = alloca; } - copyPrivateVars.push_back(cpVar); + result.copyprivateVars.push_back(cpVar); mlir::func::FuncOp funcOp = createCopyFunc(currentLocation, converter, cpVar.getType(), attrs); - copyPrivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); + result.copyprivateFuncs.push_back(mlir::SymbolRefAttr::get(funcOp)); }; bool hasCopyPrivate = findRepeatableClause( @@ -765,9 +771,7 @@ bool ClauseProcessor::processCopyPrivate( return hasCopyPrivate; } -bool ClauseProcessor::processDepend( - llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const { +bool ClauseProcessor::processDepend(DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( @@ -780,8 +784,8 @@ bool ClauseProcessor::processDepend( .t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, dependClause); - dependTypeOperands.insert(dependTypeOperands.end(), depVal.size(), - dependTypeOperand); + result.dependTypeAttrs.insert(result.dependTypeAttrs.end(), + depVal.size(), dependTypeOperand); for (const Fortran::parser::Designator &ompObject : depVal) { Fortran::semantics::Symbol *sym = nullptr; std::visit( @@ -803,14 +807,14 @@ bool ClauseProcessor::processDepend( }}, (ompObject).u); const mlir::Value variable = converter.getSymbolAddress(*sym); - dependOperands.push_back(variable); + result.dependVars.push_back(variable); } }); } bool ClauseProcessor::processIf( Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const { + IfClauseOps &result) const { bool found = false; findRepeatableClause( [&](const ClauseTy::If *ifClause, @@ -821,21 +825,21 @@ bool ClauseProcessor::processIf( // Assume that, at most, a single 'if' clause will be applicable to the // given directive. if (operand) { - result = operand; + result.ifVar = operand; found = true; } }); return found; } -bool ClauseProcessor::processLink( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processLink(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Link *linkClause, const Fortran::parser::CharBlock &) { // Case: declare target link(var1, var2)... 
- gatherFuncAndVarSyms( - linkClause->v, mlir::omp::DeclareTargetCaptureClause::link, result); + gatherFuncAndVarSyms(linkClause->v, + mlir::omp::DeclareTargetCaptureClause::link, + result.symbolAndClause); }); } @@ -863,14 +867,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, return op; } -bool ClauseProcessor::processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes, - llvm::SmallVectorImpl *mapSymLocs, - llvm::SmallVectorImpl *mapSymbols) - const { +bool ClauseProcessor::processMap(mlir::Location currentLocation, + Fortran::lower::StatementContext &stmtCtx, + MapClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); return findRepeatableClause( [&](const ClauseTy::Map *mapClause, @@ -946,100 +945,92 @@ bool ClauseProcessor::processMap( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); - if (mapSymTypes) - mapSymTypes->push_back(symAddr.getType()); - if (mapSymLocs) - mapSymLocs->push_back(symAddr.getLoc()); - - if (mapSymbols) - mapSymbols->push_back(getOmpObjectSymbol(ompObject)); + result.mapVars.push_back(mapOp); + if (result.mapSymTypes) + result.mapSymTypes->push_back(symAddr.getType()); + if (result.mapSymLocs) + result.mapSymLocs->push_back(symAddr.getLoc()); + if (result.mapSymbols) + result.mapSymbols->push_back(getOmpObjectSymbol(ompObject)); } }); } bool ClauseProcessor::processTargetReduction( - llvm::SmallVector &reductionSymbols) - const { + TargetReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; - rp.addReductionSym(reductionClause->v, reductionSymbols); + rp.addReductionSym(reductionClause->v, result.targetReductionSymbols); }); } -bool ClauseProcessor::processReduction( - mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl *reductionSymbols) - const { +bool ClauseProcessor::processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Reduction *reductionClause, const Fortran::parser::CharBlock &) { ReductionProcessor rp; rp.addReductionDecl(currentLocation, converter, reductionClause->v, - reductionVars, reductionDeclSymbols, - reductionSymbols); + result.reductionVars, result.reductionDeclSymbols, + result.reductionSymbols ? &*result.reductionSymbols + : nullptr); + result.reductionTypes.reserve(result.reductionVars.size()); + llvm::transform(result.reductionVars, + std::back_inserter(result.reductionTypes), + [](mlir::Value v) { return v.getType(); }); }); } -bool ClauseProcessor::processSectionsReduction( - mlir::Location currentLocation) const { +bool ClauseProcessor::processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &) const { return findRepeatableClause( [&](const ClauseTy::Reduction *, const Fortran::parser::CharBlock &) { + // Either implement special handling or remove this method and use the + // generic processReduction() method instead. 
TODO(currentLocation, "OMPC_Reduction"); }); } -bool ClauseProcessor::processTo( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processTo(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::To *toClause, const Fortran::parser::CharBlock &) { // Case: declare target to(func, var1, var2)... gatherFuncAndVarSyms(toClause->v, - mlir::omp::DeclareTargetCaptureClause::to, result); + mlir::omp::DeclareTargetCaptureClause::to, + result.symbolAndClause); }); } -bool ClauseProcessor::processEnter( - llvm::SmallVectorImpl &result) const { +bool ClauseProcessor::processEnter(EnterLinkToClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::Enter *enterClause, const Fortran::parser::CharBlock &) { // Case: declare target enter(func, var1, var2)... gatherFuncAndVarSyms(enterClause->v, mlir::omp::DeclareTargetCaptureClause::enter, - result); + result.symbolAndClause); }); } -bool ClauseProcessor::processUseDeviceAddr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDeviceAddr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDeviceAddr *devAddrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devAddrClause->v, operands, - useDeviceTypes, useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devAddrClause->v, + result.useDeviceAddrVars, result.useDeviceTypes, + result.useDeviceLocs, result.useDeviceSymbols); }); } -bool ClauseProcessor::processUseDevicePtr( - llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl &useDeviceSymbols) - const { +bool ClauseProcessor::processUseDevicePtr(UseDeviceClauseOps &result) const { return findRepeatableClause( [&](const ClauseTy::UseDevicePtr *devPtrClause, const Fortran::parser::CharBlock &) { - addUseDeviceClause(converter, devPtrClause->v, operands, useDeviceTypes, - useDeviceLocs, useDeviceSymbols); + addUseDeviceClause(converter, devPtrClause->v, result.useDevicePtrVars, + result.useDeviceTypes, result.useDeviceLocs, + result.useDeviceSymbols); }); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 0b91aca3d1344..c6b0b73dd1a56 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -13,6 +13,7 @@ #define FORTRAN_LOWER_CLAUASEPROCESSOR_H #include "DirectivesCommon.h" +#include "OperationClauses.h" #include "ReductionProcessor.h" #include "Utils.h" #include "flang/Lower/AbstractConverter.h" @@ -54,101 +55,62 @@ class ClauseProcessor { : converter(converter), semaCtx(semaCtx), clauses(clauses) {} // 'Unique' clauses: They can appear at most once in the clause list. 
- bool - processCollapse(mlir::Location currentLocation, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const; + bool processCollapse(mlir::Location currentLocation, + Fortran::lower::pft::Evaluation &eval, + CollapseClauseOps &result) const; bool processDefault() const; bool processDevice(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processDeviceType(mlir::omp::DeclareTargetDeviceType &result) const; + DeviceClauseOps &result) const; + bool processDeviceType(DeviceTypeClauseOps &result) const; bool processFinal(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processHint(mlir::IntegerAttr &result) const; - bool processMergeable(mlir::UnitAttr &result) const; - bool processNowait(mlir::UnitAttr &result) const; + FinalClauseOps &result) const; + bool processHint(HintClauseOps &result) const; + bool processMergeable(MergeableClauseOps &result) const; + bool processNowait(NowaitClauseOps &result) const; bool processNumTeams(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; + NumTeamsClauseOps &result) const; bool processNumThreads(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processOrdered(mlir::IntegerAttr &result) const; + NumThreadsClauseOps &result) const; + bool processOrdered(OrderedClauseOps &result) const; bool processPriority(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processProcBind(mlir::omp::ClauseProcBindKindAttr &result) const; - bool processSafelen(mlir::IntegerAttr &result) const; - bool processSchedule(mlir::omp::ClauseScheduleKindAttr &valAttr, - mlir::omp::ScheduleModifierAttr &modifierAttr, - mlir::UnitAttr &simdModifierAttr) const; - bool processScheduleChunk(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processSimdlen(mlir::IntegerAttr &result) const; + PriorityClauseOps &result) const; + bool processProcBind(ProcBindClauseOps &result) const; + bool processSafelen(SafelenClauseOps &result) const; + bool processSchedule(Fortran::lower::StatementContext &stmtCtx, + ScheduleClauseOps &result) const; + bool processSimdlen(SimdlenClauseOps &result) const; bool processThreadLimit(Fortran::lower::StatementContext &stmtCtx, - mlir::Value &result) const; - bool processUntied(mlir::UnitAttr &result) const; + ThreadLimitClauseOps &result) const; + bool processUntied(UntiedClauseOps &result) const; // 'Repeatable' clauses: They can appear multiple times in the clause list. 
- bool - processAllocate(llvm::SmallVectorImpl &allocatorOperands, - llvm::SmallVectorImpl &allocateOperands) const; - bool processCopyin() const; - bool processCopyPrivate( - mlir::Location currentLocation, - llvm::SmallVectorImpl ©PrivateVars, - llvm::SmallVectorImpl ©PrivateFuncs) const; - bool processDepend(llvm::SmallVectorImpl &dependTypeOperands, - llvm::SmallVectorImpl &dependOperands) const; - bool - processEnter(llvm::SmallVectorImpl &result) const; + bool processAllocate(AllocateClauseOps &result) const; + bool processCopyin(CopyinClauseOps &result) const; + bool processCopyprivate(mlir::Location currentLocation, + CopyprivateClauseOps &result) const; + bool processDepend(DependClauseOps &result) const; + bool processEnter(EnterLinkToClauseOps &result) const; bool processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName, - mlir::Value &result) const; - bool - processLink(llvm::SmallVectorImpl &result) const; + IfClauseOps &result) const; + bool processLink(EnterLinkToClauseOps &result) const; // This method is used to process a map clause. - // The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to - // store the original type, location and Fortran symbol for the map operands. - // They may be used later on to create the block_arguments for some of the - // target directives that require it. bool processMap(mlir::Location currentLocation, - const llvm::omp::Directive &directive, Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands, - llvm::SmallVectorImpl *mapSymTypes = nullptr, - llvm::SmallVectorImpl *mapSymLocs = nullptr, - llvm::SmallVectorImpl - *mapSymbols = nullptr) const; - bool - processReduction(mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl - *reductionSymbols = nullptr) const; - bool processTargetReduction( - llvm::SmallVector &reductionSymbols) - const; - bool processSectionsReduction(mlir::Location currentLocation) const; - bool processTo(llvm::SmallVectorImpl &result) const; - bool - processUseDeviceAddr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; - bool - processUseDevicePtr(llvm::SmallVectorImpl &operands, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) const; + MapClauseOps &result) const; + bool processReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTargetReduction(TargetReductionClauseOps &result) const; + bool processSectionsReduction(mlir::Location currentLocation, + ReductionClauseOps &result) const; + bool processTo(EnterLinkToClauseOps &result) const; + bool processUseDeviceAddr(UseDeviceClauseOps &result) const; + bool processUseDevicePtr(UseDeviceClauseOps &result) const; template bool processMotionClauses(Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands); + MapClauseOps &result); // Call this method for these clauses that should be supported but are not // implemented yet. 
It triggers a compilation error if any of the given @@ -189,8 +151,7 @@ class ClauseProcessor { template bool ClauseProcessor::processMotionClauses( - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl &mapOperands) { + Fortran::lower::StatementContext &stmtCtx, MapClauseOps &result) { return findRepeatableClause( [&](const T *motionClause, const Fortran::parser::CharBlock &source) { mlir::Location clauseLocation = converter.genLocation(source); @@ -232,7 +193,7 @@ bool ClauseProcessor::processMotionClauses( mapTypeBits), mlir::omp::VariableCaptureKind::ByRef, symAddr.getType()); - mapOperands.push_back(mapOp); + result.mapVars.push_back(mapOp); } }); } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 717b8cc0276a3..5c27a91bd469f 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -213,21 +213,23 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } } - } else if (mlir::isa(op)) { - // Update the original variable just before exiting the worksharing - // loop. Conversion as follows: + } else if (mlir::isa(op)) { + // TODO Check that the change from WsLoopOp to LoopNestOp didn't + // break anything here. + // Update the original variable just before exiting the loop. Conversion + // as follows: // - // omp.wsloop { - // omp.wsloop { ... - // ... store - // store ===> %v = arith.addi %iv, %step - // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub - // } fir.if %cmp { - // fir.store %v to %loopIV - // ^%lpv_update_blk: - // } - // omp.yield + // omp.loopnest { + // omp.loopnest { ... + // ... store + // store ===> %v = arith.addi %iv, %step + // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub + // } fir.if %cmp { + // fir.store %v to %loopIV + // ^%lpv_update_blk: // } + // omp.yield + // } // // Only generate the compare once in presence of multiple LastPrivate @@ -242,8 +244,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::Value iv = op->getRegion(0).front().getArguments()[0]; mlir::Value ub = - mlir::dyn_cast(op).getUpperBound()[0]; - mlir::Value step = mlir::dyn_cast(op).getStep()[0]; + mlir::cast(op).getUpperBound()[0]; + mlir::Value step = mlir::cast(op).getStep()[0]; // v = iv + step // cmp = step < 0 ? v < ub : v > ub diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 9f7301df07598..c22d1e966df90 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -24,6 +24,7 @@ namespace omp { class DataSharingProcessor { public: + // TODO Replace with PrivateClauseOps. /// Collects all the information needed for delayed privatization. This can be /// used by ops with data-sharing clauses to properly generate their regions /// (e.g. 
add region arguments) and map the original SSA values to their diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e499e16c19e04..fdb8ef4977bc9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -38,38 +38,6 @@ using namespace Fortran::lower::omp; -//===----------------------------------------------------------------------===// -// Code generation helper functions -//===----------------------------------------------------------------------===// - -static Fortran::lower::pft::Evaluation * -getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { - // Return the Evaluation of the innermost collapsed loop, or the current one - // if there was no COLLAPSE. - if (collapseValue == 0) - return &eval; - - Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); - for (int i = 1; i < collapseValue; i++) { - // The nested evaluations should be DoConstructs (i.e. they should form - // a loop nest). Each DoConstruct is a tuple . - assert(curEval->isA()); - curEval = &*std::next(curEval->getNestedEvaluations().begin()); - } - return curEval; -} - -static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, - int collapseValue = 0) { - Fortran::lower::pft::Evaluation *curEval = - getCollapsedLoopEval(eval, collapseValue); - - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) - converter.genEval(e); -} - //===----------------------------------------------------------------------===// // HostClausesInsertionGuard //===----------------------------------------------------------------------===// @@ -139,6 +107,113 @@ class HostClausesInsertionGuard { } }; +//===----------------------------------------------------------------------===// +// OpWithBodyGenInfo +//===----------------------------------------------------------------------===// + +struct OpWithBodyGenInfo { + /// A type for a code-gen callback function. This takes as argument the op for + /// which the code is being generated and returns the arguments of the op's + /// region. + using GenOMPRegionEntryCBFn = + std::function( + mlir::Operation *)>; + + OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location loc, Fortran::lower::pft::Evaluation &eval) + : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} + + OpWithBodyGenInfo &setGenNested(bool value) { + genNested = value; + return *this; + } + + OpWithBodyGenInfo &setOuterCombined(bool value) { + outerCombined = value; + return *this; + } + + OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { + clauses = value; + return *this; + } + + OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { + dsp = value; + return *this; + } + + OpWithBodyGenInfo & + setReductions(llvm::ArrayRef symbols, + llvm::ArrayRef types) { + reductionSymbols = symbols; + reductionTypes = types; + return *this; + } + + OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { + genRegionEntryCB = value; + return *this; + } + + /// [inout] converter to use for the clauses. + Fortran::lower::AbstractConverter &converter; + /// [in] Semantics context + Fortran::semantics::SemanticsContext &semaCtx; + /// [in] location in source code. + mlir::Location loc; + /// [in] current PFT node/evaluation. 
+ Fortran::lower::pft::Evaluation &eval; + /// [in] whether to generate FIR for nested evaluations + bool genNested = true; + /// [in] is this an outer operation - prevents privatization. + bool outerCombined = false; + /// [in] list of clauses to process. + const Fortran::parser::OmpClauseList *clauses = nullptr; + /// [in] if provided, processes the construct's data-sharing attributes. + DataSharingProcessor *dsp = nullptr; + /// [in] if provided, list of reduction symbols + llvm::ArrayRef reductionSymbols; + /// [in] if provided, list of reduction types + llvm::ArrayRef reductionTypes; + /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block + /// is created in the region. + GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; +}; + +//===----------------------------------------------------------------------===// +// Code generation helper functions +//===----------------------------------------------------------------------===// + +static Fortran::lower::pft::Evaluation * +getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { + // Return the Evaluation of the innermost collapsed loop, or the current one + // if there was no COLLAPSE. + if (collapseValue == 0) + return &eval; + + Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); + for (int i = 1; i < collapseValue; i++) { + // The nested evaluations should be DoConstructs (i.e. they should form + // a loop nest). Each DoConstruct is a tuple . + assert(curEval->isA()); + curEval = &*std::next(curEval->getNestedEvaluations().begin()); + } + return curEval; +} + +static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + int collapseValue = 0) { + Fortran::lower::pft::Evaluation *curEval = + getCollapsedLoopEval(eval, collapseValue); + + for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) + converter.genEval(e); +} + static fir::GlobalOp globalInitialization( Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, @@ -282,268 +357,80 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, firOpBuilder.restoreInsertionPoint(insPt); } -static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, - std::size_t loopVarTypeSize) { - // OpenMP runtime requires 32-bit or 64-bit loop variables. - loopVarTypeSize = loopVarTypeSize * 8; - if (loopVarTypeSize < 32) { - loopVarTypeSize = 32; - } else if (loopVarTypeSize > 64) { - loopVarTypeSize = 64; - mlir::emitWarning(converter.getCurrentLocation(), - "OpenMP loop iteration variable cannot have more than 64 " - "bits size and will be narrowed into 64 bits."); - } - assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && - "OpenMP loop iteration variable size must be transformed into 32-bit " - "or 64-bit"); - return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); -} +/// Create the body (block) for an OpenMP Operation. +/// +/// \param [in] op - the operation the body belongs to. +/// \param [in] info - options controlling code-gen for the construction. 
+template +static void createBodyOfOp(Op &op, const OpWithBodyGenInfo &info) { + fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); -static mlir::Operation * -createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, mlir::Value indexVal, - const Fortran::semantics::Symbol *sym) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; - mlir::Type tempTy = converter.genType(*sym); - mlir::Value temp = firOpBuilder.create( - loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, - /*shapeParams*/ mlir::ValueRange{}, - llvm::ArrayRef{ - fir::getAdaptToByRefAttr(firOpBuilder)}); - converter.bindSymbol(*sym, temp); - firOpBuilder.restoreInsertionPoint(insPt); - mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); - mlir::Operation *storeOp = firOpBuilder.create( - loc, cvtVal, converter.getSymbolAddress(*sym)); - return storeOp; -} + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further + // uses of the induction variable should use this mlir value. + auto regionArgs = + [&]() -> llvm::SmallVector { + if (info.genRegionEntryCB != nullptr) { + return info.genRegionEntryCB(op); + } -static mlir::Value -calculateTripCount(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, llvm::ArrayRef lbs, - llvm::ArrayRef ubs, - llvm::ArrayRef steps) { - using namespace mlir::arith; - assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && - !lbs.empty() && "Invalid bounds or step"); + firOpBuilder.createBlock(&op.getRegion()); + return {}; + }(); + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); - fir::FirOpBuilder &b = converter.getFirOpBuilder(); + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (info.eval.lowerAsUnstructured() && !info.outerCombined) + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, info.eval.getNestedEvaluations()); - // Get the bit width of an integer-like type. - auto widthOf = [](mlir::Type ty) -> unsigned { - if (mlir::isa(ty)) { - return mlir::IndexType::kInternalStorageBitWidth; - } - if (auto tyInt = mlir::dyn_cast(ty)) { - return tyInt.getWidth(); - } - llvm_unreachable("Unexpected type"); - }; + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + // TODO Check that nothing broke from replacing WsLoopOp and SimdLoopOp here. + constexpr bool isLoop = std::is_same_v; + bool privatize = info.clauses && !info.outerCombined; - // For a type that is either IntegerType or IndexType, return the - // equivalent IntegerType. In the former case this is a no-op. 
- auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { - if (ty.isIndex()) { - return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + firOpBuilder.setInsertionPoint(marker); + std::optional tempDsp; + if (privatize) { + if (!info.dsp) { + tempDsp.emplace(info.converter, *info.clauses, info.eval); + tempDsp->processStep1(); } - assert(ty.isIntOrIndex() && "Unexpected type"); - return mlir::cast(ty); - }; + } - // For two given values, establish a common signless IntegerType - // that can represent any value of type of x and of type of y, - // and return the pair of x, y converted to the new type. - auto unifyToSignless = - [&](fir::FirOpBuilder &b, mlir::Value x, - mlir::Value y) -> std::pair { - auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); - unsigned width = std::max(widthOf(tyX), widthOf(tyY)); - auto wideTy = mlir::IntegerType::get(b.getContext(), width, - mlir::IntegerType::Signless); - return std::make_pair(b.createConvert(loc, wideTy, x), - b.createConvert(loc, wideTy, y)); - }; + if constexpr (std::is_same_v) { + threadPrivatizeVars(info.converter, info.eval); + if (info.clauses) { + firOpBuilder.setInsertionPoint(marker); + CopyinClauseOps clauseOps; + ClauseProcessor(info.converter, info.semaCtx, *info.clauses) + .processCopyin(clauseOps); + } + } - // Start with signless i32 by default. - auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); - - for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { - auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); - auto [step, step0] = unifyToSignless(b, origStep, tmpS0); - auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); - auto negStep = b.create(loc, step0, step); - mlir::Value absStep = b.create(loc, reverseCond, negStep, step); - - auto [lb, ub] = unifyToSignless(b, origLb, origUb); - auto start = b.create(loc, reverseCond, ub, lb); - auto end = b.create(loc, reverseCond, lb, ub); - - mlir::Value range = b.create(loc, end, start); - auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); - std::tie(range, absStep) = unifyToSignless(b, range, absStep); - // numSteps = (range /u absStep) + 1 - auto numSteps = - b.create(loc, b.create(loc, range, absStep), - b.createIntegerConstant(loc, range.getType(), 1)); - - auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); - auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); - auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); - tripCount = b.create(loc, totalTC, thisTC); - } - - return tripCount; -} - -struct OpWithBodyGenInfo { - /// A type for a code-gen callback function. This takes as argument the op for - /// which the code is being generated and returns the arguments of the op's - /// region. 
- using GenOMPRegionEntryCBFn = - std::function( - mlir::Operation *)>; - - OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location loc, Fortran::lower::pft::Evaluation &eval) - : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval) {} - - OpWithBodyGenInfo &setGenNested(bool value) { - genNested = value; - return *this; - } - - OpWithBodyGenInfo &setOuterCombined(bool value) { - outerCombined = value; - return *this; - } - - OpWithBodyGenInfo &setClauses(const Fortran::parser::OmpClauseList *value) { - clauses = value; - return *this; - } - - OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { - dsp = value; - return *this; - } - - OpWithBodyGenInfo & - setReductions(llvm::SmallVector *value1, - llvm::SmallVector *value2) { - reductionSymbols = value1; - reductionTypes = value2; - return *this; - } - - OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { - genRegionEntryCB = value; - return *this; - } - - /// [inout] converter to use for the clauses. - Fortran::lower::AbstractConverter &converter; - /// [in] Semantics context - Fortran::semantics::SemanticsContext &semaCtx; - /// [in] location in source code. - mlir::Location loc; - /// [in] current PFT node/evaluation. - Fortran::lower::pft::Evaluation &eval; - /// [in] whether to generate FIR for nested evaluations - bool genNested = true; - /// [in] is this an outer operation - prevents privatization. - bool outerCombined = false; - /// [in] list of clauses to process. - const Fortran::parser::OmpClauseList *clauses = nullptr; - /// [in] if provided, processes the construct's data-sharing attributes. - DataSharingProcessor *dsp = nullptr; - /// [in] if provided, list of reduction symbols - llvm::SmallVector *reductionSymbols = - nullptr; - /// [in] if provided, list of reduction types - llvm::SmallVector *reductionTypes = nullptr; - /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block - /// is created in the region. - GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; -}; - -/// Create the body (block) for an OpenMP Operation. -/// -/// \param [in] op - the operation the body belongs to. -/// \param [in] info - options controlling code-gen for the construction. -template -static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) { - fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); - - auto insertMarker = [](fir::FirOpBuilder &builder) { - mlir::Value undef = builder.create(builder.getUnknownLoc(), - builder.getIndexType()); - return undef.getDefiningOp(); - }; - - // If an argument for the region is provided then create the block with that - // argument. Also update the symbol's address with the mlir argument value. - // e.g. For loops the argument is the induction variable. And all further - // uses of the induction variable should use this mlir value. - auto regionArgs = - [&]() -> llvm::SmallVector { - if (info.genRegionEntryCB != nullptr) { - return info.genRegionEntryCB(op); - } - - firOpBuilder.createBlock(&op.getRegion()); - return {}; - }(); - // Mark the earliest insertion point. - mlir::Operation *marker = insertMarker(firOpBuilder); - - // If it is an unstructured region and is not the outer region of a combined - // construct, create empty blocks for all evaluations. 
- if (info.eval.lowerAsUnstructured() && !info.outerCombined) - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, info.eval.getNestedEvaluations()); - - // Start with privatization, so that the lowering of the nested - // code will use the right symbols. - constexpr bool isLoop = std::is_same_v || - std::is_same_v; - bool privatize = info.clauses && !info.outerCombined; - - firOpBuilder.setInsertionPoint(marker); - std::optional tempDsp; - if (privatize) { - if (!info.dsp) { - tempDsp.emplace(info.converter, *info.clauses, info.eval); - tempDsp->processStep1(); - } - } - - if constexpr (std::is_same_v) { - threadPrivatizeVars(info.converter, info.eval); - if (info.clauses) { - firOpBuilder.setInsertionPoint(marker); - ClauseProcessor(info.converter, info.semaCtx, *info.clauses) - .processCopyin(); - } - } - - if (info.genNested) { - // genFIR(Evaluation&) tries to patch up unterminated blocks, causing - // a lot of complications for our approach if the terminator generation - // is delayed past this point. Insert a temporary terminator here, then - // delete it. - firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); - auto *temp = Fortran::lower::genOpenMPTerminator( - firOpBuilder, op.getOperation(), info.loc); - firOpBuilder.setInsertionPointAfter(marker); - genNestedEvaluations(info.converter, info.eval); - temp->erase(); - } + if (info.genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of complications for our approach if the terminator generation + // is delayed past this point. Insert a temporary terminator here, then + // delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator( + firOpBuilder, op.getOperation(), info.loc); + firOpBuilder.setInsertionPointAfter(marker); + genNestedEvaluations(info.converter, info.eval); + temp->erase(); + } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. 
@@ -672,453 +559,43 @@ static void genBodyOfTargetDataOp( genNestedEvaluations(converter, eval); } -template -static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { - auto op = info.converter.getFirOpBuilder().create( - info.loc, std::forward(args)...); - createBodyOfOp(op, info); - return op; -} - -static mlir::omp::MasterOp -genMasterOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation) { - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested), - /*resultTypes=*/mlir::TypeRange()); -} - -static mlir::omp::OrderedRegionOp -genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation) { - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested), - /*simd=*/false); -} - -static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { - return eval.parent.visit(Fortran::common::visitors{ - [&](const Fortran::lower::pft::Program &parent) { - return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; - }, - [&](const Fortran::lower::pft::Evaluation &parent) { - for (auto &sibling : *parent.evaluationList) - if (&sibling != &eval && !sibling.isEndStmt()) - return true; - - return false; - }, - [&](const auto &parent) { - for (auto &sibling : parent.evaluationList) - if (&sibling != &eval && !sibling.isEndStmt()) - return true; +// This functions creates a block for the body of the targetOp's region. It adds +// all the symbols present in mapSymbols as block arguments to this block. 
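+// The entry block of the region receives one argument per mapped symbol, using
+// the types and locations given in mapSymTypes and mapSymLocs, and each symbol
+// is bound to its corresponding block argument.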
+static void genBodyOfTargetOp( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::omp::TargetOp &targetOp, + const llvm::SmallVector &mapSymTypes, + const llvm::SmallVector &mapSymLocs, + const llvm::SmallVector &mapSymbols, + const mlir::Location ¤tLocation) { + assert(mapSymTypes.size() == mapSymLocs.size()); - return false; - }}); -} + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Region ®ion = targetOp.getRegion(); -static mlir::omp::ParallelOp -genParallelOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, numThreadsClauseOperand; - mlir::omp::ClauseProcBindKindAttr procBindKindAttr; - llvm::SmallVector allocateOperands, allocatorOperands, - reductionVars; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, - ifClauseOperand); - cp.processProcBind(procBindKindAttr); - cp.processDefault(); - cp.processAllocate(allocatorOperands, allocateOperands); - if (!outerCombined) - cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols, - &reductionSymbols); + auto *regionBlock = + firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); + // Clones the `bounds` placing them inside the target region and returns them. 
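+  // Only bounds defined by memory-effect-free operations can be cloned; any
+  // other kind of bound operand is reported as unsupported for now.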
+ auto cloneBound = [&](mlir::Value bound) { + if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { + mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); + regionBlock->push_back(clonedOp); + return clonedOp->getResult(0); - auto reductionCallback = [&](mlir::Operation *op) { - llvm::SmallVector locs(reductionVars.size(), - currentLocation); - auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); - for (auto [arg, prv] : - llvm::zip_equal(reductionSymbols, block->getArguments())) { - converter.bindSymbol(*arg, prv); } - return reductionSymbols; + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported bound type"); }; - auto offloadModOp = - llvm::cast(*converter.getModuleOp()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); - - bool mustEvalOutsideTarget = - targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } else { - cp.processNumThreads(stmtCtx, numThreadsClauseOperand); - } - - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); - - if (!enableDelayedPrivatization) { - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - numThreadsClauseOperand, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; - } - - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, clauseList, eval, - /*useDelayedPrivatization=*/true, &symTable); - - if (privatize) - dsp.processStep1(); - - const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); - - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); - - llvm::SmallVector reductionLocs(reductionVars.size(), - currentLocation); - - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - mlir::Region ®ion = parallelOp.getRegion(); - - llvm::SmallVector privateVarTypes = reductionTypes; - privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarTypes), - [](mlir::Value v) { return v.getType(); }); - - llvm::SmallVector privateVarLocs = reductionLocs; - privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarLocs), - [](mlir::Value v) { return v.getLoc(); }); - - converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, - privateVarTypes, privateVarLocs); - - llvm::SmallVector allSymbols = - reductionSymbols; - allSymbols.append(delayedPrivatizationInfo.symbols); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); - } - - return 
allSymbols; - }; - - // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); - - llvm::SmallVector privatizers( - delayedPrivatizationInfo.privatizers.begin(), - delayedPrivatizationInfo.privatizers.end()); - - auto parallelOp = genOpWithBody( - genInfo, - /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, - /*num_threads_var=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols), - procBindKindAttr, delayedPrivatizationInfo.originalAddresses, - delayedPrivatizationInfo.privatizers.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - privatizers)); - - if (mustEvalOutsideTarget) { - if (numThreadsClauseOperand) - targetOp.getNumThreadsMutable().assign(numThreadsClauseOperand); - } else { - if (numThreadsClauseOperand) - parallelOp.getNumThreadsVarMutable().assign(numThreadsClauseOperand); - } - - return parallelOp; -} - -static mlir::omp::SectionOp -genSectionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList §ionsClauseList) { - // Currently only private/firstprivate clause is handled, and - // all privatization is done within `omp.section` operations. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(§ionsClauseList)); -} - -static mlir::omp::SingleOp -genSingleOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList &endClauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - llvm::SmallVector copyPrivateVars; - llvm::SmallVector copyPrivateFuncs; - mlir::UnitAttr nowaitAttr; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - - ClauseProcessor ecp(converter, semaCtx, endClauseList); - ecp.processNowait(nowaitAttr); - ecp.processCopyPrivate(currentLocation, copyPrivateVars, copyPrivateFuncs); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&beginClauseList), - allocateOperands, allocatorOperands, copyPrivateVars, - copyPrivateFuncs.empty() - ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - copyPrivateFuncs), - nowaitAttr); -} - -static mlir::omp::TaskOp -genTaskOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, finalClauseOperand, priorityClauseOperand; - mlir::UnitAttr untiedAttr, mergeableAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector allocateOperands, allocatorOperands, - dependOperands; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - cp.processFinal(stmtCtx, finalClauseOperand); - cp.processUntied(untiedAttr); - cp.processMergeable(mergeableAttr); - cp.processPriority(stmtCtx, priorityClauseOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_task); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - ifClauseOperand, finalClauseOperand, untiedAttr, mergeableAttr, - /*in_reduction_vars=*/mlir::ValueRange(), - /*in_reductions=*/nullptr, priorityClauseOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, allocateOperands, allocatorOperands); -} - -static mlir::omp::TaskGroupOp -genTaskGroupOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - llvm::SmallVector allocateOperands, allocatorOperands; - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskgroup); - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - /*task_reduction_vars=*/mlir::ValueRange(), - /*task_reductions=*/nullptr, allocateOperands, allocatorOperands); -} - -static mlir::omp::DataOp -genDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - llvm::SmallVector mapOperands, devicePtrOperands, - deviceAddrOperands; - llvm::SmallVector useDeviceTypes; - llvm::SmallVector useDeviceLocs; - llvm::SmallVector useDeviceSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processUseDevicePtr(devicePtrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processUseDeviceAddr(deviceAddrOperands, useDeviceTypes, useDeviceLocs, - useDeviceSymbols); - cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, - stmtCtx, mapOperands); - - auto dataOp = 
converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, devicePtrOperands, - deviceAddrOperands, mapOperands); - genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, - useDeviceTypes, useDeviceLocs, useDeviceSymbols, - currentLocation); - return dataOp; -} - -template -static OpTy -genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector dependTypeOperands; - - Fortran::parser::OmpIfClause::DirectiveNameModifier directiveName; - // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. - [[maybe_unused]] llvm::omp::Directive directive; - if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; - directive = llvm::omp::Directive::OMPD_target_enter_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; - directive = llvm::omp::Directive::OMPD_target_exit_data; - } else if constexpr (std::is_same_v) { - directiveName = - Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; - directive = llvm::omp::Directive::OMPD_target_update; - } else { - return nullptr; - } - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(directiveName, ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processNowait(nowaitAttr); - - if constexpr (std::is_same_v) { - cp.processMotionClauses(stmtCtx, - mapOperands); - cp.processMotionClauses(stmtCtx, - mapOperands); - - } else { - cp.processMap(currentLocation, directive, stmtCtx, mapOperands); - } - - return firOpBuilder.create( - currentLocation, ifClauseOperand, deviceOperand, - dependTypeOperands.empty() - ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands); -} - -// This functions creates a block for the body of the targetOp's region. It adds -// all the symbols present in mapSymbols as block arguments to this block. -static void genBodyOfTargetOp( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::omp::TargetOp &targetOp, - const llvm::SmallVector &mapSymTypes, - const llvm::SmallVector &mapSymLocs, - const llvm::SmallVector &mapSymbols, - const mlir::Location ¤tLocation) { - assert(mapSymTypes.size() == mapSymLocs.size()); - - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Region ®ion = targetOp.getRegion(); - - auto *regionBlock = - firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - - // Clones the `bounds` placing them inside the target region and returns them. 
- auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { - mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); - regionBlock->push_back(clonedOp); - return clonedOp->getResult(0); - } - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported bound type"); - }; - - auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { - llvm::SmallVector clonedBounds; - for (mlir::Value bound : bounds) - clonedBounds.emplace_back(cloneBound(bound)); - return clonedBounds; - }; + auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { + llvm::SmallVector clonedBounds; + for (mlir::Value bound : bounds) + clonedBounds.emplace_back(cloneBound(bound)); + return clonedBounds; + }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSymbols)) { @@ -1187,54 +664,1053 @@ static void genBodyOfTargetOp( firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - llvm::SmallVector bounds; - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), - bounds, llvm::SmallVector{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapOperandsMutable().append(mapOp); - mlir::Value clonedValArg = - region.addArgument(copyVal.getType(), copyVal.getLoc()); - firOpBuilder.setInsertionPointToStart(regionBlock); - auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf( - loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - firOpBuilder.setInsertionPoint(regionBlock, savedIP); - } + llvm::SmallVector bounds; + std::stringstream name; + firOpBuilder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), + bounds, llvm::SmallVector{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapOperandsMutable().append(mapOp); + mlir::Value clonedValArg = + region.addArgument(copyVal.getType(), copyVal.getLoc()); + firOpBuilder.setInsertionPointToStart(regionBlock); + auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), + clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + firOpBuilder.setInsertionPoint(regionBlock, savedIP); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + } + + // Insert dummy instruction to remember the insertion position. The + // marker will be deleted since there are not uses. + // In the HLFIR flow there are hlfir.declares inserted above while + // setting block arguments. + mlir::Value undefMarker = firOpBuilder.create( + targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + + // Create blocks for unstructured regions. This has to be done since + // blocks are initially allocated with the function as the parent region. 
+ if (eval.lowerAsUnstructured()) { + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + } + + firOpBuilder.create(currentLocation); + + // Create the insertion point after the marker. + firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); + if (genNested) + genNestedEvaluations(converter, eval); +} + +template +static OpTy genOpWithBody(const OpWithBodyGenInfo &info, Args &&...args) { + auto op = info.converter.getFirOpBuilder().create( + info.loc, std::forward(args)...); + createBodyOfOp(op, info); + return op; +} + +static mlir::Value +calculateTripCount(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, llvm::ArrayRef lbs, + llvm::ArrayRef ubs, + llvm::ArrayRef steps) { + using namespace mlir::arith; + assert(lbs.size() == ubs.size() && lbs.size() == steps.size() && + !lbs.empty() && "Invalid bounds or step"); + + fir::FirOpBuilder &b = converter.getFirOpBuilder(); + + // Get the bit width of an integer-like type. + auto widthOf = [](mlir::Type ty) -> unsigned { + if (mlir::isa(ty)) { + return mlir::IndexType::kInternalStorageBitWidth; + } + if (auto tyInt = mlir::dyn_cast(ty)) { + return tyInt.getWidth(); + } + llvm_unreachable("Unexpected type"); + }; + + // For a type that is either IntegerType or IndexType, return the + // equivalent IntegerType. In the former case this is a no-op. + auto asIntTy = [&](mlir::Type ty) -> mlir::IntegerType { + if (ty.isIndex()) { + return mlir::IntegerType::get(ty.getContext(), widthOf(ty)); + } + assert(ty.isIntOrIndex() && "Unexpected type"); + return mlir::cast(ty); + }; + + // For two given values, establish a common signless IntegerType + // that can represent any value of type of x and of type of y, + // and return the pair of x, y converted to the new type. + auto unifyToSignless = + [&](fir::FirOpBuilder &b, mlir::Value x, + mlir::Value y) -> std::pair { + auto tyX = asIntTy(x.getType()), tyY = asIntTy(y.getType()); + unsigned width = std::max(widthOf(tyX), widthOf(tyY)); + auto wideTy = mlir::IntegerType::get(b.getContext(), width, + mlir::IntegerType::Signless); + return std::make_pair(b.createConvert(loc, wideTy, x), + b.createConvert(loc, wideTy, y)); + }; + + // Start with signless i32 by default. 
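+  // For each loop in the nest, normalize the bounds so that iteration runs
+  // upwards, take the absolute value of the step and compute
+  // numSteps = (end - start) /u |step| + 1 (or 0 when the range is empty).
+  // The collapsed trip count is the product of the per-loop counts.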
+ auto tripCount = b.createIntegerConstant(loc, b.getI32Type(), 1); + + for (auto [origLb, origUb, origStep] : llvm::zip(lbs, ubs, steps)) { + auto tmpS0 = b.createIntegerConstant(loc, origStep.getType(), 0); + auto [step, step0] = unifyToSignless(b, origStep, tmpS0); + auto reverseCond = b.create(loc, CmpIPredicate::slt, step, step0); + auto negStep = b.create(loc, step0, step); + mlir::Value absStep = b.create(loc, reverseCond, negStep, step); + + auto [lb, ub] = unifyToSignless(b, origLb, origUb); + auto start = b.create(loc, reverseCond, ub, lb); + auto end = b.create(loc, reverseCond, lb, ub); + + mlir::Value range = b.create(loc, end, start); + auto rangeCond = b.create(loc, CmpIPredicate::slt, end, start); + std::tie(range, absStep) = unifyToSignless(b, range, absStep); + // numSteps = (range /u absStep) + 1 + auto numSteps = + b.create(loc, b.create(loc, range, absStep), + b.createIntegerConstant(loc, range.getType(), 1)); + + auto trip0 = b.createIntegerConstant(loc, numSteps.getType(), 0); + auto loopTripCount = b.create(loc, rangeCond, trip0, numSteps); + auto [totalTC, thisTC] = unifyToSignless(b, tripCount, loopTripCount); + tripCount = b.create(loc, totalTC, thisTC); + } + + return tripCount; +} + +static bool evalHasSiblings(Fortran::lower::pft::Evaluation &eval) { + return eval.parent.visit(Fortran::common::visitors{ + [&](const Fortran::lower::pft::Program &parent) { + return parent.getUnits().size() + parent.getCommonBlocks().size() > 1; + }, + [&](const Fortran::lower::pft::Evaluation &parent) { + for (auto &sibling : *parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }, + [&](const auto &parent) { + for (auto &sibling : parent.evaluationList) + if (&sibling != &eval && !sibling.isEndStmt()) + return true; + + return false; + }}); +} + +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. 
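+/// The symbols, together with the capture clause (to, enter or link) that
+/// applies to each of them, are returned through \p enterLinkToClauseOps.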
+static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + EnterLinkToClauseOps &enterLinkToClauseOps) { + + // The default capture type + DeviceTypeClauseOps deviceTypeClauseOps = { + mlir::omp::DeclareTargetDeviceType::any}; + const auto &spec = std::get( + declareTargetConstruct.t); + + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + // Case: declare target(func, var1, var2) + gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, + enterLinkToClauseOps.symbolAndClause); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + enterLinkToClauseOps.symbolAndClause.emplace_back( + mlir::omp::DeclareTargetCaptureClause::to, + eval.getOwningProcedure()->getSubprogramSymbol()); + } + + ClauseProcessor cp(converter, semaCtx, *clauseList); + cp.processDeviceType(deviceTypeClauseOps); + cp.processEnter(enterLinkToClauseOps); + cp.processLink(enterLinkToClauseOps); + cp.processTo(enterLinkToClauseOps); + cp.processTODO( + converter.getCurrentLocation(), + llvm::omp::Directive::OMPD_declare_target); + } + + return deviceTypeClauseOps.deviceType; +} + +static void collectDeferredDeclareTargets( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + llvm::SmallVectorImpl + &deferredDeclareTarget) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (!op) { + deferredDeclareTarget.push_back( + {std::get<0>(symClause), devType, std::get<1>(symClause)}); + } + } +} + +static std::optional +getDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + EnterLinkToClauseOps clauseOps; + mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( + converter, semaCtx, eval, declareTargetConstruct, clauseOps); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (mlir::isa_and_nonnull(op)) + return deviceType; + } + + return std::nullopt; +} + +static mlir::Operation * +createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, mlir::Value indexVal, + const Fortran::semantics::Symbol *sym) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + 
mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + + mlir::Type tempTy = converter.genType(*sym); + mlir::Value temp = firOpBuilder.create( + loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, + /*shapeParams*/ mlir::ValueRange{}, + llvm::ArrayRef{ + fir::getAdaptToByRefAttr(firOpBuilder)}); + converter.bindSymbol(*sym, temp); + firOpBuilder.restoreInsertionPoint(insPt); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); + mlir::Operation *storeOp = firOpBuilder.create( + loc, cvtVal, converter.getSymbolAddress(*sym)); + return storeOp; +} + +static void +genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto ®ion = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(®ion, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); +} + +static void genReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); + + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); + + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : + llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { + converter.bindSymbol(*arg, prv); + } +} + +//===----------------------------------------------------------------------===// +// Code generation functions for clauses +//===----------------------------------------------------------------------===// + +// TODO Try to compile, check privatization of simple wsloop/simdloop/distribute +// TODO Move common args and functions into a ConstructProcessor class + +static void +genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + CriticalDeclareOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processHint(clauseOps); +} + +static void genDataClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, DataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetData, + clauseOps); + 
cp.processMap(loc, stmtCtx, clauseOps); + cp.processUseDeviceAddr(clauseOps); + cp.processUseDevicePtr(clauseOps); +} + +static void genDistributeClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + DistributeOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_distribute); +} + +static void genEnterExitUpdateDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + Fortran::parser::OmpIfClause::DirectiveNameModifier directive, + EnterExitUpdateDataOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(directive, clauseOps); + cp.processNowait(clauseOps); + + if (directive == + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate) { + cp.processMotionClauses(stmtCtx, clauseOps); + cp.processMotionClauses(stmtCtx, + clauseOps); + } else { + cp.processMap(loc, stmtCtx, clauseOps); + } +} + +static void genFlushClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const std::optional &objects, + const std::optional> + &clauses, + mlir::Location loc, llvm::SmallVectorImpl &operandRange) { + if (objects) + genObjectList(*objects, converter, operandRange); + + if (clauses && clauses->size() > 0) + TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); +} + +static void genLoopNestClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + LoopNestOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, clauseOps); +} + +static void +genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + OrderedRegionOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_ordered); +} + +static void genParallelClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processReduction, + bool evalNumThreadsOutsideTarget, + ParallelOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Parallel, + clauseOps); + cp.processProcBind(clauseOps); + + if (processReduction) + cp.processReduction(loc, clauseOps); + + if (evalNumThreadsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumThreads(stmtCtx, clauseOps); + } else { + cp.processNumThreads(stmtCtx, clauseOps); + } +} + +static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + bool 
clausesFromBeginSections, + SectionsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + if (clausesFromBeginSections) { + cp.processAllocate(clauseOps); + cp.processSectionsReduction(loc, clauseOps); + } else { + cp.processNowait(clauseOps); + } +} + +static void genSimdLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + SimdLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, + clauseOps); + cp.processReduction(loc, clauseOps); + cp.processSafelen(clauseOps); + cp.processSimdlen(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_simd); +} + +static void genSingleClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList &endClauses, + mlir::Location loc, SingleOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processAllocate(clauseOps); + + ClauseProcessor ecp(converter, semaCtx, endClauses); + ecp.processCopyprivate(loc, clauseOps); + ecp.processNowait(clauseOps); +} + +static void genTargetClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool processHostOnlyClauses, + bool processReduction, + TargetOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, + clauseOps); + cp.processMap(loc, stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + + if (processHostOnlyClauses) + cp.processNowait(clauseOps); + + if (processReduction) + cp.processTargetReduction(clauseOps); + + cp.processTODO( + loc, llvm::omp::Directive::OMPD_target); +} + +static void genTaskClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, TaskOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processDepend(clauseOps); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Task, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_task); +} + +static void genTaskGroupClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskGroupOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskgroup); +} + +static void genTaskLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + 
TaskLoopOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processFinal(stmtCtx, clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Taskloop, + clauseOps); + cp.processMergeable(clauseOps); + cp.processPriority(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps); + cp.processUntied(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskloop); +} + +static void genTaskWaitClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + TaskWaitOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskwait); +} + +static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, bool evalNumTeamsOutsideTarget, + TeamsOpClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, + clauseOps); + cp.processDefault(); + + if (evalNumTeamsOutsideTarget) { + HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } else { + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + } +} + +static void genWsLoopClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList *endClauses, + mlir::Location loc, WsloopOpClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processOrdered(clauseOps); + bcp.processReduction(loc, clauseOps); + bcp.processSchedule(stmtCtx, clauseOps); + + if (endClauses) { + ClauseProcessor ecp(converter, semaCtx, *endClauses); + ecp.processNowait(clauseOps); + } + + bcp.processTODO( + loc, llvm::omp::Directive::OMPD_do); +} + +//===----------------------------------------------------------------------===// +// Code generation functions for leaf constructs +//===----------------------------------------------------------------------===// + +// TODO Pass OpClauseOps as arg to all genOp + +static mlir::omp::BarrierOp +genBarrierOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::CriticalOp +genCriticalOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const std::optional &name) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr nameAttr; + + if (name.has_value()) { + CriticalDeclareOpClauseOps clauseOps; + genCriticalDeclareClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + std::string nameStr = name.value().ToString(); + mlir::ModuleOp module = firOpBuilder.getModule(); + auto 
global = module.lookupSymbol(nameStr); + if (!global) { + mlir::OpBuilder modBuilder(module.getBodyRegion()); + global = modBuilder.create( + currentLocation, firOpBuilder.getStringAttr(nameStr), + clauseOps.hintAttr); + } + nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), + global.getSymName()); + } + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + nameAttr); +} + +static mlir::omp::DataOp +genDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + Fortran::lower::StatementContext stmtCtx; + DataOpClauseOps clauseOps; + genDataClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto dataOp = converter.getFirOpBuilder().create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.useDevicePtrVars, clauseOps.useDeviceAddrVars, + clauseOps.mapVars); + + genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, + clauseOps.useDeviceTypes, clauseOps.useDeviceLocs, + clauseOps.useDeviceSymbols, currentLocation); + return dataOp; +} + +static mlir::omp::DistributeOp +genDistributeOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + DistributeOpClauseOps clauseOps; + genDistributeClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false) + .setOuterCombined(outerCombined) + .setClauses(&clauseList), + clauseOps.distScheduleStaticAttr, clauseOps.distScheduleChunkSizeVar, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.orderAttr, + isComposite ? converter.getFirOpBuilder().getUnitAttr() : nullptr); +} + +template +static OpTy +genEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + + Fortran::parser::OmpIfClause::DirectiveNameModifier directive; + if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetEnterData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetExitData; + } else if constexpr (std::is_same_v) { + directive = + Fortran::parser::OmpIfClause::DirectiveNameModifier::TargetUpdate; + } else { + llvm_unreachable("Unexpected TARGET data construct"); + } + + EnterExitUpdateDataOpClauseOps clauseOps; + genEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauseList, + currentLocation, directive, clauseOps); + + return firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.dependTypeAttrs.empty() + ? 
nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars); +} + +static mlir::omp::FlushOp +genFlushOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const std::optional &objectList, + const std::optional> + &clauseList) { + llvm::SmallVector operandRange; + genFlushClauses(converter, semaCtx, objectList, clauseList, currentLocation, + operandRange); + + return converter.getFirOpBuilder().create( + converter.getCurrentLocation(), operandRange); +} + +static mlir::omp::LoopNestOp +genLoopNestOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + const LoopNestOpClauseOps &clauseOps, DataSharingProcessor &dsp) { + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto ivCallback = [&](mlir::Operation *op) { + genLoopVars(op, converter, currentLocation, clauseOps.loopIV); + return clauseOps.loopIV; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setClauses(&clauseList) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback) + .setGenNested(true), + clauseOps.loopLBVar, clauseOps.loopUBVar, clauseOps.loopStepVar, + /*inclusive=*/converter.getFirOpBuilder().getUnitAttr()); +} + +static mlir::omp::MasterOp +genMasterOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + /*resultTypes=*/mlir::TypeRange()); +} + +static mlir::omp::OrderedRegionOp +genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + OrderedRegionOpClauseOps clauseOps; + genOrderedRegionClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Store clauseOps.parLevelThreadsAttr in op. 
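+  // Note: only the simd attribute is forwarded to the operation here; the
+  // threads variant is still pending (see the TODO above).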
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested), + clauseOps.parLevelSimdAttr); +} + +static mlir::omp::ParallelOp +genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + bool isComposite, mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + // TODO Distinguish between genParallelOp as block vs wrapper + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + ParallelOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + + auto offloadModOp = + llvm::cast(*converter.getModuleOp()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); + + bool evalNumThreadsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice() && !evalHasSiblings(eval); + + genParallelClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + /*processReduction=*/!outerCombined, + evalNumThreadsOutsideTarget, clauseOps); + + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + OpWithBodyGenInfo genInfo = + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, clauseOps.privateVars, + clauseOps.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + + return parallelOp; } - // Insert dummy instruction to remember the insertion position. The - // marker will be deleted since there are not uses. - // In the HLFIR flow there are hlfir.declares inserted above while - // setting block arguments. - mlir::Value undefMarker = firOpBuilder.create( - targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); + // TODO Integrate delayed privatization better with the new approach. + // - Store delayedPrivatizationInfo.{originalAddresses,privatizers} in + // clauseOps.{privateVars,privatizers}. + // - Outline genRegionEntryCB into composable genPrivatizedVars. + // - Refactor to create the omp.parallel op in a single place and possibly + // only use a single callback. + // - Check whether the external DataSharingProcessor could be used, and skip + // the call to processStep1() here. Perhaps also skip setting it in the + // OpWithBodyGenInfo structure. - // Create blocks for unstructured regions. 
This has to be done since - // blocks are initially allocated with the function as the parent region. - if (eval.lowerAsUnstructured()) { - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, eval.getNestedEvaluations()); + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs( + clauseOps.reductionVars.size(), currentLocation); + + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = clauseOps.reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + *clauseOps.reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + auto parallelOp = genOpWithBody( + genInfo, clauseOps.ifVar, /*num_threads_var=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : firOpBuilder.getArrayAttr(privatizers), + isComposite ? firOpBuilder.getUnitAttr() : nullptr); + + if (clauseOps.numThreadsVar) { + if (evalNumThreadsOutsideTarget) + targetOp.getNumThreadsMutable().assign(clauseOps.numThreadsVar); + else + parallelOp.getNumThreadsVarMutable().assign(clauseOps.numThreadsVar); } - firOpBuilder.create(currentLocation); + return parallelOp; +} - // Create the insertion point after the marker. - firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - if (genNested) - genNestedEvaluations(converter, eval); +static mlir::omp::SectionOp +genSectionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + // Currently only private/firstprivate clause is handled, and + // all privatization is done within `omp.section` operations. 
+ return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList)); +} + +static mlir::omp::SectionsOp +genSectionsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const SectionsOpClauseOps &clauseOps) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(false), + /*reduction_vars=*/mlir::ValueRange(), /*reductions=*/nullptr, + clauseOps.allocateVars, clauseOps.allocatorVars, clauseOps.nowaitAttr); +} + +static mlir::omp::SimdLoopOp +genSimdLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + SimdLoopOpClauseOps clauseOps; + genSimdLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + // TODO Create callback to add reduction vars as entry block arguments. + + // TODO Store clauseOps.reductionVars, clauseOps.reductionDeclSymbols in op. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenNested(false), + /*result_types=*/mlir::TypeRange(), clauseOps.alignedVars, + clauseOps.alignmentAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.alignmentAttrs), + clauseOps.ifVar, clauseOps.nontemporalVars, clauseOps.orderAttr, + clauseOps.simdlenAttr, clauseOps.safelenAttr, + isComposite ? firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::SingleOp +genSingleOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList &endClauseList) { + SingleOpClauseOps clauseOps; + genSingleClauses(converter, semaCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&beginClauseList), + clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.copyprivateVars, + clauseOps.copyprivateFuncs.empty() + ? 
nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.copyprivateFuncs), + clauseOps.nowaitAttr); } static mlir::omp::TargetOp @@ -1243,50 +1719,29 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, - llvm::omp::Directive directive, bool outerCombined = false) { + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value ifClauseOperand, deviceOperand, threadLimitOperand; - mlir::UnitAttr nowaitAttr; - llvm::SmallVector dependTypeOperands; - llvm::SmallVector mapOperands, dependOperands; - llvm::SmallVector mapSymTypes; - llvm::SmallVector mapSymLocs; - llvm::SmallVector mapSymbols; - llvm::SmallVector reductionSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Target, - ifClauseOperand); - cp.processDevice(stmtCtx, deviceOperand); - cp.processThreadLimit(stmtCtx, threadLimitOperand); - cp.processDepend(dependTypeOperands, dependOperands); - cp.processMap(currentLocation, directive, stmtCtx, mapOperands, &mapSymTypes, - &mapSymLocs, &mapSymbols); - - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_target); - // Process host-only clauses. - if (!llvm::cast(*converter.getModuleOp()) - .getIsTargetDevice()) - cp.processNowait(nowaitAttr); + bool processHostOnlyClauses = + !llvm::cast(*converter.getModuleOp()) + .getIsTargetDevice(); - if (outerCombined) - cp.processTargetReduction(reductionSymbols); + TargetOpClauseOps clauseOps; + clauseOps.mapSymbols.emplace(); + clauseOps.mapSymLocs.emplace(); + clauseOps.mapSymTypes.emplace(); + genTargetClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + processHostOnlyClauses, /*processReduction=*/outerCombined, + clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that have not been explicitly mapped using // the map clause. 
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { - if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { + if (llvm::find(*clauseOps.mapSymbols, &sym) == + clauseOps.mapSymbols->end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf< @@ -1301,22 +1756,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); - Fortran::lower::AddrAndBoundsInfo info = - getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, - converter.getCurrentLocation()); + Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( + converter, firOpBuilder, sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, info); + firOpBuilder, converter.getCurrentLocation(), converter, + dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, dataExvIsAssumedSize); + firOpBuilder, converter.getCurrentLocation(), converter, dataExv, + dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = @@ -1342,8 +1796,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } - } else if (llvm::find(reductionSymbols, &sym) != - reductionSymbols.end()) { + } else if (llvm::find(clauseOps.targetReductionSymbols, &sym) != + clauseOps.targetReductionSymbols.end()) { // Do a tofrom map for reduction variables. mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; @@ -1355,39 +1809,160 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, } mlir::Value mapOp = createMapInfoOp( - converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, - name.str(), bounds, {}, + firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), + bounds, {}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); - mapOperands.push_back(mapOp); - mapSymTypes.push_back(baseOp.getType()); - mapSymLocs.push_back(baseOp.getLoc()); - mapSymbols.push_back(&sym); + clauseOps.mapVars.push_back(mapOp); + clauseOps.mapSymTypes->push_back(baseOp.getType()); + clauseOps.mapSymLocs->push_back(baseOp.getLoc()); + clauseOps.mapSymbols->push_back(&sym); } } }; Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - auto targetOp = converter.getFirOpBuilder().create( - currentLocation, ifClauseOperand, deviceOperand, threadLimitOperand, + auto targetOp = firOpBuilder.create( + currentLocation, clauseOps.ifVar, clauseOps.deviceVar, + clauseOps.threadLimitVar, /*trip_count=*/nullptr, - dependTypeOperands.empty() + clauseOps.dependTypeAttrs.empty() ? 
nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - dependTypeOperands), - dependOperands, nowaitAttr, mapOperands, + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.nowaitAttr, clauseOps.mapVars, /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, /*teams_thread_limit=*/nullptr, /*num_threads=*/nullptr); - genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSymTypes, - mapSymLocs, mapSymbols, currentLocation); + genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, + *clauseOps.mapSymTypes, *clauseOps.mapSymLocs, + *clauseOps.mapSymbols, currentLocation); return targetOp; } +static mlir::omp::TaskGroupOp +genTaskGroupOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskGroupOpClauseOps clauseOps; + genTaskGroupClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add task reduction vars as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.taskReductionVars, + clauseOps.taskReductionDeclSymbols.empty() + ? nullptr + : converter.getFirOpBuilder().getArrayAttr( + clauseOps.taskReductionDeclSymbols), + clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskLoopOp +genTaskLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genTaskLoopClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); + + auto reductionCallback = [&](mlir::Operation *op) { + // TODO Possibly add in-reductions to the entry block argument list. + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setGenRegionEntryCb(reductionCallback) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenNested(false), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.priorityVar, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.grainsizeVar, clauseOps.numTasksVar, clauseOps.nogroupAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); +} + +static mlir::omp::TaskOp +genTaskOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + TaskOpClauseOps clauseOps; + genTaskClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + clauseOps); + + // TODO Possibly create callback to add in-reductions as entry block + // arguments. + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps.ifVar, clauseOps.finalVar, clauseOps.untiedAttr, + clauseOps.mergeableAttr, clauseOps.inReductionVars, + clauseOps.inReductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.inReductionDeclSymbols), + clauseOps.priorityVar, + clauseOps.dependTypeAttrs.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.dependTypeAttrs), + clauseOps.dependVars, clauseOps.allocateVars, clauseOps.allocatorVars); +} + +static mlir::omp::TaskWaitOp +genTaskWaitOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &clauseList) { + TaskWaitOpClauseOps clauseOps; + genTaskWaitClauses(converter, semaCtx, clauseList, currentLocation, + clauseOps); + return converter.getFirOpBuilder().create( + currentLocation); +} + +static mlir::omp::TaskYieldOp +genTaskYieldOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location currentLocation) { + return converter.getFirOpBuilder().create( + currentLocation); +} + static mlir::omp::TeamsOp genTeamsOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, @@ -1395,192 +1970,233 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter, mlir::Location currentLocation, const Fortran::parser::OmpClauseList &clauseList, bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; - mlir::Value numTeamsClauseOperand, ifClauseOperand, threadLimitClauseOperand; - llvm::SmallVector allocateOperands, allocatorOperands, - reductionVars; - llvm::SmallVector reductionDeclSymbols; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Teams, - ifClauseOperand); - cp.processAllocate(allocatorOperands, allocateOperands); - cp.processDefault(); - // cp.processTODO( - // currentLocation, llvm::omp::Directive::OMPD_teams); + TeamsOpClauseOps clauseOps; - // Evaluate NUM_TEAMS and THREAD_LIMIT on the host device, if currently inside - // of an omp.target operation. 
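+  // NUM_TEAMS and THREAD_LIMIT must be evaluated on the host when this TEAMS
+  // construct is nested inside an omp.target region during host compilation,
+  // which is what the evalNumTeamsOutsideTarget flag below checks for.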
auto offloadModOp = llvm::cast( converter.getModuleOp().getOperation()); - mlir::omp::TargetOp targetOp = - findParentTargetOp(converter.getFirOpBuilder()); + mlir::omp::TargetOp targetOp = findParentTargetOp(firOpBuilder); - bool mustEvalOutsideTarget = targetOp && !offloadModOp.getIsTargetDevice(); - if (mustEvalOutsideTarget) { - HostClausesInsertionGuard guard(converter.getFirOpBuilder()); - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } else { - cp.processNumTeams(stmtCtx, numTeamsClauseOperand); - cp.processThreadLimit(stmtCtx, threadLimitClauseOperand); - } + bool evalNumTeamsOutsideTarget = + targetOp && !offloadModOp.getIsTargetDevice(); + + genTeamsClauses(converter, semaCtx, stmtCtx, clauseList, currentLocation, + evalNumTeamsOutsideTarget, clauseOps); + + // TODO Possibly create callback to add reductions as entry block arguments. auto teamsOp = genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList), - /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, ifClauseOperand, - /*thread_limit=*/nullptr, allocateOperands, allocatorOperands, - reductionVars, - reductionDeclSymbols.empty() + /*num_teams_lower=*/nullptr, /*num_teams_upper=*/nullptr, clauseOps.ifVar, + /*thread_limit=*/nullptr, clauseOps.allocateVars, clauseOps.allocatorVars, + clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() ? nullptr - : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), - reductionDeclSymbols)); - if (mustEvalOutsideTarget) { - if (numTeamsClauseOperand) - targetOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - targetOp.getTeamsThreadLimitMutable().assign(threadLimitClauseOperand); + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols)); + + // TODO Populate lower bound once supported by the clause processor + if (evalNumTeamsOutsideTarget) { + if (clauseOps.numTeamsUpperVar) + targetOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + targetOp.getTeamsThreadLimitMutable().assign(clauseOps.threadLimitVar); } else { - if (numTeamsClauseOperand) - teamsOp.getNumTeamsUpperMutable().assign(numTeamsClauseOperand); - if (threadLimitClauseOperand) - teamsOp.getThreadLimitMutable().assign(threadLimitClauseOperand); + if (clauseOps.numTeamsUpperVar) + teamsOp.getNumTeamsUpperMutable().assign(clauseOps.numTeamsUpperVar); + if (clauseOps.threadLimitVar) + teamsOp.getThreadLimitMutable().assign(clauseOps.threadLimitVar); } return teamsOp; } -static mlir::omp::DistributeOp -genDistributeOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - // TODO Process clauses - // ClauseProcessor cp(converter, clauseList); - // cp.processAllocate(allocatorOperands, allocateOperands); - // ... 
- - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList), - /*dist_schedule_static=*/nullptr, - /*chunk_size=*/nullptr, - /*allocate_vars=*/mlir::ValueRange(), - /*allocators_vars=*/mlir::ValueRange(), - /*order_val=*/nullptr); -} - -/// Extract the list of function and variable symbols affected by the given -/// 'declare target' directive and return the intended device type for them. -static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl &symbolAndClause) { - - // The default capture type - mlir::omp::DeclareTargetDeviceType deviceType = - mlir::omp::DeclareTargetDeviceType::any; - const auto &spec = std::get( - declareTargetConstruct.t); +static mlir::omp::WsLoopOp +genWsLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool isComposite, + mlir::Location currentLocation, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps clauseOps; + clauseOps.reductionSymbols.emplace(); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, clauseOps); - if (const auto *objectList{ - Fortran::parser::Unwrap(spec.u)}) { - // Case: declare target(func, var1, var2) - gatherFuncAndVarSyms(*objectList, mlir::omp::DeclareTargetCaptureClause::to, - symbolAndClause); - } else if (const auto *clauseList{ - Fortran::parser::Unwrap( - spec.u)}) { - if (clauseList->v.empty()) { - // Case: declare target, implicit capture of function - symbolAndClause.emplace_back( - mlir::omp::DeclareTargetCaptureClause::to, - eval.getOwningProcedure()->getSubprogramSymbol()); - } + auto *nestedEval = getCollapsedLoopEval( + eval, Fortran::lower::getCollapseValue(beginClauseList)); - ClauseProcessor cp(converter, semaCtx, *clauseList); - cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); - cp.processLink(symbolAndClause); - cp.processDeviceType(deviceType); - cp.processTODO( - converter.getCurrentLocation(), - llvm::omp::Directive::OMPD_declare_target); - } + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, currentLocation, + *clauseOps.reductionSymbols, clauseOps.reductionTypes); + return *clauseOps.reductionSymbols; + }; - return deviceType; + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, currentLocation, *nestedEval) + .setReductions(*clauseOps.reductionSymbols, clauseOps.reductionTypes) + .setGenRegionEntryCb(reductionCallback) + .setGenNested(false), + clauseOps.linearVars, clauseOps.linearStepVars, clauseOps.reductionVars, + clauseOps.reductionDeclSymbols.empty() + ? nullptr + : firOpBuilder.getArrayAttr(clauseOps.reductionDeclSymbols), + clauseOps.scheduleValAttr, clauseOps.scheduleChunkVar, + clauseOps.scheduleModAttr, clauseOps.scheduleSimdAttr, + clauseOps.nowaitAttr, clauseOps.orderedAttr, clauseOps.orderAttr, + isComposite ? 
firOpBuilder.getUnitAttr() : nullptr); } -static void collectDeferredDeclareTargets( +//===----------------------------------------------------------------------===// +// Code generation functions for composite constructs +//===----------------------------------------------------------------------===// + +static void genCompositeDistributeParallelDo( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl - &deferredDeclareTarget) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType devType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (!op) { - deferredDeclareTarget.push_back( - {std::get<0>(symClause), devType, std::get<1>(symClause)}); - } - } -} - -static std::optional -getDeclareTargetFunctionDevice( + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE PARALLEL DO not implemented"); +} + +static void genCompositeDistributeParallelDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct - &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; - mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + ParallelOpClauseOps parallelClauseOps; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + // TODO evalNumThreadsOutsideTarget + genParallelClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, /*processReduction=*/true, + /*evalNumThreadsOutsideTarget=*/true, parallelClauseOps); + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, 
beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genParallelOp(); + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, + "Composite DISTRIBUTE PARALLEL DO SIMD not implemented"); +} + +static void genCompositeDistributeSimd( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + DistributeOpClauseOps distributeClauseOps; + SimdLoopOpClauseOps simdClauseOps; - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); + genDistributeClauses(converter, semaCtx, beginClauseList, currentLocation, + distributeClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); - if (mlir::isa_and_nonnull(op)) - return deviceType; - } + // TODO Pass clauseOps structures to generate wrappers + // genDistributeOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DISTRIBUTE SIMD not implemented"); +} - return std::nullopt; +static void +genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + WsloopOpClauseOps wsLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genWsLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, endClauseList, + currentLocation, wsLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genWsloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite DO SIMD not implemented"); } static void -genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { +genCompositeTaskLoopSimd(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList, + mlir::Location currentLocation) { + Fortran::lower::StatementContext stmtCtx; + TaskLoopOpClauseOps taskLoopClauseOps; + SimdLoopOpClauseOps simdClauseOps; + + genTaskLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, taskLoopClauseOps); + genSimdLoopClauses(converter, semaCtx, stmtCtx, beginClauseList, + currentLocation, simdClauseOps); + + // TODO Pass clauseOps structures to generate wrappers + // genTaskloopOp(); + // genSimdLoopOp(); + TODO(currentLocation, "Composite TASKLOOP SIMD not implemented"); +} + +//===----------------------------------------------------------------------===// +// genOMP() Code 
generation functions +//===----------------------------------------------------------------------===// + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPSimpleStandaloneConstruct + &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - const auto &opClauseList = + const auto &clauseList = std::get(simpleStandaloneConstruct.t); mlir::Location currentLocation = converter.genLocation(directive.source); @@ -1588,33 +2204,29 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, default: break; case llvm::omp::Directive::OMPD_barrier: - firOpBuilder.create(currentLocation); + genBarrierOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_taskwait: - ClauseProcessor(converter, semaCtx, opClauseList) - .processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskwait); - firOpBuilder.create(currentLocation); + genTaskWaitOp(converter, semaCtx, eval, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_taskyield: - firOpBuilder.create(currentLocation); + genTaskYieldOp(converter, semaCtx, eval, currentLocation); break; case llvm::omp::Directive::OMPD_target_data: - genDataOp(converter, semaCtx, eval, genNested, currentLocation, - opClauseList); + genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList); break; case llvm::omp::Directive::OMPD_target_enter_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_exit_data: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_target_update: genEnterExitUpdateDataOp( - converter, semaCtx, currentLocation, opClauseList); + converter, semaCtx, currentLocation, clauseList); break; case llvm::omp::Directive::OMPD_ordered: TODO(currentLocation, "OMPD_ordered"); @@ -1622,28 +2234,21 @@ genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, } static void -genOmpFlush(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - llvm::SmallVector operandRange; - if (const auto &ompObjectList = - std::get>( - flushConstruct.t)) - genObjectList(*ompObjectList, converter, operandRange); - const auto &memOrderClause = +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { + const auto &verbatim = std::get(flushConstruct.t); + const auto &objectList = + std::get>(flushConstruct.t); + const auto &clauseList = std::get>>( flushConstruct.t); - if (memOrderClause && memOrderClause->size() > 0) - TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); - converter.getFirOpBuilder().create( - converter.getCurrentLocation(), operandRange); + mlir::Location currentLocation = converter.genLocation(verbatim.source); + genFlushOp(converter, semaCtx, eval, currentLocation, objectList, clauseList); } 
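+// Each genOMP() overload lowers one class of construct; the standalone
+// dispatcher below visits the construct's variant and forwards to the
+// matching overload.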
-//===----------------------------------------------------------------------===// -// genOMP() Code generation helper functions -//===----------------------------------------------------------------------===// - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, @@ -1654,12 +2259,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::common::visitors{ [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, semaCtx, eval, - /*genNested=*/true, - simpleStandaloneConstruct); + genOMP(converter, symTable, semaCtx, eval, + simpleStandaloneConstruct); }, [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - genOmpFlush(converter, semaCtx, eval, flushConstruct); + genOMP(converter, symTable, semaCtx, eval, flushConstruct); }, [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); @@ -1672,300 +2276,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } -static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - std::size_t loopVarTypeSize) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // The types of lower bound, upper bound, and step are converted into the - // type of the loop variable if necessary. - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { - lowerBound[it] = - firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); - upperBound[it] = - firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); - step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); - } -} - -static llvm::SmallVector -genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &args) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto ®ion = op->getRegion(0); - - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(®ion, {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. 
- mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - - return args; -} - -static llvm::SmallVector -genLoopAndReductionVars( - mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - const llvm::SmallVector &loopArgs, - const llvm::SmallVector &reductionArgs, - llvm::SmallVector &reductionTypes) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - llvm::SmallVector blockArgTypes; - llvm::SmallVector blockArgLocs; - blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); - blockArgLocs.reserve(blockArgTypes.size()); - mlir::Block *entryBlock; - - if (loopArgs.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : loopArgs) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); - std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); - } - if (reductionArgs.size()) { - llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); - std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); - } - entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, - blockArgLocs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - if (loopArgs.size()) { - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = - fir::getBase(op->getRegion(0).front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - } - // Bind the reduction arguments to their block arguments - for (auto [arg, prv] : llvm::zip_equal( - reductionArgs, - llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { - converter.bindSymbol(*arg, prv); - } - - return loopArgs; -} - -static void -createSimdLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &loopOpClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, loopOpClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand, ifClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector alignedVars, nontemporalVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); - cp.processIf(Fortran::parser::OmpIfClause::DirectiveNameModifier::Simd, - ifClauseOperand); 
- cp.processSimdlen(simdlenClauseOperand); - cp.processSafelen(safelenClauseOperand); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - mlir::TypeRange resultType; - auto simdLoopOp = firOpBuilder.create( - loc, resultType, lowerBound, upperBound, step, alignedVars, - /*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars, - orderClauseOperand, simdlenClauseOperand, safelenClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(loopOpClauseList)); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); - }; - - createBodyOfOp( - simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&loopOpClauseList) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback)); -} - -static void createWsLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, beginClauseList, eval); - dsp.processStep1(); - - Fortran::lower::StatementContext stmtCtx; - mlir::Value scheduleChunkClauseOperand; - llvm::SmallVector lowerBound, upperBound, step, reductionVars; - llvm::SmallVector linearVars, linearStepVars; - llvm::SmallVector iv; - llvm::SmallVector reductionDeclSymbols; - llvm::SmallVector reductionSymbols; - mlir::omp::ClauseOrderKindAttr orderClauseOperand; - mlir::omp::ClauseScheduleKindAttr scheduleValClauseOperand; - mlir::UnitAttr nowaitClauseOperand, scheduleSimdClauseOperand; - mlir::IntegerAttr orderedClauseOperand; - mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; - std::size_t loopVarTypeSize; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); - cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols, - &reductionSymbols); - cp.processTODO(loc, ompDirective); - - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - - auto wsLoopOp = firOpBuilder.create( - loc, lowerBound, upperBound, step, linearVars, linearStepVars, - reductionVars, - reductionDeclSymbols.empty() - ? nullptr - : mlir::ArrayAttr::get(firOpBuilder.getContext(), - reductionDeclSymbols), - scheduleValClauseOperand, scheduleChunkClauseOperand, - /*schedule_modifiers=*/nullptr, - /*simd_modifier=*/nullptr, nowaitClauseOperand, orderedClauseOperand, - orderClauseOperand, - /*inclusive=*/firOpBuilder.getUnitAttr()); - - // Handle attribute based clauses. - if (cp.processOrdered(orderedClauseOperand)) - wsLoopOp.setOrderedValAttr(orderedClauseOperand); - - if (cp.processSchedule(scheduleValClauseOperand, scheduleModClauseOperand, - scheduleSimdClauseOperand)) { - wsLoopOp.setScheduleValAttr(scheduleValClauseOperand); - wsLoopOp.setScheduleModifierAttr(scheduleModClauseOperand); - wsLoopOp.setSimdModifierAttr(scheduleSimdClauseOperand); - } - // In FORTRAN `nowait` clause occur at the end of `omp do` directive. 
- // i.e - // !$omp do - // <...> - // !$omp end do nowait - if (endClauseList) { - if (ClauseProcessor(converter, semaCtx, *endClauseList) - .processNowait(nowaitClauseOperand)) - wsLoopOp.setNowaitAttr(nowaitClauseOperand); - } - - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(beginClauseList)); - - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); - - auto ivCallback = [&](mlir::Operation *op) { - return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, - reductionTypes); - }; - - createBodyOfOp( - wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&beginClauseList) - .setDataSharingProcessor(&dsp) - .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(ivCallback)); - - // Create trip_count if inside of omp.target and this is host compilation - auto offloadMod = llvm::dyn_cast( - firOpBuilder.getModule().getOperation()); - auto targetOp = wsLoopOp->getParentOfType(); - - if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && - targetOp.isTargetSPMDLoop()) { - // Lower loop bounds and step, and process collapsing again, putting lowered - // values outside of omp.target this time. This enables calculating and - // accessing the trip count in the host, which is needed when lowering to - // LLVM IR via the OMPIRBuilder. - HostClausesInsertionGuard guard(firOpBuilder); - llvm::SmallVector outsideLB, outsideUB, outsideStep; - llvm::SmallVector outsideIV; - cp.processCollapse(loc, eval, outsideLB, outsideUB, outsideStep, outsideIV, - loopVarTypeSize); - targetOp.getTripCountMutable().assign( - calculateTripCount(converter, loc, outsideLB, outsideUB, outsideStep)); - } -} - -static void createSimdWsLoop( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processTODO< - Fortran::parser::OmpClause::Aligned, Fortran::parser::OmpClause::Allocate, - Fortran::parser::OmpClause::Linear, Fortran::parser::OmpClause::Safelen, - Fortran::parser::OmpClause::Simdlen, Fortran::parser::OmpClause::Order>( - loc, ompDirective); - // TODO: Add support for vectorization - add vectorization hints inside loop - // body. - // OpenMP standard does not specify the length of vector instructions. - // Currently we safely assume that for !$omp do simd pragma the SIMD length - // is equal to 1 (i.e. we generate standard workshare loop). - // When support for vectorization is enabled, then we need to add handling of - // if clause. Currently if clause can be skipped because we always assume - // SIMD length = 1. 
- createWsLoop(converter, semaCtx, eval, ompDirective, beginClauseList, - endClauseList, loc); -} - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, @@ -1973,13 +2283,16 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); - const auto &loopOpClauseList = + const auto &beginClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); - const auto ompDirective = + llvm::omp::Directive origDirective = std::get(beginLoopDirective.t).v; + assert(llvm::omp::loopConstructSet.test(origDirective) && + "Expected loop construct"); + const auto *endClauseList = [&]() { using RetTy = const Fortran::parser::OmpClauseList *; if (auto &endLoopDirective = @@ -1991,61 +2304,191 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, return RetTy(); }(); - bool validDirective = false; - if (llvm::omp::topTaskloopSet.test(ompDirective)) { - validDirective = true; - TODO(currentLocation, "Taskloop construct"); - } else { - // Create omp.{target, teams, distribute, parallel} nested operations - if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTargetOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, ompDirective, - /*outerCombined=*/true); + /// Utility to remove the first leaf construct from a combined loop construct. + /// Composite constructs are not handled, as they cannot be split in that way. + auto peelCombinedLoopDirective = + [](llvm::omp::Directive dir) -> llvm::omp::Directive { + using D = llvm::omp::Directive; + switch (dir) { + case D::OMPD_masked_taskloop: + case D::OMPD_master_taskloop: + return D::OMPD_taskloop; + case D::OMPD_masked_taskloop_simd: + case D::OMPD_master_taskloop_simd: + return D::OMPD_taskloop_simd; + case D::OMPD_parallel_do: + return D::OMPD_do; + case D::OMPD_parallel_do_simd: + return D::OMPD_do_simd; + case D::OMPD_parallel_masked_taskloop: + return D::OMPD_masked_taskloop; + case D::OMPD_parallel_master_taskloop: + return D::OMPD_master_taskloop; + case D::OMPD_parallel_masked_taskloop_simd: + return D::OMPD_masked_taskloop_simd; + case D::OMPD_parallel_master_taskloop_simd: + return D::OMPD_master_taskloop_simd; + case D::OMPD_target_parallel_do: + return D::OMPD_parallel_do; + case D::OMPD_target_parallel_do_simd: + return D::OMPD_parallel_do_simd; + case D::OMPD_target_simd: + return D::OMPD_simd; + case D::OMPD_target_teams_distribute: + return D::OMPD_teams_distribute; + case D::OMPD_target_teams_distribute_parallel_do: + return D::OMPD_teams_distribute_parallel_do; + case D::OMPD_target_teams_distribute_parallel_do_simd: + return D::OMPD_teams_distribute_parallel_do_simd; + case D::OMPD_target_teams_distribute_simd: + return D::OMPD_teams_distribute_simd; + case D::OMPD_teams_distribute: + return D::OMPD_distribute; + case D::OMPD_teams_distribute_parallel_do: + return D::OMPD_distribute_parallel_do; + case D::OMPD_teams_distribute_parallel_do_simd: + return D::OMPD_distribute_parallel_do_simd; + case D::OMPD_teams_distribute_simd: + return D::OMPD_distribute_simd; + case D::OMPD_parallel_loop: + case D::OMPD_teams_loop: + return D::OMPD_loop; + case D::OMPD_target_parallel_loop: + return D::OMPD_parallel_loop; + case D::OMPD_target_teams_loop: + return 
D::OMPD_teams_loop; + default: + llvm_unreachable("Unexpected non-combined loop construct"); } - if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, /*outerCombined=*/true); - } - if (llvm::omp::allDistributeSet.test(ompDirective)) { - validDirective = true; - bool outerCombined = llvm::omp::topDistributeSet.test(ompDirective); - genDistributeOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, outerCombined); - } - if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) - .test(ompDirective)) { - validDirective = true; - genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, - /*outerCombined=*/true); + }; + + // Privatization and loop nest clause processing must be done before producing + // any wrappers and after combined constructs, so that any operations created + // are outside of the wrapper nest. + DataSharingProcessor dsp(converter, beginClauseList, eval); + LoopNestOpClauseOps clauseOps; + auto processLoopNestClauses = [&]() { + dsp.processStep1(); + genLoopNestClauses(converter, semaCtx, eval, beginClauseList, + currentLocation, clauseOps); + }; + + llvm::omp::Directive ompDirective = origDirective; + if (llvm::omp::topTargetSet.test(ompDirective)) { + // TODO Combined constructs: Call genClauses and pass them in. + genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topTeamsSet.test(ompDirective)) { + genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + } + + if (llvm::omp::topParallelSet.test(ompDirective)) { + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, + /*isComposite=*/false, currentLocation, beginClauseList, + /*outerCombined=*/true); + ompDirective = peelCombinedLoopDirective(ompDirective); + processLoopNestClauses(); + } else { + processLoopNestClauses(); + + if (llvm::omp::topDistributeSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_distribute: + genDistributeOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, + /*outerCombined=*/true); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do: + genCompositeDistributeParallelDo(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: + genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, + beginClauseList, endClauseList, + currentLocation); + break; + case llvm::omp::Directive::OMPD_distribute_simd: + genCompositeDistributeSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + default: + llvm_unreachable("Unexpected DISTRIBUTE construct"); + } + } else if (llvm::omp::topTaskloopSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_taskloop_simd: + genCompositeTaskLoopSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_taskloop: + genTaskLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + break; + default: + 
llvm_unreachable("Unexpected TASKLOOP construct"); + } + } else if (ompDirective == llvm::omp::Directive::OMPD_simd) { + genSimdLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList); + } else if (!llvm::omp::topDoSet.test(ompDirective)) { + TODO(currentLocation, + "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } } - if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) - validDirective = true; - if (!validDirective) { + if (llvm::omp::topDoSet.test(ompDirective)) { + switch (ompDirective) { + case llvm::omp::Directive::OMPD_do_simd: + genCompositeDoSimd(converter, semaCtx, eval, beginClauseList, + endClauseList, currentLocation); + break; + case llvm::omp::Directive::OMPD_do: + genWsLoopOp(converter, semaCtx, eval, /*isComposite=*/false, + currentLocation, beginClauseList, endClauseList); + break; + default: + llvm_unreachable("Unexpected DO construct"); + } + } else if (llvm::omp::allParallelSet.test(origDirective)) { TODO(currentLocation, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(ompDirective) + + llvm::omp::getOpenMPDirectiveName(origDirective) + ")"); } - if (llvm::omp::allDoSimdSet.test(ompDirective)) { - // 2.9.3.2 Workshare SIMD construct - createSimdWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + // Create inner loop nest and body. + mlir::omp::LoopNestOp loopNestOp = + genLoopNestOp(converter, semaCtx, eval, currentLocation, beginClauseList, + clauseOps, dsp); - } else if (llvm::omp::allSimdSet.test(ompDirective)) { - // 2.9.3.1 SIMD construct - createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - currentLocation); - genOpenMPReduction(converter, semaCtx, loopOpClauseList); - } else { - createWsLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + if (ompDirective == llvm::omp::Directive::OMPD_simd) + genOpenMPReduction(converter, semaCtx, beginClauseList); + + // Create trip_count outside of omp.target if this is host compilation and the + // loop is inside of a target region. + auto offloadMod = llvm::dyn_cast( + converter.getModuleOp().getOperation()); + auto targetOp = loopNestOp->getParentOfType(); + + if (offloadMod && targetOp && !offloadMod.getIsTargetDevice() && + targetOp.isTargetSPMDLoop()) { + // Lower loop bounds and step, and process collapsing again, putting lowered + // values outside of omp.target this time. This enables calculating and + // accessing the trip count in the host, which is needed when lowering to + // LLVM IR via the OMPIRBuilder. 
+ HostClausesInsertionGuard guard(converter.getFirOpBuilder()); + CollapseClauseOps collapseOps; + ClauseProcessor(converter, semaCtx, beginClauseList) + .processCollapse(currentLocation, eval, collapseOps); + targetOp.getTripCountMutable().assign( + calculateTripCount(converter, currentLocation, collapseOps.loopLBVar, + collapseOps.loopUBVar, collapseOps.loopStepVar)); } } @@ -2066,6 +2509,9 @@ genOMP(Fortran::lower::AbstractConverter &converter, const auto &endClauseList = std::get(endBlockDirective.t); + assert(llvm::omp::blockConstructSet.test(directive.v) && + "Expected block construct"); + for (const Fortran::parser::OmpClause &clause : beginClauseList.v) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::get_if(&clause.u) && @@ -2106,11 +2552,11 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_ordered: genOrderedRegionOp(converter, semaCtx, eval, /*genNested=*/true, - currentLocation); + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_parallel: genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, - currentLocation, beginClauseList); + /*isComposite=*/false, currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2118,7 +2564,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, directive.v); + beginClauseList); break; case llvm::omp::Directive::OMPD_target_data: genDataOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -2153,29 +2599,25 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Codegen for combined directives bool combinedDirective = false; - if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTargetSet.test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - beginClauseList, directive.v, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); combinedDirective = true; } - if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allTeamsSet.test(directive.v)) { genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList); combinedDirective = true; } - if ((llvm::omp::allParallelSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::allParallelSet.test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, beginClauseList, outerCombined); + /*isComposite=*/false, currentLocation, beginClauseList, + outerCombined); combinedDirective = true; } - if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet) - .test(directive.v)) { + if (llvm::omp::workShareSet.test(directive.v)) { genSingleOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, endClauseList); combinedDirective = true; @@ -2194,38 +2636,13 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = 
converter.getCurrentLocation(); - mlir::IntegerAttr hintClauseOp; - std::string name; - const Fortran::parser::OmpCriticalDirective &cd = + const auto &cd = std::get(criticalConstruct.t); - if (std::get>(cd.t).has_value()) { - name = - std::get>(cd.t).value().ToString(); - } - const auto &clauseList = std::get(cd.t); - ClauseProcessor(converter, semaCtx, clauseList).processHint(hintClauseOp); - - mlir::omp::CriticalOp criticalOp = [&]() { - if (name.empty()) { - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr()); - } - mlir::ModuleOp module = firOpBuilder.getModule(); - mlir::OpBuilder modBuilder(module.getBodyRegion()); - auto global = module.lookupSymbol(name); - if (!global) - global = modBuilder.create( - currentLocation, - mlir::StringAttr::get(firOpBuilder.getContext(), name), hintClauseOp); - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), - global.getSymName())); - }(); - auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); - createBodyOfOp(criticalOp, genInfo); + const auto &name = std::get>(cd.t); + mlir::Location currentLocation = converter.getCurrentLocation(); + genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList, name); } static void @@ -2234,55 +2651,48 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { - mlir::Location currentLocation = converter.getCurrentLocation(); - llvm::SmallVector allocateOperands, allocatorOperands; - mlir::UnitAttr nowaitClauseOperand; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); - const auto §ionsClauseList = + const auto &beginClauseList = std::get(beginSectionsDirective.t); - - // Process clauses before optional omp.parallel, so that new variables are - // allocated outside of the parallel region - ClauseProcessor cp(converter, semaCtx, sectionsClauseList); - cp.processSectionsReduction(currentLocation); - cp.processAllocate(allocatorOperands, allocateOperands); - llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; + const auto §ionBlocks = + std::get(sectionsConstruct.t); + + // Process clauses before optional omp.parallel, so that new variables are + // allocated outside of the parallel region. + mlir::Location currentLocation = converter.getCurrentLocation(); + SectionsOpClauseOps clauseOps; + genSectionsClauses(converter, semaCtx, beginClauseList, currentLocation, + /*clausesFromBeginSections=*/true, clauseOps); - // Parallel wrapper of PARALLEL SECTIONS construct + // Parallel wrapper of PARALLEL SECTIONS construct. 
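+  // For PARALLEL SECTIONS the enclosing omp.parallel is created first, so the
+  // sections construct is nested inside it; for a standalone SECTIONS
+  // construct, the NOWAIT clause is instead taken from the end directive.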
if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, sectionsClauseList, - /*outerCombined=*/true); + /*genNested=*/false, /*isComposite=*/false, currentLocation, + beginClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); - const auto &endSectionsClauseList = + const auto &endClauseList = std::get(endSectionsDirective.t); - ClauseProcessor(converter, semaCtx, endSectionsClauseList) - .processNowait(nowaitClauseOperand); + genSectionsClauses(converter, semaCtx, endClauseList, currentLocation, + /*clausesFromBeginSections=*/false, clauseOps); } - // SECTIONS construct - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(false), - /*reduction_vars=*/mlir::ValueRange(), - /*reductions=*/nullptr, allocateOperands, allocatorOperands, - nowaitClauseOperand); + // SECTIONS construct. + genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); - const auto §ionBlocks = - std::get(sectionsConstruct.t); + // Generate nested SECTION operations recursively. auto &firOpBuilder = converter.getFirOpBuilder(); auto ip = firOpBuilder.saveInsertionPoint(); for (const auto &[nblock, neval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, - sectionsClauseList); + beginClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } @@ -2366,12 +2776,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { - llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + DeclareTargetOpClauseOps clauseOps; mlir::omp::DeclareTargetDeviceType deviceType = getDeclareTargetInfo( - converter, semaCtx, eval, declareTargetConstruct, symbolAndClause); + converter, semaCtx, eval, declareTargetConstruct, clauseOps); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + for (const DeclareTargetCapturePair &symClause : clauseOps.symbolAndClause) { mlir::Operation *op = mod.lookupSymbol(converter.mangleName( std::get(symClause))); @@ -2482,11 +2892,10 @@ genOMP(Fortran::lower::AbstractConverter &converter, mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { - if (mlir::isa(op)) + if (mlir::isa(op)) return builder.create(loc); - else - return builder.create(loc); + return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/lib/Lower/OpenMP/OperationClauses.h b/flang/lib/Lower/OpenMP/OperationClauses.h new file mode 100644 index 0000000000000..ed49c3f0750b1 --- /dev/null +++ b/flang/lib/Lower/OpenMP/OperationClauses.h @@ -0,0 +1,306 @@ +//===-- Lower/OpenMP/OperationClauses.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include <optional>
+
+namespace Fortran {
+namespace semantics {
+class Symbol;
+} // namespace semantics
+} // namespace Fortran
+
+namespace Fortran {
+namespace lower {
+namespace omp {
+
+//===----------------------------------------------------------------------===//
+// Mixin structures defining operands associated with each OpenMP clause.
+//===----------------------------------------------------------------------===//
+
+struct AlignedClauseOps {
+  llvm::SmallVector<mlir::Value> alignedVars;
+  llvm::SmallVector<mlir::Attribute> alignmentAttrs;
+};
+
+struct AllocateClauseOps {
+  llvm::SmallVector<mlir::Value> allocatorVars, allocateVars;
+};
+
+struct CollapseClauseOps {
+  llvm::SmallVector<mlir::Value> loopLBVar, loopUBVar, loopStepVar;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> loopIV;
+};
+
+struct CopyinClauseOps {};
+
+struct CopyprivateClauseOps {
+  llvm::SmallVector<mlir::Value> copyprivateVars;
+  llvm::SmallVector<mlir::Attribute> copyprivateFuncs;
+};
+
+struct DependClauseOps {
+  llvm::SmallVector<mlir::Attribute> dependTypeAttrs;
+  llvm::SmallVector<mlir::Value> dependVars;
+};
+
+struct DeviceClauseOps {
+  mlir::Value deviceVar;
+};
+
+struct DeviceTypeClauseOps {
+  mlir::omp::DeclareTargetDeviceType deviceType;
+};
+
+struct DistScheduleClauseOps {
+  mlir::UnitAttr distScheduleStaticAttr;
+  mlir::Value distScheduleChunkSizeVar;
+};
+
+struct EnterLinkToClauseOps {
+  llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
+};
+
+struct FinalClauseOps {
+  mlir::Value finalVar;
+};
+
+struct GrainsizeClauseOps {
+  mlir::Value grainsizeVar;
+};
+
+struct HintClauseOps {
+  mlir::IntegerAttr hintAttr;
+};
+
+struct IfClauseOps {
+  mlir::Value ifVar;
+};
+
+struct InReductionClauseOps {
+  llvm::SmallVector<mlir::Value> inReductionVars;
+  llvm::SmallVector<mlir::Type> inReductionTypes;
+  llvm::SmallVector<mlir::Attribute> inReductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      inReductionSymbols;
+};
+
+struct LinearClauseOps {
+  llvm::SmallVector<mlir::Value> linearVars, linearStepVars;
+};
+
+// The optional parameters - mapSymTypes, mapSymLocs & mapSymbols are used to
+// store the original type, location and Fortran symbol for the map operands.
+// They may be used later on to create the block_arguments for some of the
+// target directives that require it.
+struct MapClauseOps {
+  llvm::SmallVector<mlir::Value> mapVars;
+  std::optional<llvm::SmallVector<mlir::Type>> mapSymTypes;
+  std::optional<llvm::SmallVector<mlir::Location>> mapSymLocs;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      mapSymbols;
+};
+
+struct MergeableClauseOps {
+  mlir::UnitAttr mergeableAttr;
+};
+
+struct NogroupClauseOps {
+  mlir::UnitAttr nogroupAttr;
+};
+
+struct NontemporalClauseOps {
+  llvm::SmallVector<mlir::Value> nontemporalVars;
+};
+
+struct NowaitClauseOps {
+  mlir::UnitAttr nowaitAttr;
+};
+
+struct NumTasksClauseOps {
+  mlir::Value numTasksVar;
+};
+
+struct NumTeamsClauseOps {
+  mlir::Value numTeamsLowerVar;
+  mlir::Value numTeamsUpperVar;
+};
+
+struct NumThreadsClauseOps {
+  mlir::Value numThreadsVar;
+};
+
+struct OrderClauseOps {
+  mlir::omp::ClauseOrderKindAttr orderAttr;
+};
+
+struct OrderedClauseOps {
+  mlir::IntegerAttr orderedAttr;
+};
+
+struct ParallelizationLevelClauseOps {
+  mlir::UnitAttr parLevelThreadsAttr;
+  mlir::UnitAttr parLevelSimdAttr;
+};
+
+struct PriorityClauseOps {
+  mlir::Value priorityVar;
+};
+
+struct PrivateClauseOps {
+  llvm::SmallVector<mlir::Value> privateVars;
+  llvm::SmallVector<mlir::Attribute> privatizers;
+};
+
+struct ProcBindClauseOps {
+  mlir::omp::ClauseProcBindKindAttr procBindKindAttr;
+};
+
+struct ReductionClauseOps {
+  llvm::SmallVector<mlir::Value> reductionVars;
+  llvm::SmallVector<mlir::Type> reductionTypes;
+  llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      reductionSymbols;
+};
+
+struct SafelenClauseOps {
+  mlir::IntegerAttr safelenAttr;
+};
+
+struct ScheduleClauseOps {
+  mlir::omp::ClauseScheduleKindAttr scheduleValAttr;
+  mlir::omp::ScheduleModifierAttr scheduleModAttr;
+  mlir::Value scheduleChunkVar;
+  mlir::UnitAttr scheduleSimdAttr;
+};
+
+struct SimdlenClauseOps {
+  mlir::IntegerAttr simdlenAttr;
+};
+
+struct TargetReductionClauseOps {
+  llvm::SmallVector<const Fortran::semantics::Symbol *> targetReductionSymbols;
+};
+
+struct TaskReductionClauseOps {
+  llvm::SmallVector<mlir::Value> taskReductionVars;
+  llvm::SmallVector<mlir::Type> taskReductionTypes;
+  llvm::SmallVector<mlir::Attribute> taskReductionDeclSymbols;
+  std::optional<llvm::SmallVector<const Fortran::semantics::Symbol *>>
+      taskReductionSymbols;
+};
+
+struct ThreadLimitClauseOps {
+  mlir::Value threadLimitVar;
+};
+
+struct UntiedClauseOps {
+  mlir::UnitAttr untiedAttr;
+};
+
+struct UseDeviceClauseOps {
+  llvm::SmallVector<mlir::Value> useDevicePtrVars;
+  llvm::SmallVector<mlir::Value> useDeviceAddrVars;
+  llvm::SmallVector<mlir::Type> useDeviceTypes;
+  llvm::SmallVector<mlir::Location> useDeviceLocs;
+  llvm::SmallVector<const Fortran::semantics::Symbol *> useDeviceSymbols;
+};
+
+//===----------------------------------------------------------------------===//
+// Structures defining clause operands associated with each OpenMP leaf
+// construct.
+//
+// These mirror the arguments expected by the corresponding OpenMP MLIR ops.
+//===----------------------------------------------------------------------===//
+
+namespace detail {
+template <typename... Mixins>
+struct Clauses : public Mixins... {};
+} // namespace detail
+
+using CriticalDeclareOpClauseOps = detail::Clauses<HintClauseOps>;
+
+using DataOpClauseOps = detail::Clauses;
+
+using DeclareTargetOpClauseOps =
+    detail::Clauses<DeviceTypeClauseOps, EnterLinkToClauseOps>;
+
+using DistributeOpClauseOps =
+    detail::Clauses;
+
+using EnterExitUpdateDataOpClauseOps =
+    detail::Clauses;
+
+using LoopNestOpClauseOps = detail::Clauses<CollapseClauseOps>;
+
+// TODO Rename to "masked"
+// TODO `filter` clause.
+using MasterOpClauseOps = detail::Clauses<>;
+
+using OrderedRegionOpClauseOps = detail::Clauses;
+
+using ParallelOpClauseOps =
+    detail::Clauses;
+
+using SectionsOpClauseOps =
+    detail::Clauses;
+
+// TODO `linear` clause.
+using SimdLoopOpClauseOps =
+    detail::Clauses;
+
+using SingleOpClauseOps =
+    detail::Clauses;
+
+// TODO `allocate`, `defaultmap`, `has_device_addr`, `in_reduction`,
+// `is_device_ptr`, `uses_allocators` clauses.
+using TargetOpClauseOps = + detail::Clauses; + +using TaskGroupOpClauseOps = + detail::Clauses; + +using TaskLoopOpClauseOps = + detail::Clauses; + +// TODO `affinity`, `detach` clauses. +using TaskOpClauseOps = + detail::Clauses; + +// TODO `depend`, `nowait` clauses. +using TaskWaitOpClauseOps = detail::Clauses<>; + +using TeamsOpClauseOps = + detail::Clauses; + +// TODO `allocate` clause. +using WsloopOpClauseOps = + detail::Clauses; + +} // namespace omp +} // namespace lower +} // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 3b72148867874..2946d3ef2e6e1 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -60,7 +60,7 @@ ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( void ReductionProcessor::addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols) { + llvm::SmallVectorImpl &symbols) { const auto &objectList{std::get(reduction.t)}; for (const Fortran::parser::OmpObject &ompObject : objectList.v) { diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index 85c286ead5282..abbd737084dc5 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -101,7 +101,7 @@ class ReductionProcessor { static void addReductionSym( const Fortran::parser::OmpReductionClause &reduction, - llvm::SmallVector &symbols); + llvm::SmallVectorImpl &symbols); /// Creates an OpenMP reduction declaration and inserts it into the provided /// symbol table. The declaration has a constant initializer with the neutral diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1bf73ba1dfd2..2858412d77561 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,24 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, } } +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. 
+ loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + void gatherFuncAndVarSyms( const Fortran::parser::OmpObjectList &objList, mlir::omp::DeclareTargetCaptureClause clause, diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 00b0165cec554..369654e546290 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -68,6 +68,9 @@ void genObjectList(const Fortran::parser::OmpObjectList &objectList, Fortran::lower::AbstractConverter &converter, llvm::SmallVectorImpl &operands); +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); + mlir::omp::TargetOp findParentTargetOp(mlir::OpBuilder &builder); } // namespace omp diff --git a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp index f5a3d925ab5d9..e242dc5df6916 100644 --- a/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp @@ -105,9 +105,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { step.push_back(rewriter.clone(*stepOp)->getResult(0)); // ==== TODO (1) End ==== - auto wsLoopOp = rewriter.create( - doLoop.getLoc(), lowerBound, upperBound, step); - wsLoopOp.setInclusive(true); + auto wsLoopOp = rewriter.create(doLoop.getLoc()); + rewriter.createBlock(&wsLoopOp.getRegion()); + + // TODO Test that this didn't break something. + auto loopNestOp = rewriter.create( + doLoop.getLoc(), lowerBound, upperBound, step, /*inclusive=*/true); auto outlineableOp = mlir::dyn_cast(*parallelOp); @@ -180,11 +183,11 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // Clone the loop's body inside the worksharing construct using the mapped // memref values. 
- rewriter.cloneRegionBefore(doLoop.getRegion(), wsLoopOp.getRegion(), - wsLoopOp.getRegion().begin(), mapper); + rewriter.cloneRegionBefore(doLoop.getRegion(), loopNestOp.getRegion(), + loopNestOp.getRegion().begin(), mapper); - mlir::Operation *terminator = wsLoopOp.getRegion().back().getTerminator(); - rewriter.setInsertionPointToEnd(&wsLoopOp.getRegion().back()); + mlir::Operation *terminator = loopNestOp.getRegion().back().getTerminator(); + rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back()); rewriter.create(terminator->getLoc()); rewriter.eraseOp(terminator); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index cf3bc9683cc89..7e6a590d10bb0 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1204,7 +1204,7 @@ def TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, // 2.10.4 taskyield Construct //===----------------------------------------------------------------------===// -def TaskyieldOp : OpenMP_Op<"taskyield"> { +def TaskYieldOp : OpenMP_Op<"taskyield"> { let summary = "taskyield construct"; let description = [{ The taskyield construct specifies that the current task can be suspended @@ -1723,6 +1723,10 @@ def TargetOp : OpenMP_Op<"target",[IsolatedFromAbove, MapClauseOwningOpInterface TODO: is_device_ptr, defaultmap, in_reduction }]; + // TODO Remove num_teams_lower, num_teams_upper, teams_thread_limit and + // num_threads args and instead calculate them outside and implicitly map + // them. If not used and implicitly mapped, they can be omitted from the + // outlined function arg list. let arguments = (ins Optional:$if_expr, Optional:$device, Optional:$thread_limit, @@ -1914,7 +1918,7 @@ def OrderedRegionOp : OpenMP_Op<"ordered_region"> { // 2.17.5 taskwait Construct //===----------------------------------------------------------------------===// -def TaskwaitOp : OpenMP_Op<"taskwait"> { +def TaskWaitOp : OpenMP_Op<"taskwait"> { let summary = "taskwait construct"; let description = [{ The taskwait construct specifies a wait on the completion of child tasks diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 83eb1653ca950..117db5c39db59 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -319,8 +319,8 @@ void ConvertOpenMPToLLVMPass::runOnOperation() { populateOpenMPToLLVMConversionPatterns(converter, patterns); LLVMConversionTarget target(getContext()); - target.addLegalOp(); + target.addLegalOp(); configureOpenMPToLLVMConversionLegality(target, converter); if (failed(applyPartialConversion(module, target, std::move(patterns)))) signalPassFailure(); diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 464a647564ace..7bebd874f47b4 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -461,7 +461,11 @@ struct ParallelOpLowering : public OpRewritePattern { // Replace the loop. { OpBuilder::InsertionGuard allocaGuard(rewriter); - auto loop = rewriter.create( + // TODO Test that this didn't break something. 
+ auto wsloop = rewriter.create(parallelOp.getLoc()); + rewriter.createBlock(&wsloop.getRegion()); + + auto loop = rewriter.create( parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.create(loc); @@ -482,9 +486,9 @@ struct ParallelOpLowering : public OpRewritePattern { rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin()); rewriter.create(loc, ValueRange()); if (!reductionVariables.empty()) { - loop.setReductionsAttr( + wsloop.setReductionsAttr( ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols)); - loop.getReductionVarsMutable().append(reductionVariables); + wsloop.getReductionVarsMutable().append(reductionVariables); } } } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index c7130e8e2c9f3..bdaecb5e6de9e 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1933,6 +1933,9 @@ LogicalResult WsLoopOp::verify() { // TODO If composite, must have composite parallel parent or simd and no // wrapper parent. Otherwise, no composite parent. + // TODO If composite, must have composite parallel parent or simd and no + // wrapper parent. Otherwise, no composite parent. + return verifyReductionVarList(*this, getReductions(), getReductionVars()); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3605e03ae886e..7e5e3c47e0d03 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -893,31 +893,32 @@ static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto loop = cast(opInst); + auto wsloop = cast(opInst); + auto loop = cast(wsloop.getWrappedLoop()); // TODO: this should be in the op verifier instead. if (loop.getLowerBound().empty()) return failure(); // Static is the default. auto schedule = - loop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); + wsloop.getScheduleVal().value_or(omp::ClauseScheduleKind::Static); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.getStep()[0]); llvm::Type *ivType = step->getType(); llvm::Value *chunk = nullptr; - if (loop.getScheduleChunkVar()) { + if (wsloop.getScheduleChunkVar()) { llvm::Value *chunkVar = - moduleTranslation.lookupValue(loop.getScheduleChunkVar()); + moduleTranslation.lookupValue(wsloop.getScheduleChunkVar()); chunk = builder.CreateSExtOrTrunc(chunkVar, ivType); } SmallVector reductionDecls; - collectReductionDecls(loop, reductionDecls); + collectReductionDecls(wsloop, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); DenseMap reductionVariableMap; - allocReductionVars(loop, builder, *ompBuilder, moduleTranslation, allocaIP, + allocReductionVars(wsloop, builder, *ompBuilder, moduleTranslation, allocaIP, reductionDecls, reductionVariableMap); // Store the mapping between reduction variables and their private copies on @@ -929,7 +930,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, // Before the loop, store the initial values of reductions into reduction // variables. 
Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. - for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { + for (unsigned i = 0; i < wsloop.getNumReductionVars(); ++i) { SmallVector phis; if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, @@ -1020,10 +1021,10 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, allocaIP = findAllocaInsertPoint(builder, moduleTranslation); // TODO: Handle doacross loops when the ordered clause has a parameter. - bool isOrdered = loop.getOrderedVal().has_value(); + bool isOrdered = wsloop.getOrderedVal().has_value(); std::optional scheduleModifier = - loop.getScheduleModifier(); - bool isSimd = loop.getSimdModifier(); + wsloop.getScheduleModifier(); + bool isSimd = wsloop.getSimdModifier(); bool distributeCodeGen = opInst.getParentOfType(); bool parallelCodeGen = opInst.getParentOfType(); @@ -1036,7 +1037,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, workshareLoopType = llvm::omp::WorksharingLoopType::ForStaticLoop; } ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !loop.getNowait(), + ompLoc.DL, loopInfo, allocaIP, !wsloop.getNowait(), convertToScheduleKind(schedule), chunk, isSimd, scheduleModifier == omp::ScheduleModifier::monotonic, scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -1049,12 +1050,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(afterIP); // Process the reductions if required. - if (loop.getNumReductionVars() == 0) + if (wsloop.getNumReductionVars() == 0) return success(); // Create the reduction generators. We need to own them here because // ReductionInfo only accepts references to the generators. - collectReductionInfo(loop, builder, *ompBuilder, moduleTranslation, + collectReductionInfo(wsloop, builder, *ompBuilder, moduleTranslation, reductionDecls); // The call to createReductions below expects the block to have a // terminator. 
Create an unreachable instruction to serve as terminator @@ -1063,12 +1064,12 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(tempTerminator); llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = - ompBuilder->createReductions(builder.saveIP(), allocaIP, - ompBuilder->RIManager.getReductionInfos(), - loop.getNowait(), /*IsTeamsReduction*/ false, - /*HasDistribute*/ distributeCodeGen); + ompBuilder->createReductions( + builder.saveIP(), allocaIP, ompBuilder->RIManager.getReductionInfos(), + wsloop.getNowait(), /*IsTeamsReduction*/ false, + /*HasDistribute*/ distributeCodeGen); if (!contInsertPoint.getBlock()) - return loop->emitOpError() << "failed to convert reductions"; + return wsloop->emitOpError() << "failed to convert reductions"; auto nextInsertionPoint = ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); tempTerminator->eraseFromParent(); @@ -1331,7 +1332,8 @@ convertOmpParallel(Operation &opInst1, llvm::IRBuilderBase &builder, static LogicalResult convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - auto loop = cast(opInst); + auto simd = cast(opInst); + auto loop = cast(simd.getWrappedLoop()); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -1410,17 +1412,17 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); llvm::ConstantInt *simdlen = nullptr; - if (std::optional simdlenVar = loop.getSimdlen()) + if (std::optional simdlenVar = simd.getSimdlen()) simdlen = builder.getInt64(simdlenVar.value()); llvm::ConstantInt *safelen = nullptr; - if (std::optional safelenVar = loop.getSafelen()) + if (std::optional safelenVar = simd.getSafelen()) safelen = builder.getInt64(safelenVar.value()); llvm::MapVector alignedVars; ompBuilder->applySimd( loopInfo, alignedVars, - loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) + simd.getIfExpr() ? moduleTranslation.lookupValue(simd.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen); @@ -3320,11 +3322,11 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); return success(); }) - .Case([&](omp::TaskwaitOp) { + .Case([&](omp::TaskWaitOp) { ompBuilder->createTaskwait(builder.saveIP()); return success(); }) - .Case([&](omp::TaskyieldOp) { + .Case([&](omp::TaskYieldOp) { ompBuilder->createTaskyield(builder.saveIP()); return success(); }) From 0319ceb9306e37077e05ad98c981fb49f0c30a5c Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Tue, 26 Mar 2024 13:04:28 +0000 Subject: [PATCH 2/2] Akash- Fix tests related to omp.wsloop. 
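
The test updates below follow from the wrapper-based representation introduced
in the first patch. As a rough sketch (the SSA value names are illustrative and
not taken from any particular test), CHECK lines that previously matched a
single loop operation of the form

  omp.wsloop for (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
    ...
    omp.yield
  }

now match a wrapper containing a nested loop operation:

  omp.wsloop {
    omp.loopnest (%i) : i32 = (%lb) to (%ub) inclusive step (%step) {
      ...
      omp.yield
    }
  }

Clauses such as schedule, ordered, nowait and reduction stay on the omp.wsloop
wrapper, while the bounds, step and induction variable are carried by the
nested omp.loopnest.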
--- flang/test/Lower/OpenMP/FIR/copyin.f90 | 8 +- .../OpenMP/FIR/lastprivate-commonblock.f90 | 5 +- .../parallel-lastprivate-clause-scalar.f90 | 38 ++-- .../FIR/parallel-private-clause-fixes.f90 | 3 +- .../OpenMP/FIR/parallel-private-clause.f90 | 9 +- .../OpenMP/FIR/parallel-wsloop-firstpriv.f90 | 10 +- .../test/Lower/OpenMP/FIR/parallel-wsloop.f90 | 24 ++- .../Lower/OpenMP/FIR/stop-stmt-in-region.f90 | 3 +- flang/test/Lower/OpenMP/FIR/target.f90 | 3 +- flang/test/Lower/OpenMP/FIR/unstructured.f90 | 9 +- flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 | 11 +- .../test/Lower/OpenMP/FIR/wsloop-collapse.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-monotonic.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 | 3 +- .../test/Lower/OpenMP/FIR/wsloop-ordered.f90 | 6 +- .../Lower/OpenMP/FIR/wsloop-reduction-add.f90 | 18 +- .../OpenMP/FIR/wsloop-reduction-iand.f90 | 3 +- .../OpenMP/FIR/wsloop-reduction-ieor.f90 | 3 +- .../Lower/OpenMP/FIR/wsloop-reduction-ior.f90 | 5 +- .../FIR/wsloop-reduction-logical-eqv.f90 | 9 +- .../FIR/wsloop-reduction-logical-neqv.f90 | 9 +- .../Lower/OpenMP/FIR/wsloop-reduction-max.f90 | 6 +- .../Lower/OpenMP/FIR/wsloop-reduction-min.f90 | 6 +- flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 | 3 +- .../test/Lower/OpenMP/FIR/wsloop-variable.f90 | 15 +- flang/test/Lower/OpenMP/FIR/wsloop.f90 | 9 +- .../Todo/omp-default-clause-inner-loop.f90 | 3 +- flang/test/Lower/OpenMP/default-clause.f90 | 25 +-- flang/test/Lower/OpenMP/hlfir-wsloop.f90 | 3 +- .../Lower/OpenMP/lastprivate-commonblock.f90 | 3 +- flang/test/Lower/OpenMP/lastprivate-iv.f90 | 6 +- .../parallel-lastprivate-clause-scalar.f90 | 36 ++-- .../OpenMP/parallel-private-clause-fixes.f90 | 3 +- .../Lower/OpenMP/parallel-private-clause.f90 | 9 +- .../OpenMP/parallel-wsloop-firstpriv.f90 | 10 +- flang/test/Lower/OpenMP/parallel-wsloop.f90 | 24 ++- .../test/Lower/OpenMP/stop-stmt-in-region.f90 | 3 +- flang/test/Lower/OpenMP/target.f90 | 3 +- flang/test/Lower/OpenMP/unstructured.f90 | 9 +- flang/test/Lower/OpenMP/wsloop-chunks.f90 | 11 +- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-monotonic.f90 | 3 +- .../test/Lower/OpenMP/wsloop-nonmonotonic.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-ordered.f90 | 6 +- .../OpenMP/wsloop-reduction-add-hlfir.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-add.f90 | 21 +- .../Lower/OpenMP/wsloop-reduction-iand.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-ieor.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-ior.f90 | 3 +- .../OpenMP/wsloop-reduction-logical-and.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-eqv.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-neqv.f90 | 9 +- .../OpenMP/wsloop-reduction-logical-or.f90 | 9 +- .../OpenMP/wsloop-reduction-max-hlfir.f90 | 3 +- .../Lower/OpenMP/wsloop-reduction-max.f90 | 9 +- .../Lower/OpenMP/wsloop-reduction-min.f90 | 9 +- .../Lower/OpenMP/wsloop-reduction-mul.f90 | 21 +- flang/test/Lower/OpenMP/wsloop-simd.f90 | 3 +- .../test/Lower/OpenMP/wsloop-unstructured.f90 | 3 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 15 +- flang/test/Lower/OpenMP/wsloop.f90 | 9 +- flang/test/Transforms/DoConcurrent/basic.f90 | 3 +- mlir/test/CAPI/execution_engine.c | 5 +- .../OpenMPToLLVM/convert-to-llvmir.mlir | 11 +- .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 14 +- .../Dialect/LLVMIR/legalize-for-export.mlir | 5 +- mlir/test/Dialect/OpenMP/invalid.mlir | 63 ++++-- .../LLVMIR/omptarget-parallel-wsloop.mlir | 5 +- .../LLVMIR/omptarget-wsloop-collapsed.mlir | 5 +- mlir/test/Target/LLVMIR/omptarget-wsloop.mlir | 10 +- 
mlir/test/Target/LLVMIR/openmp-llvm.mlir | 202 ++++++++++++------ mlir/test/Target/LLVMIR/openmp-nested.mlir | 10 +- mlir/test/Target/LLVMIR/openmp-reduction.mlir | 37 ++-- 73 files changed, 592 insertions(+), 306 deletions(-) diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index 20023a81977ae..7161722b21090 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -145,7 +145,8 @@ subroutine copyin_derived_type() ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield @@ -286,7 +287,8 @@ subroutine common_1() !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { !CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref !CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref !CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref @@ -303,7 +305,7 @@ subroutine common_2() integer :: y common /d/ x, y !$omp threadprivate(/d/) - + !$omp parallel do copyin(/d/) do i = 1, x y = y + i diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 389bcba35f77f..f779fc9a775cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { !CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} @@ -17,7 +17,8 @@ !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32 !CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 index 2060e2062c1a3..68867a0bd4149 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 @@ -9,16 +9,17 @@ !CHECK-DAG: %[[ARG1_REF:.*]] = fir.convert %[[ARG1_UNBOX]]#0 : (!fir.ref>) -> !fir.ref> !CHECK: omp.parallel { -!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", +!CHECK-DAG: %[[ARG1_PVT:.*]] = fir.alloca !fir.char<1,5> {bindc_name = "arg1", ! Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT]] !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: %[[CVT1:.*]] = fir.convert %[[ARG1_PVT]] : (!fir.ref>) -> !fir.ref !CHECK-DAG: fir.call @llvm.memmove.p0.p0.i64(%[[CVT]], %[[CVT1]]{{.*}}) -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -55,7 +56,8 @@ subroutine lastprivate_character(arg1) !CHECK: func @_QPlastprivate_int(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -75,7 +77,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -90,7 +92,8 @@ subroutine lastprivate_int(arg1) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -111,7 +114,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -127,7 +130,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK: omp.parallel { !CHECK-DAG: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -148,7 +152,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -169,7 +173,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) ! Lastprivate Allocation !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -188,7 +193,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -207,7 +212,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -225,7 +231,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index c99bf761333b8..ce46925327ba1 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -13,7 +13,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! 
CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index 8f5d280943cc2..204909e30cb96 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -259,7 +259,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -295,7 +296,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -330,7 +332,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index 6eb39a2f63725..b26e618693316 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-fir -hlfir=false %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) integer::a integer::n @@ -17,7 +17,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -29,7 +30,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) integer::a integer::n @@ -48,7 +49,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index 8649cf284ffd9..b1003253822da 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -35,7 +36,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -58,7 +60,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -90,7 +93,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref @@ -140,7 +144,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -182,7 +187,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -224,7 +230,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -266,7 +273,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index d6c10bdee88d5..2c6f460ceacac 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -77,7 +77,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 3962603572ba0..9e113e2e59d8c 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -487,7 +487,8 @@ subroutine omp_target_parallel_do !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_6:.*]] = arith.constant 1024 : i32 !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { !CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref !CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_4]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index bfaf38b7ef1af..390b3a8746e8b 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -67,14 +67,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -117,7 +119,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 4030f46299d0b..4b6498adb31de 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -19,7 +19,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -37,7 +38,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! 
CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref @@ -45,7 +47,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -61,7 +63,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index 933fc0910e338..aa66b49edf9ec 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -39,7 +39,8 @@ program wsloop_collapse do i = 1, a do j= 1, b do k = 1, c -! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! 
CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 1c381475f6cbb..70f0e7d00b4f9 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 3f425200b8fa4..9170a75b8248e 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -16,7 +16,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 index 7548d7a597228..9b3daba2f170b 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 5664529416fe8..a620cd4852965 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) { ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref ! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref
@@ -97,7 +98,8 @@ subroutine simple_int_reduction
 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
 ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_3]] : !fir.ref
@@ -133,7 +135,8 @@ subroutine simple_real_reduction
 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_5:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) for (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_7:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_8:.*]]) : i32 = (%[[VAL_4]]) to (%[[VAL_5]]) inclusive step (%[[VAL_6]]) {
 ! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref
 ! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]] : !fir.ref
@@ -210,7 +213,8 @@ subroutine simple_real_reduction_switch_order
 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
 ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref
 ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref
 ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref
@@ -263,7 +267,8 @@ subroutine multiple_int_reductions_same_type
 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
 ! CHECK: %[[VAL_9:.*]] = arith.constant 100 : i32
 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
+! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_1]] -> %[[VAL_11:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_2]] -> %[[VAL_12:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_3]] -> %[[VAL_13:.*]] : !fir.ref) {
+! CHECK: omp.loopnest (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) {
 ! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]] : !fir.ref
 !
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]] : !fir.ref @@ -322,7 +327,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) for (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_2]] -> %[[VAL_13:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_3]] -> %[[VAL_14:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_4]] -> %[[VAL_15:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_1]] -> %[[VAL_16:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_17:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) { ! CHECK: fir.store %[[VAL_17]] to %[[VAL_9]] : !fir.ref ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_13]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 index 9ce1725dbab04..c5401204c0ca6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-iand.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iandEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IAND_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 index f6027416246af..7775bb4c2dc97 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ieor.f90 @@ -13,7 +13,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_ieorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 index bc143611abe8d..cef02492931c1 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-ior.f90 @@ -10,10 +10,11 @@ !CHECK: omp.yield(%[[IOR_VAL_I]] : i32) !CHECK-LABEL: @_QPreduction_ior -!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> +!CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_iorEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for 
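The replacement lines that follow switch this check from the single fused op to the wrapper form. For reference, a minimal sketch of that form as the updated CHECK lines describe it: `omp.wsloop` carries only the worksharing clauses and wraps a single `omp.loopnest` that holds the bounds, step, and loop body. SSA names (%x, %prv, %iv, %lb, %ub, %step) and the i32 element type are placeholders, not values from the tests:

  omp.wsloop reduction(@ior_i_32 %x -> %prv : !fir.ref<i32>) {
    omp.loopnest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
      // body: load %prv, IOR it with the current array element, store it back
      omp.yield
    }
  }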
+!CHECK: omp.wsloop reduction(@[[IOR_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index d5aacd74d8b10..7592da1a8844d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -30,7 +30,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -72,7 +73,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -122,7 +124,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @eqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @eqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 9f44e0e26d407..b8cdf4bf46c7f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -31,7 +31,8 @@ ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! 
CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref> ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_5]] : !fir.ref @@ -73,7 +74,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_7:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) for (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_9:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_10:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { ! CHECK: fir.store %[[VAL_10]] to %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_5]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i32) -> i64 @@ -123,7 +125,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) for (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_2]] -> %[[VAL_15:.*]] : !fir.ref>, @neqv_reduction %[[VAL_3]] -> %[[VAL_16:.*]] : !fir.ref>, @neqv_reduction %[[VAL_4]] -> %[[VAL_17:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_18:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_18]] to %[[VAL_11]] : !fir.ref ! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_15]] : !fir.ref> ! 
CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_11]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 index af79658491b56..85702c5dc020d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-max.f90 @@ -21,7 +21,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_max_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref @@ -34,7 +35,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_max_realEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MAX_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 index 1095718b4b13f..b82d943009e90 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-min.f90 @@ -21,7 +21,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFreduction_min_intEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_I]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref @@ -35,7 +36,8 @@ !CHECK-SAME: %[[Y_BOX:.*]]: !fir.box> !CHECK: %[[X_REF:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFreduction_min_realEx"} !CHECK: omp.parallel -!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[MIN_DECLARE_F]] %[[X_REF]] -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: %[[LPRV:.+]] = fir.load %[[PRV]] : !fir.ref !CHECK: %[[Y_I_REF:.*]] = fir.coordinate_of %[[Y_BOX]] !CHECK: %[[Y_I:.*]] = fir.load %[[Y_I_REF]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 index 2e3f8ca3c207d..446ad4279a682 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref do i=1, 9 diff 
--git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 index 4f34f30f3e7c9..f00b7ec0979cb 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]] : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]] : !fir.ref @@ -65,7 +67,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]] : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]] : !fir.ref @@ -97,7 +100,8 @@ end program wsloop_variable !CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref !CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32 !CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV:.*]] : !fir.ref !CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref @@ -146,7 +150,8 @@ subroutine wsloop_variable_sub !CHECK: %[[C1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[C1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) { !CHECK: %[[ARG0_I8:.*]] = fir.convert %[[ARG0]] : (i32) -> i8 !CHECK: fir.store %[[ARG0_I8]] to %[[IV2]] : !fir.ref !CHECK: %[[IV2LOAD:.*]] = fir.load %[[IV2]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 
index abc0489b08ff5..66972c73d2eb0 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -11,7 +11,8 @@ subroutine simple_loop ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref @@ -34,7 +35,8 @@ subroutine simple_loop_with_step ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO @@ -57,7 +59,8 @@ subroutine loop_with_schedule_nowait ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(runtime) nowait { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index 5c624d31b5f36..4c084d81ffa89 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -12,7 +12,8 @@ ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { ! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! 
CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref diff --git a/flang/test/Lower/OpenMP/default-clause.f90 b/flang/test/Lower/OpenMP/default-clause.f90 index 0e118742689d6..401ed4f612467 100644 --- a/flang/test/Lower/OpenMP/default-clause.f90 +++ b/flang/test/Lower/OpenMP/default-clause.f90 @@ -192,7 +192,7 @@ subroutine nested_default_clause_tests !CHECK: %[[INNER_PRIVATE_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFnested_default_clause_testsEk"} !CHECK: %[[INNER_PRIVATE_K_DECL:.*]]:2 = hlfir.declare %[[INNER_PRIVATE_K]] {uniq_name = "_QFnested_default_clause_testsEk"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_K_DECL]]#0 : !fir.ref -!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref +!CHECK: hlfir.assign %[[TEMP]] to %[[INNER_PRIVATE_K_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 30 : i32 !CHECK: hlfir.assign %[[CONST]] to %[[PRIVATE_Y_DECL]]#0 : i32, !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 40 : i32 @@ -205,21 +205,21 @@ subroutine nested_default_clause_tests !CHECK: } !CHECK: omp.terminator !CHECK: } - !$omp parallel firstprivate(x) private(y) shared(w) default(private) + !$omp parallel firstprivate(x) private(y) shared(w) default(private) !$omp parallel default(private) y = 20 - x = 10 - !$omp end parallel + x = 10 + !$omp end parallel - !$omp parallel default(firstprivate) shared(y) private(w) + !$omp parallel default(firstprivate) shared(y) private(w) y = 30 - w = 40 + w = 40 z = 50 k = 40 !$omp end parallel !$omp end parallel - - + + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFnested_default_clause_testsEy"} @@ -260,8 +260,8 @@ subroutine nested_default_clause_tests !$omp parallel default(private) shared(z) w = x + z !$omp end parallel - !$omp end parallel - + !$omp end parallel + !CHECK: omp.parallel { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFnested_default_clause_testsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -319,7 +319,7 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: return -!CHECK: } +!CHECK: } !$omp parallel default(firstprivate) !$omp single x = y @@ -352,7 +352,8 @@ subroutine skipped_default_clause_checks() type(it)::iii !CHECK: omp.parallel { -!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) for (%[[ARG:.*]]) {{.*}} { +!CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_Z_DECLARE]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest (%[[ARG:.*]]) {{.*}} { !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..307be89fb5ecb 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -12,7 +12,8 @@ subroutine simple_loop !$OMP PARALLEL ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) + ! CHECK: omp.wsloop { + ! 
CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 index a11bdee156637..45450a5a8fd9b 100644 --- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 @@ -15,7 +15,8 @@ !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X_REF]] {uniq_name = "_QFlastprivate_commonEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"} !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { !CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 !CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 index 70fe500129d12..16204ed79b019 100644 --- a/flang/test/Lower/OpenMP/lastprivate-iv.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -9,7 +9,8 @@ !CHECK: %[[LB:.*]] = arith.constant 4 : i32 !CHECK: %[[UB:.*]] = arith.constant 10 : i32 !CHECK: %[[STEP:.*]] = arith.constant 3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -41,7 +42,8 @@ subroutine lastprivate_iv_inc() !CHECK: %[[LB:.*]] = arith.constant 10 : i32 !CHECK: %[[UB:.*]] = arith.constant 1 : i32 !CHECK: %[[STEP:.*]] = arith.constant -3 : i32 -!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref !CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index 28f59c95d60bb..cf4f028987022 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -14,13 +14,14 @@ !CHECK-DAG: %[[ARG1_PVT_DECL:.*]]:2 = hlfir.declare %[[ARG1_PVT]] typeparams %[[FIVE]] {uniq_name = "_QFlastprivate_characterEarg1"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! 
Check that we are accessing the clone inside the loop -!CHECK-DAG: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK-DAG: omp.wsloop { +!CHECK-DAG: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !CHECK-DAG: %[[UNIT:.*]] = arith.constant 6 : i32 !CHECK-NEXT: %[[ADDR:.*]] = fir.address_of(@_QQclX -!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] +!CHECK-NEXT: %[[CVT0:.*]] = fir.convert %[[ADDR]] !CHECK-NEXT: %[[CNST:.*]] = arith.constant !CHECK-NEXT: %[[CALL_BEGIN_IO:.*]] = fir.call @_FortranAioBeginExternalListOutput(%[[UNIT]], %[[CVT0]], %[[CNST]]) {{.*}}: (i32, !fir.ref, i32) -> !fir.ref -!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 +!CHECK-NEXT: %[[CVT_0_1:.*]] = fir.convert %[[ARG1_PVT_DECL]]#1 !CHECK-NEXT: %[[CVT_0_2:.*]] = fir.convert %[[FIVE]] !CHECK-NEXT: %[[CALL_OP_ASCII:.*]] = fir.call @_FortranAioOutputAscii(%[[CALL_BEGIN_IO]], %[[CVT_0_1]], %[[CVT_0_2]]) !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) @@ -37,12 +38,12 @@ ! Testing lastprivate val update !CHECK-DAG: hlfir.assign %[[ARG1_PVT_DECL]]#0 to %[[ARG1_DECL]]#0 temporary_lhs : !fir.ref>, !fir.ref> -!CHECK-DAG: } +!CHECK-DAG: } !CHECK-DAG: omp.yield subroutine lastprivate_character(arg1) character(5) :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1(n:n) = 'c' @@ -57,7 +58,8 @@ subroutine lastprivate_character(arg1) !CHECK-DAG: omp.parallel { !CHECK-DAG: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-DAG: %[[CLONE_DECL:.*]]:2 = hlfir.declare %[[CLONE]] {uniq_name = "_QFlastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -77,7 +79,7 @@ subroutine lastprivate_character(arg1) subroutine lastprivate_int(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 @@ -96,7 +98,8 @@ subroutine lastprivate_int(arg1) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_intEarg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! 
Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -117,7 +120,7 @@ subroutine lastprivate_int(arg1) subroutine mult_lastprivate_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -137,7 +140,8 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK-DAG: %[[CLONE1_DECL:.*]]:2 = hlfir.declare %[[CLONE1]] {uniq_name = "_QFmult_lastprivate_int2Earg1"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-DAG: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK-DAG: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFmult_lastprivate_int2Earg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -158,7 +162,7 @@ subroutine mult_lastprivate_int(arg1, arg2) subroutine mult_lastprivate_int2(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO LASTPRIVATE(arg1, arg2) do n = 1, 5 arg1 = 2 @@ -183,7 +187,8 @@ subroutine mult_lastprivate_int2(arg1, arg2) !CHECK: %[[CLONE2:.*]] = fir.alloca i32 {bindc_name = "arg2" !CHECK: %[[CLONE2_DECL:.*]]:2 = hlfir.declare %[[CLONE2]] {uniq_name = "_QFfirstpriv_lastpriv_intEarg2"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-NOT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 @@ -202,7 +207,7 @@ subroutine mult_lastprivate_int2(arg1, arg2) subroutine firstpriv_lastpriv_int(arg1, arg2) integer :: arg1, arg2 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg2) do n = 1, 5 arg1 = 2 @@ -223,7 +228,8 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1_DECL]]#0 : !fir.ref !CHECK-NEXT: hlfir.assign %[[FPV_LD]] to %[[CLONE1_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK-NEXT: omp.barrier -!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check !CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 !CHECK: %[[C0:.*]] = arith.constant 0 : i32 @@ -241,7 +247,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) subroutine firstpriv_lastpriv_int2(arg1) integer :: arg1 -!$OMP PARALLEL +!$OMP PARALLEL !$OMP DO FIRSTPRIVATE(arg1) LASTPRIVATE(arg1) do n = 1, 5 arg1 = 2 diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 8533106b7ac48..53846bef2d4f9 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -21,7 +21,8 @@ ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[GAMA_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I_DECL]]#1 : !fir.ref ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index diff --git a/flang/test/Lower/OpenMP/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/parallel-private-clause.f90 index 5578b6710da7c..e870d8f8f0669 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause.f90 @@ -304,7 +304,8 @@ subroutine simple_loop_1 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -342,7 +343,8 @@ subroutine simple_loop_2 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL]]#1 : !fir.ref @@ -379,7 +381,8 @@ subroutine simple_loop_3 ! FIRDialect: %[[WS_UB:.*]] = arith.constant 9 : i32 ! FIRDialect: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! FIRDialect: omp.wsloop { + ! FIRDialect: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV_DECL:.*]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index 716a7d71bb628..9eb05978b2d4a 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -3,7 +3,7 @@ ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s -! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) +! CHECK: func @_QPomp_do_firstprivate(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}) subroutine omp_do_firstprivate(a) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivateEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) integer::a @@ -20,7 +20,8 @@ subroutine omp_do_firstprivate(a) ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: omp.wsloop { + ! CHECK-NEXT: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK-NEXT: fir.store %[[ARG1]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield @@ -32,7 +33,7 @@ subroutine omp_do_firstprivate(a) call bar(a) end subroutine omp_do_firstprivate -! CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) +! 
CHECK: func @_QPomp_do_firstprivate2(%[[ARG0:.*]]: !fir.ref {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "n"}) subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[ARG0_DECL:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_do_firstprivate2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[ARG1_DECL:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_do_firstprivate2En"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -56,7 +57,8 @@ subroutine omp_do_firstprivate2(a, n) ! CHECK: %[[LB:.*]] = fir.load %[[A_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[UB:.*]] = fir.load %[[N_PVT_DECL]]#0 : !fir.ref ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) ! CHECK: fir.store %[[ARG2]] to %[[I_PVT_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PVT_DECL]]#1, %[[A_PVT_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90 index c06f941b74b58..fcb2791554ab0 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90 @@ -9,7 +9,8 @@ subroutine simple_parallel_do ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -37,7 +38,8 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -61,7 +63,8 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop schedule(dynamic) { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -97,7 +100,8 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 - ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) + ! CHECK: omp.wsloop { + ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 ! 
CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]]#1 : !fir.ref @@ -150,7 +154,8 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -196,7 +201,8 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -241,7 +247,8 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_DECL]]#1, %[[COND_PRIV_DECL]]#1, %[[NT_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -287,7 +294,8 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { ! CHECK: fir.store %[[I]] to %[[I_PRIV_DECL]]#1 : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_DECL]]#1, %[[A_PRIV_DECL]]#1) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 index fdbabc21b2c9e..fd80c563a8f4f 100644 --- a/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/stop-stmt-in-region.f90 @@ -82,7 +82,8 @@ subroutine test_stop_in_region3() ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { ! CHECK: fir.store %[[VAL_6]] to %[[VAL_0_DECL]]#1 : !fir.ref ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 43598aff08bfe..e1c2a1f0966f5 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -594,7 +594,8 @@ subroutine omp_target_parallel_do !$omp target parallel do map(tofrom: a) !CHECK: %[[I_PVT_ALLOCA:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_ALLOCA]] {uniq_name = "_QFomp_target_parallel_doEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) - !CHECK: omp.wsloop for (%[[I_VAL:.*]]) : i32 + !CHECK: omp.wsloop { + !CHECK: omp.loopnest (%[[I_VAL:.*]]) : i32 do i = 1, 1024 !CHECK: fir.store %[[I_VAL]] to %[[I_PVT_DECL]]#1 : !fir.ref !CHECK: %[[C10:.*]] = arith.constant 10 : i32 diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 index e5bf980ce90fd..13d3c2bf04206 100644 --- a/flang/test/Lower/OpenMP/unstructured.f90 +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -70,14 +70,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 -! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG1]] to %[[OMP_LOOP_K_DECL]]#1 : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD_1:.*]] = fir.load %[[OMP_LOOP_K_DECL]]#0 : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) ! CHECK: omp.yield ! CHECK: } -! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG2]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 @@ -121,7 +123,8 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[OMP_LOOP_J_DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFss4Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) -! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[ARG:.*]]) : {{.*}} { ! CHECK: fir.store %[[ARG]] to %[[OMP_LOOP_J_DECL]]#1 : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} diff --git a/flang/test/Lower/OpenMP/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/wsloop-chunks.f90 index 5016c8985bda0..2fe787150de32 100644 --- a/flang/test/Lower/OpenMP/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/wsloop-chunks.f90 @@ -20,7 +20,8 @@ program wsloop ! CHECK: %[[VAL_3:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { ! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]]#1 : !fir.ref ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]]#0 : !fir.ref ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 @@ -38,7 +39,8 @@ program wsloop ! CHECK: %[[VAL_15:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 -! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { ! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 ! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]]#0 : !fir.ref @@ -46,7 +48,7 @@ program wsloop ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } - + end do !$OMP END DO NOWAIT chunk = 6 @@ -62,7 +64,8 @@ program wsloop ! CHECK: %[[VAL_30:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]]#0 : !fir.ref -! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { +! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait { +! CHECK: omp.loopnest (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { ! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]]#1 : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 ! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index c93fcf4ef968d..85e1134e68720 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -49,7 +49,8 @@ program wsloop_collapse !CHECK: %[[VAL_30:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 index fba9105b98181..5d0cfb337bb6e 100644 --- a/flang/test/Lower/OpenMP/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-monotonic.f90 @@ -15,7 +15,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, monotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 index 1bd7a2edc0f52..024c4ebf433fb 100644 --- a/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 +++ b/flang/test/Lower/OpenMP/wsloop-nonmonotonic.f90 @@ -17,7 +17,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : 
i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[ALLOCA_IV]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-ordered.f90 b/flang/test/Lower/OpenMP/wsloop-ordered.f90 index 5185d2d085bac..5483b9228e035 100644 --- a/flang/test/Lower/OpenMP/wsloop-ordered.f90 +++ b/flang/test/Lower/OpenMP/wsloop-ordered.f90 @@ -6,7 +6,8 @@ subroutine wsloop_ordered_no_para() integer :: a(10), i -! CHECK: omp.wsloop ordered(0) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(0) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } @@ -25,7 +26,8 @@ subroutine wsloop_ordered_with_para() integer :: a(10), i ! CHECK: func @_QPwsloop_ordered_with_para() { -! CHECK: omp.wsloop ordered(1) for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { +! CHECK: omp.wsloop ordered(1) { +! CHECK: omp.loopnest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 4d30282fc8c21..bd9f68ecc7e4b 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -27,7 +27,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 index 7df4f37b98df8..a8f4e50f6a51c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add.f90 @@ -68,7 +68,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -108,7 +109,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -149,7 +151,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -188,7 +191,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -236,7 +240,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! 
CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -297,7 +302,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@add_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -365,7 +371,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: omp.wsloop reduction(@add_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @add_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @add_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @add_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 index 9588531f6c909..6af928f6c9fb9 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-iand.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@iand_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! 
CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iandEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 index a14a37101874c..8a7d9366cf387 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ieor.f90 @@ -19,7 +19,8 @@ !CHECK: omp.parallel !CHECK: %[[I_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I_REF]] {uniq_name = "_QFreduction_ieorEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) for +!CHECK: omp.wsloop reduction(@[[IEOR_DECLARE_I]] %[[X_DECL]]#0 -> %[[PRV:.+]] : !fir.ref) { +!CHECK: omp.loopnest !CHECK: fir.store %{{.*}} to %[[I_DECL]]#1 : !fir.ref !CHECK: %[[PRV_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[I_32:.*]] = fir.load %[[I_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 index 3b5e327439358..8f28916724267 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-ior.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) +! CHECK: omp.wsloop reduction(@ior_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_iorEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 index 17d321620cca8..4303f15990ecc 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-and.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ end subroutine simple_reduction ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! 
CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -141,7 +143,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@and_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @and_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @and_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 index 8204e4c878cb0..994477f4edcec 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-eqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! 
CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@eqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @eqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @eqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 index 623368a50e864..29849b68be572 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-neqv.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -84,7 +85,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -143,7 +145,8 @@ subroutine simple_reduction_switch_order(y) ! 
CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: omp.wsloop reduction(@neqv_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @neqv_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @neqv_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 index f1ae1bc687cd5..719677ca729f2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-logical-or.f90 @@ -36,7 +36,8 @@ ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reductionEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_17]]#0 : !fir.ref> @@ -83,7 +84,8 @@ subroutine simple_reduction(y) ! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_13:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) for (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { +! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_4]]#0 -> %[[VAL_15:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_16:.*]]) : i32 = (%[[VAL_12]]) to (%[[VAL_13]]) inclusive step (%[[VAL_14]]) { ! CHECK: fir.store %[[VAL_16]] to %[[VAL_11]]#1 : !fir.ref ! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsimple_reduction_switch_orderEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref @@ -140,7 +142,8 @@ subroutine simple_reduction_switch_order(y) ! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_21:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) for (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! 
CHECK: omp.wsloop reduction(@or_reduction %[[VAL_7]]#0 -> %[[VAL_23:.*]] : !fir.ref>, @or_reduction %[[VAL_9]]#0 -> %[[VAL_24:.*]] : !fir.ref>, @or_reduction %[[VAL_11]]#0 -> %[[VAL_25:.*]] : !fir.ref>) { +! CHECK: omp.loopnest (%[[VAL_26:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { ! CHECK: fir.store %[[VAL_26]] to %[[VAL_19]]#1 : !fir.ref ! CHECK: %[[VAL_27:.*]]:2 = hlfir.declare %[[VAL_23]] {uniq_name = "_QFmultiple_reductionsEx"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_24]] {uniq_name = "_QFmultiple_reductionsEy"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 index ed25cedae90c6..45ae04f3712ac 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-hlfir.f90 @@ -29,7 +29,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index ea3b1bebce038..09856f66b1e2c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -88,7 +90,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@max_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_max_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index 3aa9001869dc5..eaac48b9e1b5a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -40,7 +40,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_i_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -69,7 +70,8 @@ ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) for (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_12:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_13:.*]]) : i32 = (%[[VAL_9]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) { ! CHECK: fir.store %[[VAL_13]] to %[[VAL_8]]#1 : !fir.ref ! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref @@ -90,7 +92,8 @@ ! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_33:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_34:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) for (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { +! CHECK: omp.wsloop reduction(@min_f_32 %[[VAL_4]]#0 -> %[[VAL_35:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_36:.*]]) : i32 = (%[[VAL_32]]) to (%[[VAL_33]]) inclusive step (%[[VAL_34]]) { ! CHECK: fir.store %[[VAL_36]] to %[[VAL_31]]#1 : !fir.ref ! CHECK: %[[VAL_37:.*]]:2 = hlfir.declare %[[VAL_35]] {uniq_name = "_QFreduction_min_realEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_31]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 index 4774fba3f33e9..4656960a45ba2 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-mul.f90 @@ -61,7 +61,8 @@ ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -97,7 +98,8 @@ subroutine simple_int_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref @@ -134,7 +136,8 @@ subroutine simple_real_reduction ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_int_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -170,7 +173,8 @@ subroutine simple_int_reduction_switch_order ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_10:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { ! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref ! 
CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFsimple_real_reduction_switch_orderEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref @@ -215,7 +219,8 @@ subroutine simple_real_reduction_switch_order ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_i_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_int_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_int_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -273,7 +278,8 @@ subroutine multiple_int_reductions_same_type ! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_14:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) for (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { +! CHECK: omp.wsloop reduction(@multiply_reduction_f_32 %[[VAL_3]]#0 -> %[[VAL_16:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_5]]#0 -> %[[VAL_17:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_7]]#0 -> %[[VAL_18:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_19:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) { ! CHECK: fir.store %[[VAL_19]] to %[[VAL_12]]#1 : !fir.ref ! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFmultiple_real_reductions_same_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_17]] {uniq_name = "_QFmultiple_real_reductions_same_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -338,7 +344,8 @@ subroutine multiple_real_reductions_same_type ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) for (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! 
CHECK: omp.wsloop reduction(@multiply_reduction_i_32 %[[VAL_5]]#0 -> %[[VAL_19:.*]] : !fir.ref, @multiply_reduction_i_64 %[[VAL_7]]#0 -> %[[VAL_20:.*]] : !fir.ref, @multiply_reduction_f_32 %[[VAL_9]]#0 -> %[[VAL_21:.*]] : !fir.ref, @multiply_reduction_f_64 %[[VAL_3]]#0 -> %[[VAL_22:.*]] : !fir.ref) { +! CHECK: omp.loopnest (%[[VAL_23:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { ! CHECK: fir.store %[[VAL_23]] to %[[VAL_15]]#1 : !fir.ref ! CHECK: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFmultiple_reductions_different_typeEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFmultiple_reductions_different_typeEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index c3d5e3e0cda59..490f1665a7b1b 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -14,7 +14,8 @@ program wsloop_dynamic !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) +!CHECK: omp.wsloop schedule(runtime, simd) nowait { +!CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !CHECK: fir.store %[[I]] to %[[STORE:.*]]#1 : !fir.ref do i=1, 9 diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 index 7fe63a1fe607c..f5c1a8126edc1 100644 --- a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -29,7 +29,8 @@ end subroutine sub ! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref> {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "y"}) { ! [...] -! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! CHECK: omp.wsloop { +! CHECK: omp.loopnest (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { ! [...] ! CHECK: cf.br ^bb1 ! 
CHECK: ^bb1: diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index b3758f1fdc00f..e2a71e3d1b888 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,8 @@ program wsloop_variable !CHECK: %[[TMP5:.*]] = fir.convert %{{.*}} : (i128) -> i64 !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]]#1 : !fir.ref !CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]]#1 : !fir.ref @@ -46,7 +47,8 @@ program wsloop_variable !CHECK: %[[TMP12:.*]] = arith.constant 1 : i32 !CHECK: %[[TMP13:.*]] = fir.convert %{{.*}} : (i8) -> i32 !CHECK: %[[TMP14:.*]] = fir.convert %{{.*}} : (i64) -> i32 -!CHECK: omp.wsloop for (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG0:.*]]) : i32 = (%[[TMP12]]) to (%[[TMP13]]) inclusive step (%[[TMP14]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i32) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE3:.*]]#1 : !fir.ref !CHECK: %[[LOAD3:.*]] = fir.load %[[STORE3]]#0 : !fir.ref @@ -64,7 +66,8 @@ program wsloop_variable !CHECK: %[[TMP17:.*]] = fir.convert %{{.*}} : (i8) -> i64 !CHECK: %[[TMP18:.*]] = fir.convert %{{.*}} : (i16) -> i64 !CHECK: %[[TMP19:.*]] = fir.convert %{{.*}} : (i32) -> i64 -!CHECK: omp.wsloop for (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[ARG1:.*]]) : i64 = (%[[TMP17]]) to (%[[TMP18]]) inclusive step (%[[TMP19]]) { !CHECK: %[[ARG1_I128:.*]] = fir.convert %[[ARG1]] : (i64) -> i128 !CHECK: fir.store %[[ARG1_I128]] to %[[STORE4:.*]]#1 : !fir.ref !CHECK: %[[LOAD4:.*]] = fir.load %[[STORE4]]#0 : !fir.ref @@ -118,7 +121,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref !CHECK: %[[VAL_25:.*]] = fir.convert %[[VAL_23]] : (i8) -> i32 !CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_24]] : (i16) -> i32 -!CHECK: omp.wsloop for (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_27:.*]]) : i32 = (%[[VAL_22]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { !CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> i16 !CHECK: fir.store %[[VAL_28]] to %[[VAL_3]]#1 : !fir.ref !CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref @@ -160,7 +164,8 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_50:.*]] = arith.constant 1 : i32 !CHECK: %[[VAL_51:.*]] = arith.constant 10 : i32 !CHECK: %[[VAL_52:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { +!CHECK: omp.wsloop { +!CHECK: omp.loopnest (%[[VAL_53:.*]]) : i32 = (%[[VAL_50]]) to (%[[VAL_51]]) inclusive step (%[[VAL_52]]) { !CHECK: %[[VAL_54:.*]] = fir.convert %[[VAL_53]] : (i32) -> i8 !CHECK: fir.store %[[VAL_54]] to %[[VAL_1]]#1 : !fir.ref !CHECK: %[[VAL_55:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref diff --git 
a/flang/test/Lower/OpenMP/wsloop.f90 b/flang/test/Lower/OpenMP/wsloop.f90
index 4068f715c3e18..71cd0ddc2763b 100644
--- a/flang/test/Lower/OpenMP/wsloop.f90
+++ b/flang/test/Lower/OpenMP/wsloop.f90
@@ -12,7 +12,8 @@ subroutine simple_loop
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO
   do i=1, 9
     ! CHECK: fir.store %[[I]] to %[[IV_DECL:.*]]#1 : !fir.ref
@@ -36,7 +37,8 @@ subroutine simple_loop_with_step
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32
-  ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref
   ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_DECL]]#0 : !fir.ref
   !$OMP DO
@@ -60,7 +62,8 @@ subroutine loop_with_schedule_nowait
   ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32
   ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32
   ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32
-  ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]])
+  ! CHECK: omp.wsloop schedule(runtime) nowait {
+  ! CHECK: omp.loopnest (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) {
   !$OMP DO SCHEDULE(runtime)
   do i=1, 9
     ! CHECK: fir.store %[[I]] to %[[IV_DECL]]#1 : !fir.ref
diff --git a/flang/test/Transforms/DoConcurrent/basic.f90 b/flang/test/Transforms/DoConcurrent/basic.f90
index a555a25c9bad5..248223d72ff11 100644
--- a/flang/test/Transforms/DoConcurrent/basic.f90
+++ b/flang/test/Transforms/DoConcurrent/basic.f90
@@ -23,7 +23,8 @@ program do_concurrent_basic
   ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index
   ! CHECK: %[[STEP:.*]] = arith.constant 1 : index
 
-  ! CHECK: omp.wsloop for (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
+  ! CHECK: omp.wsloop {
+  ! CHECK: omp.loopnest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
   ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32
   ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#1 : !fir.ref
   ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref
diff --git a/mlir/test/CAPI/execution_engine.c b/mlir/test/CAPI/execution_engine.c
index 38a8fb8c3e213..a3bf38bdfb131 100644
--- a/mlir/test/CAPI/execution_engine.c
+++ b/mlir/test/CAPI/execution_engine.c
@@ -99,10 +99,13 @@ void testOmpCreation(void) {
 " %1 = arith.constant 1 : i32 \n"
 " %2 = arith.constant 2 : i32 \n"
 " omp.parallel { \n"
-" omp.wsloop for (%3) : i32 = (%0) to (%2) step (%1) { \n"
+" omp.wsloop { \n"
+" omp.loopnest (%3) : i32 = (%0) to (%2) step (%1) { \n"
 " omp.yield \n"
 " } \n"
 " omp.terminator \n"
+" } \n"
+" omp.terminator \n"
 " } \n"
 " llvm.return \n"
 " } \n"
diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
index 6cbc0c8f4be9a..de5dfc1a288d0 100644
--- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
@@ -71,7 +71,8 @@ func.func @branch_loop() {
 func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: omp.parallel
   omp.parallel {
-    // CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
+    // CHECK: omp.wsloop {
+    // CHECK: omp.loopnest (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
     "omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
     ^bb0(%arg6: index, %arg7: index):
       // CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index
@@ -320,7 +321,8 @@ llvm.func @_QPsb() {
 // CHECK-LABEL: @_QPsimple_reduction
 // CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr
 // CHECK: omp.parallel
-// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) for
+// CHECK: omp.wsloop reduction(@eqv_reduction %{{.+}} -> %[[PRV:.+]] : !llvm.ptr) {
+// CHECK: omp.loopnest
 // CHECK: %[[LPRV:.+]] = llvm.load %[[PRV]] : !llvm.ptr -> i32
 // CHECK: %[[CMP:.+]] = llvm.icmp "eq" %{{.*}}, %[[LPRV]] : i32
 // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CMP]] : i1 to i32
@@ -353,7 +355,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
   llvm.store %5, %4 : i32, !llvm.ptr
   omp.parallel {
     %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr
-    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
+    omp.wsloop reduction(@eqv_reduction %4 -> %prv : !llvm.ptr) {
+    omp.loopnest (%arg1) : i32 = (%1) to (%0) inclusive step (%1) {
       llvm.store %arg1, %6 : i32, !llvm.ptr
       %7 = llvm.load %6 : !llvm.ptr -> i32
       %8 = llvm.sext %7 : i32 to i64
@@ -367,6 +370,8 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr {fir.bindc_name = "y"}) {
       omp.yield
     }
     omp.terminator
+    }
+    omp.terminator
   }
   llvm.return
 }
diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
index acd2690c56e2e..d1e515b8ae813 100644
--- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
+++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir
@@ -5,7 +5,8 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index,
                     %arg3: index, %arg4: index, %arg5: index) {
   // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32
   // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) {
-  // CHECK:
omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () @@ -23,8 +24,9 @@ func.func @nested_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { - // CHECK: memref.alloca_scope + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: omp.parallel // CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { @@ -47,7 +49,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) { // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) { // CHECK: memref.alloca_scope scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) { // CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> () @@ -60,7 +63,8 @@ func.func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { - // CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { + // CHECK: omp.wsloop { + // CHECK: omp.loopnest (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) { // CHECK: memref.alloca_scope scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) { // CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> () diff --git a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir index 37720e98d92a9..f48f8152ee022 100644 --- a/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir +++ b/mlir/test/Dialect/LLVMIR/legalize-for-export.mlir @@ -32,7 +32,8 @@ llvm.func @repeated_successor_no_args(%arg0: i1) { // CHECK: @repeated_successor_openmp llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) { - omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { + omp.wsloop { + omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) { // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}}) llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64) // CHECK: ^[[BB1]] @@ -41,6 +42,8 @@ llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: // CHECK: ^[[BB2]](%[[ARG:.*]]: i64): // CHECK: llvm.br ^[[BB1]](%[[ARG]] : i64) } + omp.terminator + } llvm.return } diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 448f37b32fff6..8a8ec498ed76b 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -98,7 +98,7 @@ func.func @invalid_parent(%lb : index, %ub : index, %step : index) { func.func @type_mismatch(%lb : 
index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{range argument type does not match corresponding IV type}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv2: i32): @@ -112,7 +112,7 @@ func.func @type_mismatch(%lb : index, %ub : index, %step : index) { func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // TODO Remove induction variables from omp.wsloop. - omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop { // expected-error@+1 {{number of range arguments and IVs do not match}} "omp.loopnest" (%lb, %ub, %step) ({ ^bb0(%iv1 : index, %iv2 : index): @@ -125,7 +125,7 @@ func.func @iv_number_mismatch(%lb : index, %ub : index, %step : index) { // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{' to begin a region}} omp.wsloop nowait inclusive for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -145,7 +145,7 @@ func.func @order_value(%lb : index, %ub : index, %step : index) { // ----- func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop if(%bool_var: i1) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -155,7 +155,7 @@ func.func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i // ----- func.func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) { - // expected-error @below {{expected 'for'}} + // expected-error @below {{expected '{'}} omp.wsloop num_threads(%int_var: i32) for (%iv) : index = (%lb) to (%ub) step (%step) { omp.yield @@ -479,12 +479,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{expected symbol reference @foo to point to a reduction declaration}} - omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@foo %0 -> %prv : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %1 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -507,12 +509,14 @@ func.func @foo(%lb : index, %ub : index, %step : index) { %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr // expected-error @below {{accumulator variable used more than once}} - omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr, @add_f32 %0 -> %prv1 : !llvm.ptr) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 omp.reduction %2, %0 : f32, !llvm.ptr omp.yield } + omp.terminator + } return } @@ -540,12 +544,14 @@ func.func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) { %c1 = arith.constant 1 : i32 // expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr')}} - omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) - for (%iv) : index = (%lb) to (%ub) step (%step) { + omp.wsloop reduction(@add_f32 %mem -> %prv : memref<1xf32>) { + omp.loopnest (%iv) : index = (%lb) to (%ub) step (%step) { %2 = arith.constant 2.0 : f32 
  omp.reduction %2, %mem : f32, memref<1xf32>
  omp.yield
  }
+ omp.terminator
+ }
  return
}
@@ -577,27 +583,32 @@ omp.critical.declare @mutex hint(invalid_hint)
  // -----
  func.func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
  omp.ordered_region {
  omp.terminator
  }
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
  // -----
  func.func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
  omp.ordered_region {
  omp.terminator
  }
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
@@ -612,25 +623,29 @@ func.func @omp_ordered3(%vec0 : i64) -> () {
  // -----
  func.func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
- omp.wsloop ordered(0)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
  omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
  // -----
  func.func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
  omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
  omp.yield
  }
+ omp.terminator
+ }
  return
  }
@@ -1462,12 +1477,14 @@ func.func @omp_cancel2() {
  // -----
  func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop nowait
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop nowait {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{A worksharing construct that is canceled must not have a nowait clause}}
  omp.cancel cancellation_construct_type(loop)
  // CHECK: omp.terminator
  omp.terminator
+ }
+ omp.terminator
  }
  return
  }
@@ -1475,12 +1492,14 @@ func.func @omp_cancel3(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
  // -----
  func.func @omp_cancel4(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
- omp.wsloop ordered(1)
- for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
  // expected-error @below {{A worksharing construct that is canceled must not have an ordered clause}}
  omp.cancel cancellation_construct_type(loop)
  // CHECK: omp.terminator
  omp.terminator
+ }
+ omp.terminator
  }
  return
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
index afbf5f2224630..d1758035b8956 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -12,10 +12,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
  llvm.store %loop_cnt, %gep : i32, !llvm.ptr
  omp.yield
+ }
+ omp.terminator
  }
  omp.terminator
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
index 435aca32450c2..36fa2261e385c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir
@@ -8,7 +8,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(99 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : index) : i32
- omp.wsloop for (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
+ omp.wsloop {
+ omp.loopnest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
  %1 = llvm.add %arg1, %arg2 : i32
  %2 = llvm.mul %arg2, %loop_ub overflow : i32
  %3 = llvm.add %arg1, %2 :i32
@@ -16,6 +17,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  llvm.store %1, %4 : i32, !llvm.ptr
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
  }
diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
index 4cfb7d4f69514..0f48d45c53a09 100644
--- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir
@@ -8,10 +8,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
  llvm.store %loop_cnt, %gep : i32, !llvm.ptr
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -20,8 +23,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
  %loop_ub = llvm.mlir.constant(9 : i32) : i32
  %loop_lb = llvm.mlir.constant(0 : i32) : i32
  %loop_step = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+ omp.wsloop {
+ omp.loopnest (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 3571526c078a5..a7393c47e1c5d 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -320,7 +320,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  omp.parallel {
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest" (%1, %0, %2) ({
  ^bb0(%arg1: i64):
  // The form of the emitted IR is controlled by OpenMPIRBuilder and
  // tested there. Just check that the right functions are called.
@@ -334,6 +335,8 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
  }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
  omp.terminator
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -345,13 +348,16 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  // CHECK: store i64 31, ptr %{{.*}}upperbound
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest"(%1, %0, %2) ({
  ^bb0(%arg1: i64):
  %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
  %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
  llvm.store %3, %4 : f32, !llvm.ptr
  omp.yield
  }) {operandSegmentSizes = array} : (i64, i64, i64) -> ()
+ omp.terminator
+ }
  llvm.return
  }
@@ -363,13 +369,16 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
  %1 = llvm.mlir.constant(10 : index) : i64
  %2 = llvm.mlir.constant(1 : index) : i64
  // CHECK: store i64 32, ptr %{{.*}}upperbound
- "omp.wsloop"(%1, %0, %2) ({
+ omp.wsloop {
+ "omp.loopnest"(%1, %0, %2) ({
  ^bb0(%arg1: i64):
  %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
  %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
  llvm.store %3, %4 : f32, !llvm.ptr
  omp.yield
  }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> ()
+ omp.terminator
+ }
  llvm.return
  }
@@ -379,13 +388,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_defchunk
  llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
- omp.wsloop schedule(static)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 34, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 0)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -396,13 +407,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_1
  llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 1)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -413,13 +426,15 @@ llvm.func @body(i32)
  // CHECK-LABEL: @test_omp_wsloop_static_2
  llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(2 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_for_static_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 33, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 1, i32 2)
  // CHECK: call void @__kmpc_for_static_fini
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -428,8 +443,8 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -437,6 +452,8 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -446,8 +463,8 @@ llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
  %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
- omp.wsloop schedule(dynamic = %chunk_size_const : i16)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_const : i16) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -455,6 +472,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -466,8 +485,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
  %1 = llvm.mlir.constant(1 : i64) : i64
  %chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i16
- omp.wsloop schedule(dynamic = %chunk_size_var : i16)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_var : i16) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -475,6 +494,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
  // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -487,8 +508,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  %1 = llvm.mlir.constant(1 : i64) : i64
  %chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr
  %chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr -> i64
- omp.wsloop schedule(dynamic = %chunk_size_var : i64)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size_var : i64) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -497,6 +518,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -505,8 +528,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
  llvm.func @body(i32)
  llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
- omp.wsloop schedule(dynamic = %chunk_size : i32)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic = %chunk_size : i32) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -514,6 +537,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -522,8 +547,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(auto) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -531,6 +556,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -539,8 +566,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -548,6 +575,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -556,8 +585,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -565,6 +594,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -573,8 +604,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, nonmonotonic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, nonmonotonic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -582,6 +613,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -590,8 +623,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, monotonic)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, monotonic) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -599,6 +632,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -607,8 +642,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime, simd)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime, simd) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -616,6 +651,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -624,8 +661,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided, simd)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided, simd) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
  // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@@ -633,6 +670,8 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -782,8 +821,8 @@ llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -792,6 +831,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -800,8 +841,8 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(static) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -810,6 +851,8 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -819,8 +862,8 @@ llvm.func @body(i32)
  llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
- omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
- for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0) {
+ omp.loopnest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_4u(ptr @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
  // CHECK: call void @__kmpc_dispatch_fini_4u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@@ -829,6 +872,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
  llvm.call @body(%iv) : (i32) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -837,8 +882,8 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -847,6 +892,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -855,8 +902,8 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(auto) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(auto) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -865,6 +912,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -873,8 +922,8 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> ()
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(runtime) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(runtime) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -883,6 +932,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -891,8 +942,8 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(guided) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(guided) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -901,6 +952,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -909,8 +962,8 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) ->
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, nonmonotonic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -919,6 +972,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -927,8 +982,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s
  llvm.func @body(i64)
  llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
- omp.wsloop schedule(dynamic, monotonic) ordered(0)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop schedule(dynamic, monotonic) ordered(0) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
  // CHECK: call void @__kmpc_dispatch_fini_8u
  // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
@@ -937,6 +992,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step
  llvm.call @body(%iv) : (i64) -> ()
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -1103,8 +1160,8 @@ llvm.func @collapse_wsloop(
  // CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
  // CHECK: store i32 %[[TOTAL_SUB_1]], ptr
  // CHECK: call void @__kmpc_for_static_init_4u
- omp.wsloop
- for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.wsloop {
+ omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
  %31 = llvm.load %20 : !llvm.ptr -> i32
  %32 = llvm.add %31, %arg0 : i32
  %33 = llvm.add %32, %arg1 : i32
@@ -1113,6 +1170,8 @@ llvm.func @collapse_wsloop(
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -1164,8 +1223,8 @@ llvm.func @collapse_wsloop_dynamic(
  // CHECK: store i32 1, ptr
  // CHECK: store i32 %[[TOTAL]], ptr
  // CHECK: call void @__kmpc_dispatch_init_4u
- omp.wsloop schedule(dynamic)
- for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
+ omp.wsloop schedule(dynamic) {
+ omp.loopnest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
  %31 = llvm.load %20 : !llvm.ptr -> i32
  %32 = llvm.add %31, %arg0 : i32
  %33 = llvm.add %32, %arg1 : i32
@@ -1174,6 +1233,8 @@ llvm.func @collapse_wsloop_dynamic(
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -1196,8 +1257,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  // CHECK: call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 [[OMP_THREAD]])
  }
- omp.wsloop ordered(0)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(0) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: call void @__kmpc_ordered(ptr @[[GLOB3:[0-9]+]], i32 [[OMP_THREAD2:%.*]])
  omp.ordered_region {
  omp.terminator
@@ -1205,9 +1266,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  }
  omp.yield
  }
+ omp.terminator
+ }
- omp.wsloop ordered(1)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(1) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
  // CHECK: store i64 [[ARG0:%.*]], ptr [[TMP]], align 8
  // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], ptr [[ADDR]], i64 0, i64 0
@@ -1224,9 +1287,11 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  omp.yield
  }
+ omp.terminator
+ }
- omp.wsloop ordered(2)
- for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop ordered(2) {
+ omp.loopnest (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
  // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 0
  // CHECK: store i64 [[ARG0]], ptr [[TMP5]], align 8
  // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], ptr [[ADDR5]], i64 0, i64 1
@@ -1254,6 +1319,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -2122,11 +2189,14 @@ llvm.func @omp_sections_with_clauses() -> () {
  // introduction mechanism itself is tested elsewhere.
  // CHECK-LABEL: @repeated_successor
  llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
- omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
+ omp.wsloop {
+ omp.loopnest (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
  llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
  ^bb1(%0: i64):  // 2 preds: ^bb0, ^bb0
  omp.yield
  }
+ omp.terminator
+ }
  llvm.return
  }
@@ -2549,8 +2619,8 @@ llvm.func @omp_opaque_pointers(%arg0 : !llvm.ptr, %arg1: !llvm.ptr, %expr: i32)
  // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
  // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
  // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1
-module attributes {omp.flags = #omp.flags} {}
  // -----
@@ -2595,8 +2665,8 @@ module attributes {omp.version = #omp.version} {}
  // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
  // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
  // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
-module attributes {omp.flags = #omp.flags} {}
  // -----
diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir
index e1fdfdd24a3cb..35bc63fb3b4a5 100644
--- a/mlir/test/Target/LLVMIR/openmp-nested.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir
@@ -11,13 +11,17 @@ module {
  %2 = llvm.mlir.constant(0 : index) : i64
  %4 = llvm.mlir.constant(0 : i32) : i32
  %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
- omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0) {
+ omp.wsloop {
+ omp.loopnest (%arg2) : i64 = (%2) to (%1) step (%0) {
  omp.parallel {
- omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0) {
+ omp.wsloop {
+ omp.loopnest (%arg3) : i64 = (%2) to (%0) step (%0) {
  llvm.store %2, %12 : i64, !llvm.ptr
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  %19 = llvm.load %12 : !llvm.ptr -> i64
  %20 = llvm.trunc %19 : i64 to i32
@@ -27,6 +31,8 @@ module {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  %a4 = llvm.mlir.constant(0 : i32) : i32
  llvm.return %a4 : i32
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 9543458e950be..deeaf8219ce95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -26,8 +26,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %2 = llvm.load %prv : !llvm.ptr -> f32
  %3 = llvm.fadd %1, %2 : f32
@@ -35,6 +35,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -105,8 +107,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -117,6 +119,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -195,8 +199,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @add_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -204,6 +208,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -280,8 +286,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %2 = llvm.load %prv : !llvm.ptr -> f32
  %3 = llvm.fadd %2, %1 : f32
@@ -292,6 +298,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -374,8 +382,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
- omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr)
- for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop reduction(@add_f32 %0 -> %prv0 : !llvm.ptr, @mul_f32 %2 -> %prv1 : !llvm.ptr) {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %1 = llvm.mlir.constant(2.0 : f32) : f32
  %3 = llvm.load %prv0 : !llvm.ptr -> f32
  %4 = llvm.fadd %3, %1 : f32
@@ -386,6 +394,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return
  }
@@ -529,9 +539,10 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  %lb = llvm.mlir.constant(1 : i64) : i64
  %step = llvm.mlir.constant(1 : i64) : i64
-
+
  omp.parallel reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
- omp.wsloop for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+ omp.wsloop {
+ omp.loopnest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  %ival = llvm.trunc %iv : i64 to i32
  %lprv = llvm.load %prv : !llvm.ptr -> i32
  %add = llvm.add %lprv, %ival : i32
@@ -539,6 +550,8 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  omp.yield
  }
  omp.terminator
+ }
+ omp.terminator
  }
  llvm.return